mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-19 16:20:50 +00:00
Compare commits
127 Commits
edb1724239
...
cb2f74cb75
Author | SHA1 | Date | |
---|---|---|---|
|
cb2f74cb75 | ||
|
b94a7167a8 | ||
|
64e58baba1 | ||
|
a3fe155579 | ||
|
a997cfad2b | ||
|
f4b4b3cc35 | ||
|
cb24849396 | ||
|
7fd2207626 | ||
|
4f73c677ac | ||
|
69f45acfd7 | ||
|
4c78206d0a | ||
|
429e8ada79 | ||
|
06b49d18d9 | ||
|
a17a8febf7 | ||
|
55529ec5a2 | ||
|
3106653852 | ||
|
82dbb3bb32 | ||
|
1bcdde3e62 | ||
|
9c185374e4 | ||
|
13e82d6439 | ||
|
b08e727aef | ||
|
a210f98819 | ||
|
e574c49e25 | ||
|
7c5d55c6b2 | ||
|
80259659ff | ||
|
8db3dddb3d | ||
|
e30ebfa23e | ||
|
813bcd896f | ||
|
fd0c7a1c18 | ||
|
4704fb8a3b | ||
|
907ef0f717 | ||
|
ab6bb3b2a6 | ||
|
ec4e0ed1b2 | ||
|
3a299f382d | ||
|
f3654b8fc8 | ||
|
676b6238d0 | ||
|
e876997ebb | ||
|
19e2197582 | ||
|
d223c4547f | ||
|
58993d3f3b | ||
|
6f63a7b213 | ||
|
56cfa74a14 | ||
|
5473b5a051 | ||
|
dbb1d043fe | ||
|
f859939125 | ||
|
6307ada396 | ||
|
aab0d3dd9e | ||
|
5a34b9f24e | ||
|
a0a4858e00 | ||
|
7acc10444c | ||
|
ca1567da03 | ||
|
afd123f741 | ||
|
3ff86a4347 | ||
|
86515e1bce | ||
|
4401b4dda0 | ||
|
4f70f48272 | ||
|
7bf7b516a7 | ||
|
14542d6779 | ||
|
65019c4b9b | ||
|
190339c4e6 | ||
|
5a86371b02 | ||
|
9edc66d458 | ||
|
03c7f3817b | ||
|
f44eaa808d | ||
|
e388f6f99b | ||
|
a3e233a537 | ||
|
955412888c | ||
|
9633563fbd | ||
|
85e7641299 | ||
|
840d284e36 | ||
|
8e61a5c0b6 | ||
|
2ef36b36ac | ||
|
79fc8d67ad | ||
|
596ba574e3 | ||
|
e968984d17 | ||
|
7f6694b370 | ||
|
2183c73077 | ||
|
9076446617 | ||
|
a58d27166b | ||
|
3a486d79bf | ||
|
31e2205c4e | ||
|
93bcf2d8bc | ||
|
6b6cfd4e16 | ||
|
bb2716251b | ||
|
e0dbc53b58 | ||
|
f97551e2ad | ||
|
f88b5988c1 | ||
|
4bb2f7b3f6 | ||
|
95edca513c | ||
|
5004e4d2cc | ||
|
c61fc591c4 | ||
|
dcbc590302 | ||
|
b6c3619543 | ||
|
b2172af817 | ||
|
5ea4844d69 | ||
|
48e7057200 | ||
|
5a96290cce | ||
|
7e22af06f1 | ||
|
ac78184fe7 | ||
|
1777ff37c0 | ||
|
7dca59da56 | ||
|
0fa45c3954 | ||
|
c802d7d58a | ||
|
5ab06caffc | ||
|
737d7484c5 | ||
|
b3a742304e | ||
|
6514d72fea | ||
|
c3d4b429d9 | ||
|
7ff848c2c8 | ||
|
a11ba3f437 | ||
|
6604d94271 | ||
|
e30fa1da4d | ||
|
7ea3345e0d | ||
|
1e97d73bd0 | ||
|
f0e9703384 | ||
|
514941627b | ||
|
acc08c65d9 | ||
|
f1e4403f98 | ||
|
b1d53f0472 | ||
|
bc3cfb008e | ||
|
9791a2ea40 | ||
|
9fb9d16737 | ||
|
6be1d0724a | ||
|
9238520490 | ||
|
dd1bb579df | ||
|
57943798b7 | ||
|
b43c3d75a2 |
20
README.md
20
README.md
@ -40,17 +40,8 @@ Every month we get together with the community (users, contributors, customers,
|
||||
|
||||
Keep an eye out for upcoming meetups and events around the world. Somewhere else you want us to be? Please feel free to reach out to tyler `<at>` clickhouse `<dot>` com. You can also peruse [ClickHouse Events](https://clickhouse.com/company/news-events) for a list of all upcoming trainings, meetups, speaking engagements, etc.
|
||||
|
||||
The following upcoming meetups are featuring creator of ClickHouse & CTO, Alexey Milovidov:
|
||||
Upcoming meetups
|
||||
|
||||
* [Raleigh Meetup (Deutsche Bank)](https://www.meetup.com/triangletechtalks/events/302723486/) - September 9
|
||||
* [New York Meetup (Rokt)](https://www.meetup.com/clickhouse-new-york-user-group/events/302575342) - September 10
|
||||
* [Chicago Meetup (Jump Capital)](https://lu.ma/43tvmrfw) - September 12
|
||||
|
||||
Other upcoming meetups
|
||||
|
||||
* [Toronto Meetup (Shopify)](https://www.meetup.com/clickhouse-toronto-user-group/events/301490855/) - September 10
|
||||
* [Austin Meetup](https://www.meetup.com/clickhouse-austin-user-group/events/302558689/) - September 17
|
||||
* [London Meetup](https://www.meetup.com/clickhouse-london-user-group/events/302977267) - September 17
|
||||
* [Bangalore Meetup](https://www.meetup.com/clickhouse-bangalore-user-group/events/303208274/) - September 18
|
||||
* [Tel Aviv Meetup](https://www.meetup.com/clickhouse-meetup-israel/events/303095121) - September 22
|
||||
* [Jakarta Meetup](https://www.meetup.com/clickhouse-indonesia-user-group/events/303191359/) - October 1
|
||||
@ -62,13 +53,20 @@ Other upcoming meetups
|
||||
* [Dubai Meetup](https://www.meetup.com/clickhouse-dubai-meetup-group/events/303096989/) - November 21
|
||||
* [Paris Meetup](https://www.meetup.com/clickhouse-france-user-group/events/303096434) - November 26
|
||||
|
||||
Recently completed events
|
||||
Recently completed meetups
|
||||
|
||||
* [ClickHouse Guangzhou User Group Meetup](https://mp.weixin.qq.com/s/GSvo-7xUoVzCsuUvlLTpCw) - August 25
|
||||
* [Seattle Meetup (Statsig)](https://www.meetup.com/clickhouse-seattle-user-group/events/302518075/) - August 27
|
||||
* [Melbourne Meetup](https://www.meetup.com/clickhouse-australia-user-group/events/302732666/) - August 27
|
||||
* [Sydney Meetup](https://www.meetup.com/clickhouse-australia-user-group/events/302862966/) - September 5
|
||||
* [Zurich Meetup](https://www.meetup.com/clickhouse-switzerland-meetup-group/events/302267429/) - September 5
|
||||
* [San Francisco Meetup (Cloudflare)](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/302540575) - September 5
|
||||
* [Raleigh Meetup (Deutsche Bank)](https://www.meetup.com/triangletechtalks/events/302723486/) - September 9
|
||||
* [New York Meetup (Rokt)](https://www.meetup.com/clickhouse-new-york-user-group/events/302575342) - September 10
|
||||
* [Toronto Meetup (Shopify)](https://www.meetup.com/clickhouse-toronto-user-group/events/301490855/) - September 10
|
||||
* [Chicago Meetup (Jump Capital)](https://lu.ma/43tvmrfw) - September 12
|
||||
* [London Meetup](https://www.meetup.com/clickhouse-london-user-group/events/302977267) - September 17
|
||||
* [Austin Meetup](https://www.meetup.com/clickhouse-austin-user-group/events/302558689/) - September 17
|
||||
|
||||
## Recent Recordings
|
||||
* **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible recordings of the ClickHouse Community Meetups are edited and presented as individual talks. Current featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments"
|
||||
|
2
contrib/libpqxx
vendored
2
contrib/libpqxx
vendored
@ -1 +1 @@
|
||||
Subproject commit c995193a3a14d71f4711f1f421f65a1a1db64640
|
||||
Subproject commit 41e4c331564167cca97ad6eccbd5b8879c2ca044
|
@ -1,9 +1,9 @@
|
||||
set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/libpqxx")
|
||||
|
||||
set (SRCS
|
||||
"${LIBRARY_DIR}/src/strconv.cxx"
|
||||
"${LIBRARY_DIR}/src/array.cxx"
|
||||
"${LIBRARY_DIR}/src/binarystring.cxx"
|
||||
"${LIBRARY_DIR}/src/blob.cxx"
|
||||
"${LIBRARY_DIR}/src/connection.cxx"
|
||||
"${LIBRARY_DIR}/src/cursor.cxx"
|
||||
"${LIBRARY_DIR}/src/encodings.cxx"
|
||||
@ -12,59 +12,25 @@ set (SRCS
|
||||
"${LIBRARY_DIR}/src/field.cxx"
|
||||
"${LIBRARY_DIR}/src/largeobject.cxx"
|
||||
"${LIBRARY_DIR}/src/notification.cxx"
|
||||
"${LIBRARY_DIR}/src/params.cxx"
|
||||
"${LIBRARY_DIR}/src/pipeline.cxx"
|
||||
"${LIBRARY_DIR}/src/result.cxx"
|
||||
"${LIBRARY_DIR}/src/robusttransaction.cxx"
|
||||
"${LIBRARY_DIR}/src/row.cxx"
|
||||
"${LIBRARY_DIR}/src/sql_cursor.cxx"
|
||||
"${LIBRARY_DIR}/src/strconv.cxx"
|
||||
"${LIBRARY_DIR}/src/stream_from.cxx"
|
||||
"${LIBRARY_DIR}/src/stream_to.cxx"
|
||||
"${LIBRARY_DIR}/src/subtransaction.cxx"
|
||||
"${LIBRARY_DIR}/src/time.cxx"
|
||||
"${LIBRARY_DIR}/src/transaction.cxx"
|
||||
"${LIBRARY_DIR}/src/transaction_base.cxx"
|
||||
"${LIBRARY_DIR}/src/row.cxx"
|
||||
"${LIBRARY_DIR}/src/params.cxx"
|
||||
"${LIBRARY_DIR}/src/util.cxx"
|
||||
"${LIBRARY_DIR}/src/version.cxx"
|
||||
"${LIBRARY_DIR}/src/wait.cxx"
|
||||
)
|
||||
|
||||
# Need to explicitly include each header file, because in the directory include/pqxx there are also files
|
||||
# like just 'array'. So if including the whole directory with `target_include_directories`, it will make
|
||||
# conflicts with all includes of <array>.
|
||||
set (HDRS
|
||||
"${LIBRARY_DIR}/include/pqxx/array.hxx"
|
||||
"${LIBRARY_DIR}/include/pqxx/params.hxx"
|
||||
"${LIBRARY_DIR}/include/pqxx/binarystring.hxx"
|
||||
"${LIBRARY_DIR}/include/pqxx/composite.hxx"
|
||||
"${LIBRARY_DIR}/include/pqxx/connection.hxx"
|
||||
"${LIBRARY_DIR}/include/pqxx/cursor.hxx"
|
||||
"${LIBRARY_DIR}/include/pqxx/dbtransaction.hxx"
|
||||
"${LIBRARY_DIR}/include/pqxx/errorhandler.hxx"
|
||||
"${LIBRARY_DIR}/include/pqxx/except.hxx"
|
||||
"${LIBRARY_DIR}/include/pqxx/field.hxx"
|
||||
"${LIBRARY_DIR}/include/pqxx/isolation.hxx"
|
||||
"${LIBRARY_DIR}/include/pqxx/largeobject.hxx"
|
||||
"${LIBRARY_DIR}/include/pqxx/nontransaction.hxx"
|
||||
"${LIBRARY_DIR}/include/pqxx/notification.hxx"
|
||||
"${LIBRARY_DIR}/include/pqxx/pipeline.hxx"
|
||||
"${LIBRARY_DIR}/include/pqxx/prepared_statement.hxx"
|
||||
"${LIBRARY_DIR}/include/pqxx/result.hxx"
|
||||
"${LIBRARY_DIR}/include/pqxx/robusttransaction.hxx"
|
||||
"${LIBRARY_DIR}/include/pqxx/row.hxx"
|
||||
"${LIBRARY_DIR}/include/pqxx/separated_list.hxx"
|
||||
"${LIBRARY_DIR}/include/pqxx/strconv.hxx"
|
||||
"${LIBRARY_DIR}/include/pqxx/stream_from.hxx"
|
||||
"${LIBRARY_DIR}/include/pqxx/stream_to.hxx"
|
||||
"${LIBRARY_DIR}/include/pqxx/subtransaction.hxx"
|
||||
"${LIBRARY_DIR}/include/pqxx/transaction.hxx"
|
||||
"${LIBRARY_DIR}/include/pqxx/transaction_base.hxx"
|
||||
"${LIBRARY_DIR}/include/pqxx/types.hxx"
|
||||
"${LIBRARY_DIR}/include/pqxx/util.hxx"
|
||||
"${LIBRARY_DIR}/include/pqxx/version.hxx"
|
||||
"${LIBRARY_DIR}/include/pqxx/zview.hxx"
|
||||
)
|
||||
|
||||
add_library(_libpqxx ${SRCS} ${HDRS})
|
||||
|
||||
add_library(_libpqxx ${SRCS})
|
||||
target_link_libraries(_libpqxx PUBLIC ch_contrib::libpq)
|
||||
target_include_directories (_libpqxx SYSTEM BEFORE PUBLIC "${LIBRARY_DIR}/include")
|
||||
|
||||
|
2
contrib/postgres
vendored
2
contrib/postgres
vendored
@ -1 +1 @@
|
||||
Subproject commit 665ff8c164d56d012e359735efe4d400c0564b44
|
||||
Subproject commit cfd77000af28469fcb650485bad65a35e7649e41
|
@ -1,18 +1,6 @@
|
||||
/* src/include/pg_config.h. Generated from pg_config.h.in by configure. */
|
||||
/* src/include/pg_config.h.in. Generated from configure.in by autoheader. */
|
||||
|
||||
/* Define to the type of arg 1 of 'accept' */
|
||||
#define ACCEPT_TYPE_ARG1 int
|
||||
|
||||
/* Define to the type of arg 2 of 'accept' */
|
||||
#define ACCEPT_TYPE_ARG2 struct sockaddr *
|
||||
|
||||
/* Define to the type of arg 3 of 'accept' */
|
||||
#define ACCEPT_TYPE_ARG3 size_t
|
||||
|
||||
/* Define to the return type of 'accept' */
|
||||
#define ACCEPT_TYPE_RETURN int
|
||||
|
||||
/* Define if building universal (internal helper macro) */
|
||||
/* #undef AC_APPLE_UNIVERSAL_BUILD */
|
||||
|
||||
@ -49,6 +37,9 @@
|
||||
/* Define to the default TCP port number as a string constant. */
|
||||
#define DEF_PGPORT_STR "5432"
|
||||
|
||||
/* Define to the file name extension of dynamically-loadable modules. */
|
||||
#define DLSUFFIX ".so"
|
||||
|
||||
/* Define to build with GSSAPI support. (--with-gssapi) */
|
||||
//#define ENABLE_GSS 0
|
||||
|
||||
@ -122,6 +113,9 @@
|
||||
don't. */
|
||||
#define HAVE_DECL_SNPRINTF 1
|
||||
|
||||
/* Define to 1 if you have the declaration of `sigwait', and to 0 if you don't. */
|
||||
#define HAVE_DECL_SIGWAIT 1
|
||||
|
||||
/* Define to 1 if you have the declaration of `strlcat', and to 0 if you
|
||||
don't. */
|
||||
#if OS_DARWIN
|
||||
@ -257,6 +251,9 @@
|
||||
/* Define to 1 if you have the `inet_aton' function. */
|
||||
#define HAVE_INET_ATON 1
|
||||
|
||||
/* Define to 1 if you have the `inet_pton' function. */
|
||||
#define HAVE_INET_PTON 1
|
||||
|
||||
/* Define to 1 if the system has the type `int64'. */
|
||||
/* #undef HAVE_INT64 */
|
||||
|
||||
@ -323,6 +320,9 @@
|
||||
/* Define to 1 if you have the `z' library (-lz). */
|
||||
#define HAVE_LIBZ 1
|
||||
|
||||
/* Define to 1 if you have the `zstd' library (-lzstd). */
|
||||
/* #undef HAVE_LIBZSTD */
|
||||
|
||||
/* Define to 1 if constants of type 'long long int' should have the suffix LL.
|
||||
*/
|
||||
#define HAVE_LL_CONSTANTS 1
|
||||
@ -378,6 +378,9 @@
|
||||
/* Define to 1 if you have the <poll.h> header file. */
|
||||
#define HAVE_POLL_H 1
|
||||
|
||||
/* Define to 1 if you have a POSIX-conforming sigwait declaration. */
|
||||
/* #undef HAVE_POSIX_DECL_SIGWAIT */
|
||||
|
||||
/* Define to 1 if you have the `posix_fadvise' function. */
|
||||
#define HAVE_POSIX_FADVISE 1
|
||||
|
||||
@ -408,9 +411,6 @@
|
||||
/* Define to 1 if you have the <pwd.h> header file. */
|
||||
#define HAVE_PWD_H 1
|
||||
|
||||
/* Define to 1 if you have the `random' function. */
|
||||
#define HAVE_RANDOM 1
|
||||
|
||||
/* Define to 1 if you have the <readline.h> header file. */
|
||||
/* #undef HAVE_READLINE_H */
|
||||
|
||||
@ -426,10 +426,6 @@
|
||||
/* Define to 1 if you have the `rint' function. */
|
||||
#define HAVE_RINT 1
|
||||
|
||||
/* Define to 1 if you have the global variable
|
||||
'rl_completion_append_character'. */
|
||||
/* #undef HAVE_RL_COMPLETION_APPEND_CHARACTER */
|
||||
|
||||
/* Define to 1 if you have the `rl_completion_matches' function. */
|
||||
#define HAVE_RL_COMPLETION_MATCHES 1
|
||||
|
||||
@ -439,6 +435,9 @@
|
||||
/* Define to 1 if you have the `rl_reset_screen_size' function. */
|
||||
/* #undef HAVE_RL_RESET_SCREEN_SIZE */
|
||||
|
||||
/* Define to 1 if you have the `rl_variable_bind' function. */
|
||||
#define HAVE_RL_VARIABLE_BIND 1
|
||||
|
||||
/* Define to 1 if you have the <security/pam_appl.h> header file. */
|
||||
#define HAVE_SECURITY_PAM_APPL_H 1
|
||||
|
||||
@ -451,6 +450,9 @@
|
||||
/* Define to 1 if you have the `shm_open' function. */
|
||||
#define HAVE_SHM_OPEN 1
|
||||
|
||||
/* Define to 1 if the system has the type `socklen_t'. */
|
||||
#define HAVE_SOCKLEN_T 1
|
||||
|
||||
/* Define to 1 if you have the `sigprocmask' function. */
|
||||
#define HAVE_SIGPROCMASK 1
|
||||
|
||||
@ -466,9 +468,6 @@
|
||||
/* Define to 1 if you have spinlocks. */
|
||||
#define HAVE_SPINLOCKS 1
|
||||
|
||||
/* Define to 1 if you have the `srandom' function. */
|
||||
#define HAVE_SRANDOM 1
|
||||
|
||||
/* Define to 1 if you have the `SSL_CTX_set_num_tickets' function. */
|
||||
/* #define HAVE_SSL_CTX_SET_NUM_TICKETS */
|
||||
|
||||
@ -885,6 +884,9 @@
|
||||
/* Define to select Win32-style shared memory. */
|
||||
/* #undef USE_WIN32_SHARED_MEMORY */
|
||||
|
||||
/* Define to 1 to build with ZSTD support. (--with-zstd) */
|
||||
/* #undef USE_ZSTD */
|
||||
|
||||
/* Define to 1 if `wcstombs_l' requires <xlocale.h>. */
|
||||
/* #undef WCSTOMBS_L_IN_XLOCALE */
|
||||
|
||||
|
@ -9,7 +9,7 @@ Computes an approximate [quantile](https://en.wikipedia.org/wiki/Quantile) of a
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
quantileDDsketch[relative_accuracy, (level)](expr)
|
||||
quantileDD(relative_accuracy, [level])(expr)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
@ -85,7 +85,7 @@
|
||||
#include <Dictionaries/registerDictionaries.h>
|
||||
#include <Disks/registerDisks.h>
|
||||
#include <Common/Scheduler/Nodes/registerSchedulerNodes.h>
|
||||
#include <Common/Scheduler/Nodes/registerResourceManagers.h>
|
||||
#include <Common/Scheduler/Workload/IWorkloadEntityStorage.h>
|
||||
#include <Common/Config/ConfigReloader.h>
|
||||
#include <Server/HTTPHandlerFactory.h>
|
||||
#include "MetricsTransmitter.h"
|
||||
@ -780,7 +780,6 @@ try
|
||||
registerFormats();
|
||||
registerRemoteFileMetadatas();
|
||||
registerSchedulerNodes();
|
||||
registerResourceManagers();
|
||||
|
||||
CurrentMetrics::set(CurrentMetrics::Revision, ClickHouseRevision::getVersionRevision());
|
||||
CurrentMetrics::set(CurrentMetrics::VersionInteger, ClickHouseRevision::getVersionInteger());
|
||||
@ -2089,6 +2088,8 @@ try
|
||||
database_catalog.assertDatabaseExists(default_database);
|
||||
/// Load user-defined SQL functions.
|
||||
global_context->getUserDefinedSQLObjectsStorage().loadObjects();
|
||||
/// Load WORKLOADs and RESOURCEs.
|
||||
global_context->getWorkloadEntityStorage().loadEntities();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
|
@ -1386,6 +1386,10 @@
|
||||
If not specified they will be stored locally. -->
|
||||
<!-- <user_defined_zookeeper_path>/clickhouse/user_defined</user_defined_zookeeper_path> -->
|
||||
|
||||
<!-- Path in ZooKeeper to store workload and resource created by the command CREATE WORKLOAD and CREATE REESOURCE.
|
||||
If not specified they will be stored locally. -->
|
||||
<!-- <workload_zookeeper_path>/clickhouse/workload</workload_zookeeper_path> -->
|
||||
|
||||
<!-- Uncomment if you want data to be compressed 30-100% better.
|
||||
Don't do that if you just started using ClickHouse.
|
||||
-->
|
||||
|
@ -99,6 +99,8 @@ enum class AccessType : uint8_t
|
||||
M(CREATE_ARBITRARY_TEMPORARY_TABLE, "", GLOBAL, CREATE) /* allows to create and manipulate temporary tables
|
||||
with arbitrary table engine */\
|
||||
M(CREATE_FUNCTION, "", GLOBAL, CREATE) /* allows to execute CREATE FUNCTION */ \
|
||||
M(CREATE_WORKLOAD, "", GLOBAL, CREATE) /* allows to execute CREATE WORKLOAD */ \
|
||||
M(CREATE_RESOURCE, "", GLOBAL, CREATE) /* allows to execute CREATE RESOURCE */ \
|
||||
M(CREATE_NAMED_COLLECTION, "", NAMED_COLLECTION, NAMED_COLLECTION_ADMIN) /* allows to execute CREATE NAMED COLLECTION */ \
|
||||
M(CREATE, "", GROUP, ALL) /* allows to execute {CREATE|ATTACH} */ \
|
||||
\
|
||||
@ -108,6 +110,8 @@ enum class AccessType : uint8_t
|
||||
implicitly enabled by the grant DROP_TABLE */\
|
||||
M(DROP_DICTIONARY, "", DICTIONARY, DROP) /* allows to execute {DROP|DETACH} DICTIONARY */\
|
||||
M(DROP_FUNCTION, "", GLOBAL, DROP) /* allows to execute DROP FUNCTION */\
|
||||
M(DROP_WORKLOAD, "", GLOBAL, DROP) /* allows to execute DROP WORKLOAD */\
|
||||
M(DROP_RESOURCE, "", GLOBAL, DROP) /* allows to execute DROP RESOURCE */\
|
||||
M(DROP_NAMED_COLLECTION, "", NAMED_COLLECTION, NAMED_COLLECTION_ADMIN) /* allows to execute DROP NAMED COLLECTION */\
|
||||
M(DROP, "", GROUP, ALL) /* allows to execute {DROP|DETACH} */\
|
||||
\
|
||||
|
@ -689,15 +689,17 @@ bool ContextAccess::checkAccessImplHelper(const ContextPtr & context, AccessFlag
|
||||
|
||||
const AccessFlags dictionary_ddl = AccessType::CREATE_DICTIONARY | AccessType::DROP_DICTIONARY;
|
||||
const AccessFlags function_ddl = AccessType::CREATE_FUNCTION | AccessType::DROP_FUNCTION;
|
||||
const AccessFlags workload_ddl = AccessType::CREATE_WORKLOAD | AccessType::DROP_WORKLOAD;
|
||||
const AccessFlags resource_ddl = AccessType::CREATE_RESOURCE | AccessType::DROP_RESOURCE;
|
||||
const AccessFlags table_and_dictionary_ddl = table_ddl | dictionary_ddl;
|
||||
const AccessFlags table_and_dictionary_and_function_ddl = table_ddl | dictionary_ddl | function_ddl;
|
||||
const AccessFlags write_table_access = AccessType::INSERT | AccessType::OPTIMIZE;
|
||||
const AccessFlags write_dcl_access = AccessType::ACCESS_MANAGEMENT - AccessType::SHOW_ACCESS;
|
||||
|
||||
const AccessFlags not_readonly_flags = write_table_access | table_and_dictionary_and_function_ddl | write_dcl_access | AccessType::SYSTEM | AccessType::KILL_QUERY;
|
||||
const AccessFlags not_readonly_flags = write_table_access | table_and_dictionary_and_function_ddl | workload_ddl | resource_ddl | write_dcl_access | AccessType::SYSTEM | AccessType::KILL_QUERY;
|
||||
const AccessFlags not_readonly_1_flags = AccessType::CREATE_TEMPORARY_TABLE;
|
||||
|
||||
const AccessFlags ddl_flags = table_ddl | dictionary_ddl | function_ddl;
|
||||
const AccessFlags ddl_flags = table_ddl | dictionary_ddl | function_ddl | workload_ddl | resource_ddl;
|
||||
const AccessFlags introspection_flags = AccessType::INTROSPECTION;
|
||||
};
|
||||
static const PrecalculatedFlags precalc;
|
||||
|
@ -3,370 +3,89 @@
|
||||
#include <Parsers/FunctionSecretArgumentsFinder.h>
|
||||
#include <Analyzer/ConstantNode.h>
|
||||
#include <Analyzer/FunctionNode.h>
|
||||
#include <Analyzer/IQueryTreeNode.h>
|
||||
#include <Analyzer/IdentifierNode.h>
|
||||
#include <Analyzer/ListNode.h>
|
||||
#include <Common/KnownObjectNames.h>
|
||||
#include <Core/QualifiedTableName.h>
|
||||
|
||||
#include <boost/algorithm/string/predicate.hpp>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class FunctionTreeNode : public AbstractFunction
|
||||
{
|
||||
public:
|
||||
class ArgumentTreeNode : public Argument
|
||||
{
|
||||
public:
|
||||
explicit ArgumentTreeNode(const IQueryTreeNode * argument_) : argument(argument_) {}
|
||||
std::unique_ptr<AbstractFunction> getFunction() const override
|
||||
{
|
||||
if (const auto * f = argument->as<FunctionNode>())
|
||||
return std::make_unique<FunctionTreeNode>(*f);
|
||||
return nullptr;
|
||||
}
|
||||
bool isIdentifier() const override { return argument->as<IdentifierNode>(); }
|
||||
bool tryGetString(String * res, bool allow_identifier) const override
|
||||
{
|
||||
if (const auto * literal = argument->as<ConstantNode>())
|
||||
{
|
||||
if (literal->getValue().getType() != Field::Types::String)
|
||||
return false;
|
||||
if (res)
|
||||
*res = literal->getValue().safeGet<String>();
|
||||
return true;
|
||||
}
|
||||
|
||||
if (allow_identifier)
|
||||
{
|
||||
if (const auto * id = argument->as<IdentifierNode>())
|
||||
{
|
||||
if (res)
|
||||
*res = id->getIdentifier().getFullName();
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
private:
|
||||
const IQueryTreeNode * argument = nullptr;
|
||||
};
|
||||
|
||||
class ArgumentsTreeNode : public Arguments
|
||||
{
|
||||
public:
|
||||
explicit ArgumentsTreeNode(const QueryTreeNodes * arguments_) : arguments(arguments_) {}
|
||||
size_t size() const override { return arguments ? arguments->size() : 0; }
|
||||
std::unique_ptr<Argument> at(size_t n) const override { return std::make_unique<ArgumentTreeNode>(arguments->at(n).get()); }
|
||||
private:
|
||||
const QueryTreeNodes * arguments = nullptr;
|
||||
};
|
||||
|
||||
explicit FunctionTreeNode(const FunctionNode & function_) : function(&function_)
|
||||
{
|
||||
if (const auto & nodes = function->getArguments().getNodes(); !nodes.empty())
|
||||
arguments = std::make_unique<ArgumentsTreeNode>(&nodes);
|
||||
}
|
||||
String name() const override { return function->getFunctionName(); }
|
||||
private:
|
||||
const FunctionNode * function = nullptr;
|
||||
};
|
||||
|
||||
|
||||
/// Finds arguments of a specified function which should not be displayed for most users for security reasons.
|
||||
/// That involves passwords and secret keys.
|
||||
class FunctionSecretArgumentsFinderTreeNode
|
||||
class FunctionSecretArgumentsFinderTreeNode : public FunctionSecretArgumentsFinder
|
||||
{
|
||||
public:
|
||||
explicit FunctionSecretArgumentsFinderTreeNode(const FunctionNode & function_) : function(function_), arguments(function.getArguments())
|
||||
explicit FunctionSecretArgumentsFinderTreeNode(const FunctionNode & function_)
|
||||
: FunctionSecretArgumentsFinder(std::make_unique<FunctionTreeNode>(function_))
|
||||
{
|
||||
if (arguments.getNodes().empty())
|
||||
if (!function->hasArguments())
|
||||
return;
|
||||
|
||||
findFunctionSecretArguments();
|
||||
findOrdinaryFunctionSecretArguments();
|
||||
}
|
||||
|
||||
struct Result
|
||||
{
|
||||
/// Result constructed by default means no arguments will be hidden.
|
||||
size_t start = static_cast<size_t>(-1);
|
||||
size_t count = 0; /// Mostly it's either 0 or 1. There are only a few cases where `count` can be greater than 1 (e.g. see `encrypt`).
|
||||
/// In all known cases secret arguments are consecutive
|
||||
bool are_named = false; /// Arguments like `password = 'password'` are considered as named arguments.
|
||||
/// E.g. "headers" in `url('..', headers('foo' = '[HIDDEN]'))`
|
||||
std::vector<std::string> nested_maps;
|
||||
|
||||
bool hasSecrets() const
|
||||
{
|
||||
return count != 0 || !nested_maps.empty();
|
||||
}
|
||||
};
|
||||
|
||||
FunctionSecretArgumentsFinder::Result getResult() const { return result; }
|
||||
|
||||
private:
|
||||
const FunctionNode & function;
|
||||
const ListNode & arguments;
|
||||
FunctionSecretArgumentsFinder::Result result;
|
||||
|
||||
void markSecretArgument(size_t index, bool argument_is_named = false)
|
||||
{
|
||||
if (index >= arguments.getNodes().size())
|
||||
return;
|
||||
if (!result.count)
|
||||
{
|
||||
result.start = index;
|
||||
result.are_named = argument_is_named;
|
||||
}
|
||||
chassert(index >= result.start); /// We always check arguments consecutively
|
||||
result.count = index + 1 - result.start;
|
||||
if (!argument_is_named)
|
||||
result.are_named = false;
|
||||
}
|
||||
|
||||
void findFunctionSecretArguments()
|
||||
{
|
||||
const auto & name = function.getFunctionName();
|
||||
|
||||
if ((name == "mysql") || (name == "postgresql") || (name == "mongodb"))
|
||||
{
|
||||
/// mysql('host:port', 'database', 'table', 'user', 'password', ...)
|
||||
/// postgresql('host:port', 'database', 'table', 'user', 'password', ...)
|
||||
/// mongodb('host:port', 'database', 'collection', 'user', 'password', ...)
|
||||
findMySQLFunctionSecretArguments();
|
||||
}
|
||||
else if ((name == "s3") || (name == "cosn") || (name == "oss") ||
|
||||
(name == "deltaLake") || (name == "hudi") || (name == "iceberg"))
|
||||
{
|
||||
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
|
||||
findS3FunctionSecretArguments(/* is_cluster_function= */ false);
|
||||
}
|
||||
else if (name == "s3Cluster")
|
||||
{
|
||||
/// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', ...)
|
||||
findS3FunctionSecretArguments(/* is_cluster_function= */ true);
|
||||
}
|
||||
else if ((name == "remote") || (name == "remoteSecure"))
|
||||
{
|
||||
/// remote('addresses_expr', 'db', 'table', 'user', 'password', ...)
|
||||
findRemoteFunctionSecretArguments();
|
||||
}
|
||||
else if ((name == "encrypt") || (name == "decrypt") ||
|
||||
(name == "aes_encrypt_mysql") || (name == "aes_decrypt_mysql") ||
|
||||
(name == "tryDecrypt"))
|
||||
{
|
||||
/// encrypt('mode', 'plaintext', 'key' [, iv, aad])
|
||||
findEncryptionFunctionSecretArguments();
|
||||
}
|
||||
else if (name == "url")
|
||||
{
|
||||
findURLSecretArguments();
|
||||
}
|
||||
}
|
||||
|
||||
void findMySQLFunctionSecretArguments()
|
||||
{
|
||||
if (isNamedCollectionName(0))
|
||||
{
|
||||
/// mysql(named_collection, ..., password = 'password', ...)
|
||||
findSecretNamedArgument("password", 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
/// mysql('host:port', 'database', 'table', 'user', 'password', ...)
|
||||
markSecretArgument(4);
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the number of arguments excluding "headers" and "extra_credentials" (which should
|
||||
/// always be at the end). Marks "headers" as secret, if found.
|
||||
size_t excludeS3OrURLNestedMaps()
|
||||
{
|
||||
const auto & nodes = arguments.getNodes();
|
||||
size_t count = nodes.size();
|
||||
while (count > 0)
|
||||
{
|
||||
const FunctionNode * f = nodes.at(count - 1)->as<FunctionNode>();
|
||||
if (!f)
|
||||
break;
|
||||
if (f->getFunctionName() == "headers")
|
||||
result.nested_maps.push_back(f->getFunctionName());
|
||||
else if (f->getFunctionName() != "extra_credentials")
|
||||
break;
|
||||
count -= 1;
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
void findS3FunctionSecretArguments(bool is_cluster_function)
|
||||
{
|
||||
/// s3Cluster('cluster_name', 'url', ...) has 'url' as its second argument.
|
||||
size_t url_arg_idx = is_cluster_function ? 1 : 0;
|
||||
|
||||
if (!is_cluster_function && isNamedCollectionName(0))
|
||||
{
|
||||
/// s3(named_collection, ..., secret_access_key = 'secret_access_key', ...)
|
||||
findSecretNamedArgument("secret_access_key", 1);
|
||||
return;
|
||||
}
|
||||
|
||||
/// We should check other arguments first because we don't need to do any replacement in case of
|
||||
/// s3('url', NOSIGN, 'format' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
|
||||
/// s3('url', 'format', 'structure' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
|
||||
size_t count = excludeS3OrURLNestedMaps();
|
||||
if ((url_arg_idx + 3 <= count) && (count <= url_arg_idx + 4))
|
||||
{
|
||||
String second_arg;
|
||||
if (tryGetStringFromArgument(url_arg_idx + 1, &second_arg))
|
||||
{
|
||||
if (boost::iequals(second_arg, "NOSIGN"))
|
||||
return; /// The argument after 'url' is "NOSIGN".
|
||||
|
||||
if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg))
|
||||
return; /// The argument after 'url' is a format: s3('url', 'format', ...)
|
||||
}
|
||||
}
|
||||
|
||||
/// We're going to replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures:
|
||||
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
|
||||
/// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression')
|
||||
if (url_arg_idx + 2 < count)
|
||||
markSecretArgument(url_arg_idx + 2);
|
||||
}
|
||||
|
||||
void findURLSecretArguments()
|
||||
{
|
||||
if (!isNamedCollectionName(0))
|
||||
excludeS3OrURLNestedMaps();
|
||||
}
|
||||
|
||||
bool tryGetStringFromArgument(size_t arg_idx, String * res, bool allow_identifier = true) const
|
||||
{
|
||||
if (arg_idx >= arguments.getNodes().size())
|
||||
return false;
|
||||
|
||||
return tryGetStringFromArgument(arguments.getNodes()[arg_idx], res, allow_identifier);
|
||||
}
|
||||
|
||||
static bool tryGetStringFromArgument(const QueryTreeNodePtr argument, String * res, bool allow_identifier = true)
|
||||
{
|
||||
if (const auto * literal = argument->as<ConstantNode>())
|
||||
{
|
||||
if (literal->getValue().getType() != Field::Types::String)
|
||||
return false;
|
||||
if (res)
|
||||
*res = literal->getValue().safeGet<String>();
|
||||
return true;
|
||||
}
|
||||
|
||||
if (allow_identifier)
|
||||
{
|
||||
if (const auto * id = argument->as<IdentifierNode>())
|
||||
{
|
||||
if (res)
|
||||
*res = id->getIdentifier().getFullName();
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void findRemoteFunctionSecretArguments()
|
||||
{
|
||||
if (isNamedCollectionName(0))
|
||||
{
|
||||
/// remote(named_collection, ..., password = 'password', ...)
|
||||
findSecretNamedArgument("password", 1);
|
||||
return;
|
||||
}
|
||||
|
||||
/// We're going to replace 'password' with '[HIDDEN'] for the following signatures:
|
||||
/// remote('addresses_expr', db.table, 'user' [, 'password'] [, sharding_key])
|
||||
/// remote('addresses_expr', 'db', 'table', 'user' [, 'password'] [, sharding_key])
|
||||
/// remote('addresses_expr', table_function(), 'user' [, 'password'] [, sharding_key])
|
||||
|
||||
/// But we should check the number of arguments first because we don't need to do any replacements in case of
|
||||
/// remote('addresses_expr', db.table)
|
||||
if (arguments.getNodes().size() < 3)
|
||||
return;
|
||||
|
||||
size_t arg_num = 1;
|
||||
|
||||
/// Skip 1 or 2 arguments with table_function() or db.table or 'db', 'table'.
|
||||
const auto * table_function = arguments.getNodes()[arg_num]->as<FunctionNode>();
|
||||
if (table_function && KnownTableFunctionNames::instance().exists(table_function->getFunctionName()))
|
||||
{
|
||||
++arg_num;
|
||||
}
|
||||
else
|
||||
{
|
||||
std::optional<String> database;
|
||||
std::optional<QualifiedTableName> qualified_table_name;
|
||||
if (!tryGetDatabaseNameOrQualifiedTableName(arg_num, database, qualified_table_name))
|
||||
{
|
||||
/// We couldn't evaluate the argument so we don't know whether it is 'db.table' or just 'db'.
|
||||
/// Hence we can't figure out whether we should skip one argument 'user' or two arguments 'table', 'user'
|
||||
/// before the argument 'password'. So it's safer to wipe two arguments just in case.
|
||||
/// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string
|
||||
/// before wiping it (because the `password` argument is always a literal string).
|
||||
if (tryGetStringFromArgument(arg_num + 2, nullptr, /* allow_identifier= */ false))
|
||||
{
|
||||
/// Wipe either `password` or `user`.
|
||||
markSecretArgument(arg_num + 2);
|
||||
}
|
||||
if (tryGetStringFromArgument(arg_num + 3, nullptr, /* allow_identifier= */ false))
|
||||
{
|
||||
/// Wipe either `password` or `sharding_key`.
|
||||
markSecretArgument(arg_num + 3);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/// Skip the current argument (which is either a database name or a qualified table name).
|
||||
++arg_num;
|
||||
if (database)
|
||||
{
|
||||
/// Skip the 'table' argument if the previous argument was a database name.
|
||||
++arg_num;
|
||||
}
|
||||
}
|
||||
|
||||
/// Skip username.
|
||||
++arg_num;
|
||||
|
||||
/// Do our replacement:
|
||||
/// remote('addresses_expr', db.table, 'user', 'password', ...) -> remote('addresses_expr', db.table, 'user', '[HIDDEN]', ...)
|
||||
/// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string
|
||||
/// before wiping it (because the `password` argument is always a literal string).
|
||||
bool can_be_password = tryGetStringFromArgument(arg_num, nullptr, /* allow_identifier= */ false);
|
||||
if (can_be_password)
|
||||
markSecretArgument(arg_num);
|
||||
}
|
||||
|
||||
/// Tries to get either a database name or a qualified table name from an argument.
|
||||
/// Empty string is also allowed (it means the default database).
|
||||
/// The function is used by findRemoteFunctionSecretArguments() to determine how many arguments to skip before a password.
|
||||
bool tryGetDatabaseNameOrQualifiedTableName(
|
||||
size_t arg_idx,
|
||||
std::optional<String> & res_database,
|
||||
std::optional<QualifiedTableName> & res_qualified_table_name) const
|
||||
{
|
||||
res_database.reset();
|
||||
res_qualified_table_name.reset();
|
||||
|
||||
String str;
|
||||
if (!tryGetStringFromArgument(arg_idx, &str, /* allow_identifier= */ true))
|
||||
return false;
|
||||
|
||||
if (str.empty())
|
||||
{
|
||||
res_database = "";
|
||||
return true;
|
||||
}
|
||||
|
||||
auto qualified_table_name = QualifiedTableName::tryParseFromString(str);
|
||||
if (!qualified_table_name)
|
||||
return false;
|
||||
|
||||
if (qualified_table_name->database.empty())
|
||||
res_database = std::move(qualified_table_name->table);
|
||||
else
|
||||
res_qualified_table_name = std::move(qualified_table_name);
|
||||
return true;
|
||||
}
|
||||
|
||||
void findEncryptionFunctionSecretArguments()
|
||||
{
|
||||
if (arguments.getNodes().empty())
|
||||
return;
|
||||
|
||||
/// We replace all arguments after 'mode' with '[HIDDEN]':
|
||||
/// encrypt('mode', 'plaintext', 'key' [, iv, aad]) -> encrypt('mode', '[HIDDEN]')
|
||||
result.start = 1;
|
||||
result.count = arguments.getNodes().size() - 1;
|
||||
}
|
||||
|
||||
|
||||
/// Whether a specified argument can be the name of a named collection?
|
||||
bool isNamedCollectionName(size_t arg_idx) const
|
||||
{
|
||||
if (arguments.getNodes().size() <= arg_idx)
|
||||
return false;
|
||||
|
||||
const auto * identifier = arguments.getNodes()[arg_idx]->as<IdentifierNode>();
|
||||
return identifier != nullptr;
|
||||
}
|
||||
|
||||
/// Looks for a secret argument with a specified name. This function looks for arguments in format `key=value` where the key is specified.
|
||||
void findSecretNamedArgument(const std::string_view & key, size_t start = 0)
|
||||
{
|
||||
for (size_t i = start; i < arguments.getNodes().size(); ++i)
|
||||
{
|
||||
const auto & argument = arguments.getNodes()[i];
|
||||
const auto * equals_func = argument->as<FunctionNode>();
|
||||
if (!equals_func || (equals_func->getFunctionName() != "equals"))
|
||||
continue;
|
||||
|
||||
const auto * expr_list = equals_func->getArguments().as<ListNode>();
|
||||
if (!expr_list)
|
||||
continue;
|
||||
|
||||
const auto & equal_args = expr_list->getNodes();
|
||||
if (equal_args.size() != 2)
|
||||
continue;
|
||||
|
||||
String found_key;
|
||||
if (!tryGetStringFromArgument(equal_args[0], &found_key))
|
||||
continue;
|
||||
|
||||
if (found_key == key)
|
||||
markSecretArgument(i, /* argument_is_named= */ true);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -2564,8 +2564,8 @@ void checkFunctionNodeHasEmptyNullsAction(FunctionNode const & node)
|
||||
if (node.getNullsAction() != NullsAction::EMPTY)
|
||||
throw Exception(
|
||||
ErrorCodes::SYNTAX_ERROR,
|
||||
"Function with name '{}' cannot use {} NULLS",
|
||||
node.getFunctionName(),
|
||||
"Function with name {} cannot use {} NULLS",
|
||||
backQuote(node.getFunctionName()),
|
||||
node.getNullsAction() == NullsAction::IGNORE_NULLS ? "IGNORE" : "RESPECT");
|
||||
}
|
||||
}
|
||||
@ -3228,16 +3228,16 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
|
||||
auto hints = NamePrompter<2>::getHints(function_name, possible_function_names);
|
||||
|
||||
throw Exception(ErrorCodes::UNKNOWN_FUNCTION,
|
||||
"Function with name '{}' does not exist. In scope {}{}",
|
||||
function_name,
|
||||
"Function with name {} does not exist. In scope {}{}",
|
||||
backQuote(function_name),
|
||||
scope.scope_node->formatASTForErrorMessage(),
|
||||
getHintsErrorMessageSuffix(hints));
|
||||
}
|
||||
|
||||
if (!function_lambda_arguments_indexes.empty())
|
||||
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
|
||||
"Aggregate function '{}' does not support lambda arguments",
|
||||
function_name);
|
||||
"Aggregate function {} does not support lambda arguments",
|
||||
backQuote(function_name));
|
||||
|
||||
auto action = function_node_ptr->getNullsAction();
|
||||
std::string aggregate_function_name = rewriteAggregateFunctionNameIfNeeded(function_name, action, scope.context);
|
||||
@ -3679,10 +3679,10 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(
|
||||
|
||||
auto hints = IdentifierResolver::collectIdentifierTypoHints(unresolved_identifier, valid_identifiers);
|
||||
|
||||
throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "Unknown {}{} identifier '{}' in scope {}{}",
|
||||
throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "Unknown {}{} identifier {} in scope {}{}",
|
||||
toStringLowercase(IdentifierLookupContext::EXPRESSION),
|
||||
message_clarification,
|
||||
unresolved_identifier.getFullName(),
|
||||
backQuote(unresolved_identifier.getFullName()),
|
||||
scope.scope_node->formatASTForErrorMessage(),
|
||||
getHintsErrorMessageSuffix(hints));
|
||||
}
|
||||
|
@ -114,6 +114,7 @@ add_headers_and_sources(dbms Storages/ObjectStorage/HDFS)
|
||||
add_headers_and_sources(dbms Storages/ObjectStorage/Local)
|
||||
add_headers_and_sources(dbms Storages/ObjectStorage/DataLakes)
|
||||
add_headers_and_sources(dbms Common/NamedCollections)
|
||||
add_headers_and_sources(dbms Common/Scheduler/Workload)
|
||||
|
||||
if (TARGET ch_contrib::amqp_cpp)
|
||||
add_headers_and_sources(dbms Storages/RabbitMQ)
|
||||
|
@ -609,7 +609,12 @@
|
||||
M(728, UNEXPECTED_DATA_TYPE) \
|
||||
M(729, ILLEGAL_TIME_SERIES_TAGS) \
|
||||
M(730, REFRESH_FAILED) \
|
||||
<<<<<<< HEAD
|
||||
M(731, WORKLOAD_ENTITY_ALREADY_EXISTS) \
|
||||
M(732, UNKNOWN_WORKLOAD_ENTITY) \
|
||||
=======
|
||||
M(731, QUERY_CACHE_USED_WITH_NON_THROW_OVERFLOW_MODE) \
|
||||
>>>>>>> master
|
||||
\
|
||||
M(900, DISTRIBUTED_CACHE_ERROR) \
|
||||
M(901, CANNOT_USE_DISTRIBUTED_CACHE) \
|
||||
|
@ -6,6 +6,7 @@
|
||||
/// Separate type (rather than `Int64` is used just to avoid implicit conversion errors and to default-initialize
|
||||
struct Priority
|
||||
{
|
||||
Int64 value = 0; /// Note that lower value means higher priority.
|
||||
constexpr operator Int64() const { return value; } /// NOLINT
|
||||
using Value = Int64;
|
||||
Value value = 0; /// Note that lower value means higher priority.
|
||||
constexpr operator Value() const { return value; } /// NOLINT
|
||||
};
|
||||
|
@ -51,7 +51,7 @@ public:
|
||||
virtual ClassifierPtr acquire(const String & classifier_name) = 0;
|
||||
|
||||
/// For introspection, see `system.scheduler` table
|
||||
using VisitorFunc = std::function<void(const String & resource, const String & path, const String & type, const SchedulerNodePtr & node)>;
|
||||
using VisitorFunc = std::function<void(const String & resource, const String & path, ISchedulerNode * node)>;
|
||||
virtual void forEachNode(VisitorFunc visitor) = 0;
|
||||
};
|
||||
|
||||
|
@ -15,8 +15,7 @@ namespace DB
|
||||
* When constraint is again satisfied, scheduleActivation() is called from finishRequest().
|
||||
*
|
||||
* Derived class behaviour requirements:
|
||||
* - dequeueRequest() must fill `request->constraint` iff it is nullptr;
|
||||
* - finishRequest() must be recursive: call to `parent_constraint->finishRequest()`.
|
||||
* - dequeueRequest() must call `request->addConstraint()`.
|
||||
*/
|
||||
class ISchedulerConstraint : public ISchedulerNode
|
||||
{
|
||||
@ -25,34 +24,16 @@ public:
|
||||
: ISchedulerNode(event_queue_, config, config_prefix)
|
||||
{}
|
||||
|
||||
ISchedulerConstraint(EventQueue * event_queue_, const SchedulerNodeInfo & info_)
|
||||
: ISchedulerNode(event_queue_, info_)
|
||||
{}
|
||||
|
||||
/// Resource consumption by `request` is finished.
|
||||
/// Should be called outside of scheduling subsystem, implementation must be thread-safe.
|
||||
virtual void finishRequest(ResourceRequest * request) = 0;
|
||||
|
||||
void setParent(ISchedulerNode * parent_) override
|
||||
{
|
||||
ISchedulerNode::setParent(parent_);
|
||||
|
||||
// Assign `parent_constraint` to the nearest parent derived from ISchedulerConstraint
|
||||
for (ISchedulerNode * node = parent_; node != nullptr; node = node->parent)
|
||||
{
|
||||
if (auto * constraint = dynamic_cast<ISchedulerConstraint *>(node))
|
||||
{
|
||||
parent_constraint = constraint;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// For introspection of current state (true = satisfied, false = violated)
|
||||
virtual bool isSatisfied() = 0;
|
||||
|
||||
protected:
|
||||
// Reference to nearest parent that is also derived from ISchedulerConstraint.
|
||||
// Request can traverse through multiple constraints while being dequeue from hierarchy,
|
||||
// while finishing request should traverse the same chain in reverse order.
|
||||
// NOTE: it must be immutable after initialization, because it is accessed in not thread-safe way from finishRequest()
|
||||
ISchedulerConstraint * parent_constraint = nullptr;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -57,7 +57,13 @@ struct SchedulerNodeInfo
|
||||
|
||||
SchedulerNodeInfo() = default;
|
||||
|
||||
explicit SchedulerNodeInfo(const Poco::Util::AbstractConfiguration & config = emptyConfig(), const String & config_prefix = {})
|
||||
explicit SchedulerNodeInfo(double weight_, Priority priority_ = {})
|
||||
{
|
||||
setWeight(weight_);
|
||||
setPriority(priority_);
|
||||
}
|
||||
|
||||
explicit SchedulerNodeInfo(const Poco::Util::AbstractConfiguration & config, const String & config_prefix = {})
|
||||
{
|
||||
setWeight(config.getDouble(config_prefix + ".weight", weight));
|
||||
setPriority(config.getInt64(config_prefix + ".priority", priority));
|
||||
@ -78,6 +84,11 @@ struct SchedulerNodeInfo
|
||||
priority.value = value;
|
||||
}
|
||||
|
||||
void setPriority(Priority value)
|
||||
{
|
||||
priority = value;
|
||||
}
|
||||
|
||||
// To check if configuration update required
|
||||
bool equals(const SchedulerNodeInfo & o) const
|
||||
{
|
||||
@ -123,8 +134,15 @@ public:
|
||||
, info(config, config_prefix)
|
||||
{}
|
||||
|
||||
ISchedulerNode(EventQueue * event_queue_, const SchedulerNodeInfo & info_)
|
||||
: event_queue(event_queue_)
|
||||
, info(info_)
|
||||
{}
|
||||
|
||||
virtual ~ISchedulerNode() = default;
|
||||
|
||||
virtual const String & getTypeName() const = 0;
|
||||
|
||||
/// Checks if two nodes configuration is equal
|
||||
virtual bool equals(ISchedulerNode * other)
|
||||
{
|
||||
@ -134,10 +152,11 @@ public:
|
||||
/// Attach new child
|
||||
virtual void attachChild(const std::shared_ptr<ISchedulerNode> & child) = 0;
|
||||
|
||||
/// Detach and destroy child
|
||||
/// Detach child
|
||||
/// NOTE: child might be destroyed if the only reference was stored in parent
|
||||
virtual void removeChild(ISchedulerNode * child) = 0;
|
||||
|
||||
/// Get attached child by name
|
||||
/// Get attached child by name (for tests only)
|
||||
virtual ISchedulerNode * getChild(const String & child_name) = 0;
|
||||
|
||||
/// Activation of child due to the first pending request
|
||||
@ -147,7 +166,7 @@ public:
|
||||
/// Returns true iff node is active
|
||||
virtual bool isActive() = 0;
|
||||
|
||||
/// Returns number of active children
|
||||
/// Returns number of active children (for introspection only).
|
||||
virtual size_t activeChildren() = 0;
|
||||
|
||||
/// Returns the first request to be executed as the first component of resulting pair.
|
||||
@ -155,10 +174,10 @@ public:
|
||||
virtual std::pair<ResourceRequest *, bool> dequeueRequest() = 0;
|
||||
|
||||
/// Returns full path string using names of every parent
|
||||
String getPath()
|
||||
String getPath() const
|
||||
{
|
||||
String result;
|
||||
ISchedulerNode * ptr = this;
|
||||
const ISchedulerNode * ptr = this;
|
||||
while (ptr->parent)
|
||||
{
|
||||
result = "/" + ptr->basename + result;
|
||||
|
@ -21,6 +21,10 @@ public:
|
||||
: ISchedulerNode(event_queue_, config, config_prefix)
|
||||
{}
|
||||
|
||||
ISchedulerQueue(EventQueue * event_queue_, const SchedulerNodeInfo & info_)
|
||||
: ISchedulerNode(event_queue_, info_)
|
||||
{}
|
||||
|
||||
// Wrapper for `enqueueRequest()` that should be used to account for available resource budget
|
||||
// Returns `estimated_cost` that should be passed later to `adjustBudget()`
|
||||
[[ nodiscard ]] ResourceCost enqueueRequestUsingBudget(ResourceRequest * request)
|
||||
@ -47,6 +51,11 @@ public:
|
||||
/// Should be called outside of scheduling subsystem, implementation must be thread-safe.
|
||||
virtual bool cancelRequest(ResourceRequest * request) = 0;
|
||||
|
||||
/// Fails all the resource requests in queue and marks this queue as not usable.
|
||||
/// Afterwards any new request will be failed on `enqueueRequest()`.
|
||||
/// NOTE: This is done for queues that are about to be destructed.
|
||||
virtual void purgeQueue() = 0;
|
||||
|
||||
/// For introspection
|
||||
ResourceCost getBudget() const
|
||||
{
|
||||
|
@ -1,7 +1,6 @@
|
||||
#include <Common/Scheduler/Nodes/DynamicResourceManager.h>
|
||||
|
||||
#include <Common/Scheduler/Nodes/SchedulerNodeFactory.h>
|
||||
#include <Common/Scheduler/ResourceManagerFactory.h>
|
||||
#include <Common/Scheduler/ISchedulerQueue.h>
|
||||
|
||||
#include <Common/Exception.h>
|
||||
@ -245,7 +244,7 @@ void DynamicResourceManager::forEachNode(IResourceManager::VisitorFunc visitor)
|
||||
{
|
||||
for (auto & [name, resource] : state_ref->resources)
|
||||
for (auto & [path, node] : resource->nodes)
|
||||
visitor(name, path, node.type, node.ptr);
|
||||
visitor(name, path, node.ptr.get());
|
||||
promise.set_value();
|
||||
});
|
||||
|
||||
@ -253,9 +252,4 @@ void DynamicResourceManager::forEachNode(IResourceManager::VisitorFunc visitor)
|
||||
future.get();
|
||||
}
|
||||
|
||||
void registerDynamicResourceManager(ResourceManagerFactory & factory)
|
||||
{
|
||||
factory.registerMethod<DynamicResourceManager>("dynamic");
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -48,6 +48,16 @@ public:
|
||||
: ISchedulerNode(event_queue_, config, config_prefix)
|
||||
{}
|
||||
|
||||
FairPolicy(EventQueue * event_queue_, const SchedulerNodeInfo & info_)
|
||||
: ISchedulerNode(event_queue_, info_)
|
||||
{}
|
||||
|
||||
const String & getTypeName() const override
|
||||
{
|
||||
static String type_name("fair");
|
||||
return type_name;
|
||||
}
|
||||
|
||||
bool equals(ISchedulerNode * other) override
|
||||
{
|
||||
if (!ISchedulerNode::equals(other))
|
||||
|
@ -30,6 +30,21 @@ public:
|
||||
: ISchedulerQueue(event_queue_, config, config_prefix)
|
||||
{}
|
||||
|
||||
FifoQueue(EventQueue * event_queue_, const SchedulerNodeInfo & info_)
|
||||
: ISchedulerQueue(event_queue_, info_)
|
||||
{}
|
||||
|
||||
~FifoQueue() override
|
||||
{
|
||||
chassert(requests.empty());
|
||||
}
|
||||
|
||||
const String & getTypeName() const override
|
||||
{
|
||||
static String type_name("fifo");
|
||||
return type_name;
|
||||
}
|
||||
|
||||
bool equals(ISchedulerNode * other) override
|
||||
{
|
||||
if (!ISchedulerNode::equals(other))
|
||||
@ -42,6 +57,8 @@ public:
|
||||
void enqueueRequest(ResourceRequest * request) override
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
if (is_not_usable)
|
||||
throw Exception(ErrorCodes::INVALID_SCHEDULER_NODE, "Scheduler queue is about to be destructed");
|
||||
queue_cost += request->cost;
|
||||
bool was_empty = requests.empty();
|
||||
requests.push_back(*request);
|
||||
@ -66,6 +83,8 @@ public:
|
||||
bool cancelRequest(ResourceRequest * request) override
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
if (is_not_usable)
|
||||
return false; // Any request should already be failed or executed
|
||||
if (request->is_linked())
|
||||
{
|
||||
// It's impossible to check that `request` is indeed inserted to this queue and not another queue.
|
||||
@ -88,6 +107,19 @@ public:
|
||||
return false;
|
||||
}
|
||||
|
||||
void purgeQueue() override
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
is_not_usable = true;
|
||||
while (!requests.empty())
|
||||
{
|
||||
ResourceRequest * request = &requests.front();
|
||||
requests.pop_front();
|
||||
request->failed(std::make_exception_ptr(
|
||||
Exception(ErrorCodes::INVALID_SCHEDULER_NODE, "Scheduler queue with resource request is about to be destructed")));
|
||||
}
|
||||
}
|
||||
|
||||
bool isActive() override
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
@ -131,6 +163,7 @@ private:
|
||||
std::mutex mutex;
|
||||
Int64 queue_cost = 0;
|
||||
boost::intrusive::list<ResourceRequest> requests;
|
||||
bool is_not_usable = false;
|
||||
};
|
||||
|
||||
}
|
||||
|
502
src/Common/Scheduler/Nodes/IOResourceManager.cpp
Normal file
502
src/Common/Scheduler/Nodes/IOResourceManager.cpp
Normal file
@ -0,0 +1,502 @@
|
||||
#include "Common/Scheduler/IResourceManager.h"
|
||||
#include <Common/Scheduler/Nodes/IOResourceManager.h>
|
||||
|
||||
#include <Common/Scheduler/Nodes/FifoQueue.h>
|
||||
#include <Common/Scheduler/Nodes/FairPolicy.h>
|
||||
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/StringUtils.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <Common/Priority.h>
|
||||
|
||||
#include <Parsers/ASTCreateWorkloadQuery.h>
|
||||
#include <Parsers/ASTCreateResourceQuery.h>
|
||||
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <map>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int RESOURCE_ACCESS_DENIED;
|
||||
extern const int RESOURCE_NOT_FOUND;
|
||||
extern const int INVALID_SCHEDULER_NODE;
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
String getEntityName(const ASTPtr & ast)
|
||||
{
|
||||
if (auto * create = typeid_cast<ASTCreateWorkloadQuery *>(ast.get()))
|
||||
return create->getWorkloadName();
|
||||
if (auto * create = typeid_cast<ASTCreateResourceQuery *>(ast.get()))
|
||||
return create->getResourceName();
|
||||
return "unknown-workload-entity";
|
||||
}
|
||||
}
|
||||
|
||||
IOResourceManager::NodeInfo::NodeInfo(const ASTPtr & ast, const String & resource_name)
|
||||
{
|
||||
auto * create = typeid_cast<ASTCreateWorkloadQuery *>(ast.get());
|
||||
name = create->getWorkloadName();
|
||||
parent = create->getWorkloadParent();
|
||||
// TODO(serxa): parse workload settings specifically for `resource_name`
|
||||
UNUSED(resource_name);
|
||||
}
|
||||
|
||||
IOResourceManager::Resource::Resource(const ASTPtr & resource_entity_)
|
||||
: resource_entity(resource_entity_)
|
||||
, resource_name(getEntityName(resource_entity))
|
||||
{
|
||||
scheduler.start();
|
||||
}
|
||||
|
||||
IOResourceManager::Resource::~Resource()
|
||||
{
|
||||
// TODO(serxa): destroy all workloads, purge all queue, abort all resource requests
|
||||
scheduler.stop();
|
||||
}
|
||||
|
||||
void IOResourceManager::Resource::createNode(const NodeInfo & info)
|
||||
{
|
||||
// TODO(serxa): make sure all possible callers validate empty workload name!
|
||||
if (info.name.empty())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Workload must have a name in resource '{}'",
|
||||
resource_name);
|
||||
|
||||
// TODO(serxa): make sure all possible callers validate self-reference!
|
||||
if (info.name == info.parent)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Self-referencing workload '{}' is not allowed in resource '{}'",
|
||||
info.name, resource_name);
|
||||
|
||||
if (node_for_workload.contains(info.name))
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Node for creating workload '{}' already exist in resource '{}'",
|
||||
info.name, resource_name);
|
||||
|
||||
// TODO(serxa): make sure all possible callers validate parent existence, add tests for creating workload with invalid parent
|
||||
if (!info.parent.empty() && !node_for_workload.contains(info.parent))
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Parent node '{}' for creating workload '{}' does not exist in resource '{}'",
|
||||
info.parent, info.name, resource_name);
|
||||
|
||||
// TODO(serxa): make sure all possible callers validate second root, add tests for creating the second root
|
||||
if (info.parent.empty() && root_node)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "The second root workload '{}' is not allowed (current root '{}') in resource '{}'",
|
||||
info.name, root_node->basename, resource_name);
|
||||
|
||||
executeInSchedulerThread([&, this]
|
||||
{
|
||||
auto node = std::make_shared<UnifiedSchedulerNode>(scheduler.event_queue, info.settings);
|
||||
node->basename = info.name;
|
||||
if (!info.parent.empty())
|
||||
node_for_workload[info.parent]->attachUnifiedChild(node);
|
||||
else
|
||||
{
|
||||
root_node = node;
|
||||
scheduler.attachChild(root_node);
|
||||
}
|
||||
node_for_workload[info.name] = node;
|
||||
|
||||
updateCurrentVersion();
|
||||
});
|
||||
}
|
||||
|
||||
void IOResourceManager::Resource::deleteNode(const NodeInfo & info)
|
||||
{
|
||||
if (!node_for_workload.contains(info.name))
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Node for removing workload '{}' does not exist in resource '{}'",
|
||||
info.name, resource_name);
|
||||
|
||||
if (!info.parent.empty() && !node_for_workload.contains(info.parent))
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Parent node '{}' for removing workload '{}' does not exist in resource '{}'",
|
||||
info.parent, info.name, resource_name);
|
||||
|
||||
auto node = node_for_workload[info.name];
|
||||
|
||||
// TODO(serxa): make sure all possible callers validate that removing workload has no children workloads
|
||||
if (node->hasUnifiedChildren())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Removing workload '{}' with children in resource '{}'",
|
||||
info.name, resource_name);
|
||||
|
||||
executeInSchedulerThread([&, this]
|
||||
{
|
||||
if (!info.parent.empty())
|
||||
node_for_workload[info.parent]->detachUnifiedChild(node);
|
||||
else
|
||||
{
|
||||
chassert(node == root_node);
|
||||
scheduler.removeChild(root_node.get());
|
||||
root_node.reset();
|
||||
}
|
||||
|
||||
updateCurrentVersion();
|
||||
});
|
||||
}
|
||||
|
||||
void IOResourceManager::Resource::updateNode(const NodeInfo & old_info, const NodeInfo & new_info)
|
||||
{
|
||||
if (old_info.name != new_info.name)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Updating a name of workload '{}' to '{}' is not allowed in resource '{}'",
|
||||
old_info.name, new_info.name, resource_name);
|
||||
|
||||
if (old_info.parent != new_info.parent && (old_info.parent.empty() || old_info.parent.empty()))
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Workload '{}' invalid update of parent from '{}' to '{}' in resource '{}'",
|
||||
old_info.name, old_info.parent, new_info.parent, resource_name);
|
||||
|
||||
if (!node_for_workload.contains(old_info.name))
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Node for updating workload '{}' does not exist in resource '{}'",
|
||||
old_info.name, resource_name);
|
||||
|
||||
if (!old_info.parent.empty() && !node_for_workload.contains(old_info.parent))
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Old parent node '{}' for updating workload '{}' does not exist in resource '{}'",
|
||||
old_info.parent, old_info.name, resource_name);
|
||||
|
||||
if (!new_info.parent.empty() && !node_for_workload.contains(new_info.parent))
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "New parent node '{}' for updating workload '{}' does not exist in resource '{}'",
|
||||
new_info.parent, new_info.name, resource_name);
|
||||
|
||||
executeInSchedulerThread([&, this]
|
||||
{
|
||||
auto node = node_for_workload[old_info.name];
|
||||
bool detached = false;
|
||||
if (old_info.parent != new_info.parent)
|
||||
{
|
||||
node_for_workload[old_info.parent]->detachUnifiedChild(node);
|
||||
detached = true;
|
||||
}
|
||||
|
||||
node->updateSchedulingSettings(new_info.settings);
|
||||
if (!detached && !old_info.parent.empty() && old_info.settings.priority != new_info.settings.priority)
|
||||
node_for_workload[old_info.parent]->updateUnifiedChildPriority(
|
||||
node,
|
||||
old_info.settings.priority,
|
||||
new_info.settings.priority);
|
||||
|
||||
if (detached)
|
||||
node_for_workload[new_info.parent]->attachUnifiedChild(node);
|
||||
|
||||
updateCurrentVersion();
|
||||
});
|
||||
}
|
||||
|
||||
void IOResourceManager::Resource::updateCurrentVersion()
|
||||
{
|
||||
auto previous_version = current_version;
|
||||
|
||||
// Create a full list of constraints and queues in the current hierarchy
|
||||
current_version = std::make_shared<Version>();
|
||||
if (root_node)
|
||||
root_node->addRawPointerNodes(current_version->nodes);
|
||||
|
||||
// See details in version control section of description in IOResourceManager.h
|
||||
if (previous_version)
|
||||
{
|
||||
previous_version->newer_version = current_version;
|
||||
// TODO(serxa): Node activations might be in event queue on destruction. How to process them? should we just process all events in queue on important updates? add a separate queue for hierarchy modifications? Or maybe everything works as expected, we need unit tests for this.
|
||||
// Looks like the problem of activations could be solved just by unliking activation from intrusive list on destruction, but we must make sure all destruction are done under event_queue::mutex (which seems imposible)
|
||||
previous_version.reset(); // Destroys previous version nodes if there are no classifiers referencing it
|
||||
}
|
||||
}
|
||||
|
||||
IOResourceManager::Workload::Workload(IOResourceManager * resource_manager_, const ASTPtr & workload_entity_)
|
||||
: resource_manager(resource_manager_)
|
||||
, workload_entity(workload_entity_)
|
||||
{
|
||||
for (auto & [resource_name, resource] : resource_manager->resources)
|
||||
resource->createNode(NodeInfo(workload_entity, resource_name));
|
||||
}
|
||||
|
||||
IOResourceManager::Workload::~Workload()
|
||||
{
|
||||
for (auto & [resource_name, resource] : resource_manager->resources)
|
||||
resource->deleteNode(NodeInfo(workload_entity, resource_name));
|
||||
}
|
||||
|
||||
void IOResourceManager::Workload::updateWorkload(const ASTPtr & new_entity)
|
||||
{
|
||||
for (auto & [resource_name, resource] : resource_manager->resources)
|
||||
resource->updateNode(NodeInfo(workload_entity, resource_name), NodeInfo(new_entity, resource_name));
|
||||
workload_entity = new_entity;
|
||||
}
|
||||
|
||||
String IOResourceManager::Workload::getParent() const
|
||||
{
|
||||
return typeid_cast<ASTCreateWorkloadQuery *>(workload_entity.get())->getWorkloadParent();
|
||||
}
|
||||
|
||||
IOResourceManager::IOResourceManager(IWorkloadEntityStorage & storage_)
|
||||
: storage(storage_)
|
||||
{
|
||||
workload_change_subscription = storage.subscribeForChanges(WorkloadEntityType::Workload, [this] (
|
||||
WorkloadEntityType,
|
||||
const String & entity_name,
|
||||
const ASTPtr & entity)
|
||||
{
|
||||
try
|
||||
{
|
||||
if (entity)
|
||||
createOrUpdateWorkload(entity_name, entity);
|
||||
else
|
||||
deleteWorkload(entity_name);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
// TODO(serxa): handle CRUD errors
|
||||
}
|
||||
});
|
||||
resource_change_subscription = storage.subscribeForChanges(WorkloadEntityType::Resource, [this] (
|
||||
WorkloadEntityType,
|
||||
const String & entity_name,
|
||||
const ASTPtr & entity /* new or changed entity, null if removed */)
|
||||
{
|
||||
try
|
||||
{
|
||||
if (entity)
|
||||
createResource(entity_name, entity);
|
||||
else
|
||||
deleteResource(entity_name);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
// TODO(serxa): handle CRUD errors
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
void IOResourceManager::updateConfiguration(const Poco::Util::AbstractConfiguration &)
|
||||
{
|
||||
// No-op
|
||||
}
|
||||
|
||||
void IOResourceManager::createOrUpdateWorkload(const String & workload_name, const ASTPtr & ast)
|
||||
{
|
||||
std::unique_lock lock{mutex};
|
||||
if (auto workload_iter = workloads.find(workload_name); workload_iter != workloads.end())
|
||||
workload_iter->second->updateWorkload(ast);
|
||||
else
|
||||
workloads.emplace(workload_name, std::make_shared<Workload>(this, ast));
|
||||
}
|
||||
|
||||
void IOResourceManager::deleteWorkload(const String & workload_name)
|
||||
{
|
||||
std::unique_lock lock{mutex};
|
||||
if (auto workload_iter = workloads.find(workload_name); workload_iter != workloads.end())
|
||||
workloads.erase(workload_iter);
|
||||
else
|
||||
{
|
||||
// Workload to be deleted does not exist -- do nothing, throwing exceptions from a subscription is pointless
|
||||
// TODO(serxa): add logging
|
||||
}
|
||||
}
|
||||
|
||||
void IOResourceManager::createResource(const String & resource_name, const ASTPtr & ast)
|
||||
{
|
||||
std::unique_lock lock{mutex};
|
||||
if (auto resource_iter = resources.find(resource_name); resource_iter != resources.end())
|
||||
{
|
||||
// Resource to be created already exist -- do nothing, throwing exceptions from a subscription is pointless
|
||||
// TODO(serxa): add logging
|
||||
}
|
||||
else
|
||||
{
|
||||
// Add all workloads into the new resource
|
||||
auto resource = std::make_shared<Resource>(ast);
|
||||
for (Workload * workload : topologicallySortedWorkloads())
|
||||
resource->createNode(NodeInfo(workload->workload_entity, resource_name));
|
||||
|
||||
// Attach the resource
|
||||
resources.emplace(resource_name, resource);
|
||||
}
|
||||
}
|
||||
|
||||
void IOResourceManager::deleteResource(const String & resource_name)
|
||||
{
|
||||
std::unique_lock lock{mutex};
|
||||
if (auto resource_iter = resources.find(resource_name); resource_iter != resources.end())
|
||||
{
|
||||
resources.erase(resource_iter);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Resource to be deleted does not exist -- do nothing, throwing exceptions from a subscription is pointless
|
||||
// TODO(serxa): add logging
|
||||
}
|
||||
}
|
||||
|
||||
IOResourceManager::Classifier::~Classifier()
|
||||
{
|
||||
// Detach classifier from all resources in parallel (executed in every scheduler thread)
|
||||
std::vector<std::future<void>> futures;
|
||||
{
|
||||
std::unique_lock lock{mutex};
|
||||
futures.reserve(attachments.size());
|
||||
for (auto & [resource_name, attachment] : attachments)
|
||||
{
|
||||
futures.emplace_back(attachment.resource->detachClassifier(std::move(attachment.version)));
|
||||
attachment.link.reset(); // Just in case because it is not valid any longer
|
||||
}
|
||||
}
|
||||
|
||||
// Wait for all tasks to finish (to avoid races in case of exceptions)
|
||||
for (auto & future : futures)
|
||||
future.wait();
|
||||
|
||||
// There should not be any exceptions because it just destruct few objects, but let's rethrow just in case
|
||||
for (auto & future : futures)
|
||||
future.get();
|
||||
|
||||
// This unreferences and probably destroys `Resource` objects.
|
||||
// NOTE: We cannot do it in the scheduler threads (because thread cannot join itself).
|
||||
attachments.clear();
|
||||
}
|
||||
|
||||
std::future<void> IOResourceManager::Resource::detachClassifier(VersionPtr && version)
|
||||
{
|
||||
auto detach_promise = std::make_shared<std::promise<void>>(); // event queue task is std::function, which requires copy semanticss
|
||||
auto future = detach_promise->get_future();
|
||||
scheduler.event_queue->enqueue([detached_version = std::move(version), promise = std::move(detach_promise)] mutable
|
||||
{
|
||||
try
|
||||
{
|
||||
// Unreferences and probably destroys the version and scheduler nodes it owns.
|
||||
// The main reason from moving destruction into the scheduler thread is to
|
||||
// free memory in the same thread it was allocated to avoid memtrackers drift.
|
||||
detached_version.reset();
|
||||
promise->set_value();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
promise->set_exception(std::current_exception());
|
||||
}
|
||||
});
|
||||
return future;
|
||||
}
|
||||
|
||||
ResourceLink IOResourceManager::Classifier::get(const String & resource_name)
|
||||
{
|
||||
std::unique_lock lock{mutex};
|
||||
if (auto iter = attachments.find(resource_name); iter != attachments.end())
|
||||
return iter->second.link;
|
||||
else
|
||||
throw Exception(ErrorCodes::RESOURCE_NOT_FOUND, "Access denied to resource '{}'", resource_name);
|
||||
}
|
||||
|
||||
void IOResourceManager::Classifier::attach(const ResourcePtr & resource, const VersionPtr & version, ResourceLink link)
|
||||
{
|
||||
std::unique_lock lock{mutex};
|
||||
chassert(!attachments.contains(resource->getName()));
|
||||
attachments[resource->getName()] = Attachment{.resource = resource, .version = version, .link = link};
|
||||
}
|
||||
|
||||
std::future<void> IOResourceManager::Resource::attachClassifier(Classifier & classifier, const String & workload_name)
|
||||
{
|
||||
auto attach_promise = std::make_shared<std::promise<void>>(); // event queue task is std::function, which requires copy semantics
|
||||
auto future = attach_promise->get_future();
|
||||
scheduler.event_queue->enqueue([&, this, promise = std::move(attach_promise)] mutable
|
||||
{
|
||||
try
|
||||
{
|
||||
if (auto iter = node_for_workload.find(workload_name); iter != node_for_workload.end())
|
||||
{
|
||||
auto queue = iter->second->getQueue();
|
||||
if (!queue)
|
||||
throw Exception(ErrorCodes::INVALID_SCHEDULER_NODE, "Unable to use workload '{}' that have children for resource '{}'",
|
||||
workload_name, resource_name);
|
||||
classifier.attach(shared_from_this(), current_version, ResourceLink{.queue = queue.get()});
|
||||
}
|
||||
else
|
||||
throw Exception(ErrorCodes::INVALID_SCHEDULER_NODE, "Unable to find workload '{}' for resource '{}'", workload_name, resource_name);
|
||||
promise->set_value();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
promise->set_exception(std::current_exception());
|
||||
}
|
||||
});
|
||||
return future;
|
||||
}
|
||||
|
||||
ClassifierPtr IOResourceManager::acquire(const String & workload_name)
|
||||
{
|
||||
auto classifier = std::make_shared<Classifier>();
|
||||
|
||||
// Attach classifier to all resources in parallel (executed in every scheduler thread)
|
||||
std::vector<std::future<void>> futures;
|
||||
{
|
||||
std::unique_lock lock{mutex};
|
||||
futures.reserve(resources.size());
|
||||
for (auto & [resource_name, resource] : resources)
|
||||
futures.emplace_back(resource->attachClassifier(*classifier, workload_name));
|
||||
}
|
||||
|
||||
// Wait for all tasks to finish (to avoid races in case of exceptions)
|
||||
for (auto & future : futures)
|
||||
future.wait();
|
||||
|
||||
// Rethrow exceptions if any
|
||||
for (auto & future : futures)
|
||||
future.get();
|
||||
|
||||
return classifier;
|
||||
}
|
||||
|
||||
void IOResourceManager::Resource::forEachResourceNode(IResourceManager::VisitorFunc & visitor)
|
||||
{
|
||||
executeInSchedulerThread([&, this]
|
||||
{
|
||||
for (auto & [path, node] : node_for_workload)
|
||||
{
|
||||
node->forEachSchedulerNode([&] (ISchedulerNode * scheduler_node)
|
||||
{
|
||||
visitor(resource_name, scheduler_node->getPath(), scheduler_node);
|
||||
});
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
void IOResourceManager::forEachNode(IResourceManager::VisitorFunc visitor)
|
||||
{
|
||||
// Gather resource upfront to avoid holding mutex for a long time
|
||||
std::map<String, ResourcePtr> sorted_resources;
|
||||
{
|
||||
std::unique_lock lock{mutex};
|
||||
for (auto & [resource_name, resource] : resources)
|
||||
sorted_resources[resource_name] = resource;
|
||||
}
|
||||
|
||||
/// Run tasks one by one to avoid concurrent calls to visitor
|
||||
for (auto & [resource_name, resource] : sorted_resources)
|
||||
resource->forEachResourceNode(visitor);
|
||||
}
|
||||
|
||||
void IOResourceManager::topologicallySortedWorkloadsImpl(Workload * workload, std::unordered_set<Workload *> & visited, std::vector<Workload *> & sorted_workloads)
|
||||
{
|
||||
if (visited.contains(workload))
|
||||
return;
|
||||
visited.insert(workload);
|
||||
|
||||
// Recurse into parent (if any)
|
||||
String parent = workload->getParent();
|
||||
if (!parent.empty())
|
||||
{
|
||||
auto parent_iter = workloads.find(parent);
|
||||
chassert(parent_iter != workloads.end()); // validations check that all parents exist
|
||||
topologicallySortedWorkloadsImpl(parent_iter->second.get(), visited, sorted_workloads);
|
||||
}
|
||||
|
||||
sorted_workloads.push_back(workload);
|
||||
}
|
||||
|
||||
std::vector<IOResourceManager::Workload *> IOResourceManager::topologicallySortedWorkloads()
|
||||
{
|
||||
std::vector<Workload *> sorted_workloads;
|
||||
std::unordered_set<Workload *> visited;
|
||||
for (auto & [workload_name, workload] : workloads)
|
||||
topologicallySortedWorkloadsImpl(workload.get(), visited, sorted_workloads);
|
||||
return sorted_workloads;
|
||||
}
|
||||
|
||||
}
|
272
src/Common/Scheduler/Nodes/IOResourceManager.h
Normal file
272
src/Common/Scheduler/Nodes/IOResourceManager.h
Normal file
@ -0,0 +1,272 @@
|
||||
#pragma once
|
||||
|
||||
#include <base/defines.h>
|
||||
#include <base/scope_guard.h>
|
||||
|
||||
#include <Common/Scheduler/SchedulingSettings.h>
|
||||
#include <Common/Scheduler/IResourceManager.h>
|
||||
#include <Common/Scheduler/SchedulerRoot.h>
|
||||
#include <Common/Scheduler/Nodes/UnifiedSchedulerNode.h>
|
||||
#include <Common/Scheduler/Workload/IWorkloadEntityStorage.h>
|
||||
|
||||
#include <Parsers/IAST_fwd.h>
|
||||
|
||||
#include <boost/core/noncopyable.hpp>
|
||||
|
||||
#include <exception>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <future>
|
||||
#include <unordered_set>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/*
|
||||
* Implementation of `IResourceManager` that creates hierarchy of scheduler nodes according to
|
||||
* workload entities (WORKLOADs and RESOURCEs). It subscribes for updates in IWorkloadEntityStorage and
|
||||
* creates hierarchy of UnifiedSchedulerNode identical to the hierarchy of WORKLOADs.
|
||||
* For every RESOURCE an independent hierarchy of scheduler nodes is created.
|
||||
*
|
||||
* Manager process updates of WORKLOADs and RESOURCEs: CREATE/DROP/ALTER.
|
||||
* When a RESOURCE is created (dropped) a corresponding scheduler nodes hierarchy is created (destroyed).
|
||||
* After DROP RESOURCE parts of hierarchy might be keept alive while at least one query uses it.
|
||||
*
|
||||
* Manager is specific to IO only because it create scheduler node hierarchies for RESOURCEs having
|
||||
* WRITE DISK and/or READ DISK definitions. CPU and memory resources are managed separately.
|
||||
*
|
||||
* Classifiers are used (1) to access IO resources and (2) to keep shared ownership of scheduling nodes.
|
||||
* This allows `ResourceRequest` and `ResourceLink` to hold raw pointers as long as
|
||||
* `ClassifierPtr` is acquired and held.
|
||||
*
|
||||
* === RESOURCE ARCHITECTURE ===
|
||||
* Let's consider how a single resource is implemented. Every workload is represented by corresponding UnifiedSchedulerNode.
|
||||
* Every UnifiedSchedulerNode manages its own subtree of ISchedulerNode objects (see details in UnifiedSchedulerNode.h)
|
||||
* UnifiedSchedulerNode for workload w/o children has a queue, which provide a ResourceLink for consumption.
|
||||
* Parent of the root workload for a resource is SchedulerRoot with its own scheduler thread.
|
||||
* So every resource has its dedicated thread for processing of resource request and other events (see EventQueue).
|
||||
*
|
||||
* Here is an example of SQL and corresponding heirarchy of scheduler nodes:
|
||||
* CREATE RESOURCE my_io_resource (...)
|
||||
* CREATE WORKLOAD all
|
||||
* CREATE WORKLOAD production PARENT all
|
||||
* CREATE WORKLOAD development PARENT all
|
||||
*
|
||||
* root - SchedulerRoot (with scheduler thread and EventQueue)
|
||||
* |
|
||||
* all - UnifiedSchedulerNode
|
||||
* |
|
||||
* p0_fair - FairPolicy (part of parent UnifiedSchedulerNode internal structure)
|
||||
* / \
|
||||
* production development - UnifiedSchedulerNode
|
||||
* | |
|
||||
* queue queue - FifoQueue (part of parent UnifiedSchedulerNode internal structure)
|
||||
*
|
||||
* === UPDATING WORKLOADS ===
|
||||
* Workload may be created, updated or deleted.
|
||||
* Updating a child of a workload might lead to updating other workloads:
|
||||
* 1. Workload itself: it's structure depend on settings of children workloads
|
||||
* (e.g. fifo node of a leaf workload is remove when the first child is added;
|
||||
* and a fair node is inserted after the first two children are added).
|
||||
* 2. Other children: for them path to root might be changed (e.g. intermediate priority node is inserted)
|
||||
*
|
||||
* === VERSION CONTROL ===
|
||||
* Versions are created on hierarchy updates and hold ownership of nodes that are used through raw pointers.
|
||||
* Classifier reference version of every resource it use. Older version reference newer version.
|
||||
* Here is a diagram explaining version control based on Version objects (for 1 resource):
|
||||
*
|
||||
* [nodes] [nodes] [nodes]
|
||||
* ^ ^ ^
|
||||
* | | |
|
||||
* version1 --> version2 -...-> versionN
|
||||
* ^ ^ ^
|
||||
* | | |
|
||||
* old_classifier new_classifier current_version
|
||||
*
|
||||
* Previous version should hold reference to a newer version. It is required for proper handling of updates.
|
||||
* Classifiers that were created for any of old versions may use nodes of newer version due to updateNode().
|
||||
* It may move a queue to a new position in the hierarchy or create/destry constraints, thus resource requests
|
||||
* created by old classifier may reference constraints of newer versions through `request->constraints` which
|
||||
* is filled during dequeueRequst().
|
||||
*
|
||||
* === THREADS ===
|
||||
* scheduler thread:
|
||||
* - one thread per resource
|
||||
* - uses event_queue (per resource) for processing w/o holding mutex for every scheduler node
|
||||
* - handle resource requests
|
||||
* - node activations
|
||||
* - scheduler hierarchy updates
|
||||
* query thread:
|
||||
* - multiple independent threads
|
||||
* - send resource requests
|
||||
* - acquire and release classifiers (via scheduler event queues)
|
||||
* control thread:
|
||||
* - modify workload and resources through subscription
|
||||
*
|
||||
* === SYNCHRONIZATION ===
|
||||
* List of related sync primitives and their roles:
|
||||
* IOResourceManager::mutex
|
||||
* - protects resource manager data structures - resource and workloads
|
||||
* - serialize control thread actions
|
||||
* IOResourceManager::Resource::scheduler->event_queue
|
||||
* - serializes scheduler hierarchy events
|
||||
* - events are created in control and query threads
|
||||
* - all events are processed by specific scheduler thread
|
||||
* - hierarchy-wide actions: requests dequeueing, activations propagation and nodes updates.
|
||||
* - resource version control management
|
||||
* FifoQueue::mutex and SemaphoreContraint::mutex
|
||||
* - serializes query and scheduler threads on specific node accesses
|
||||
* - resource request processing: enqueueRequest(), dequeueRequest() and finishRequest()
|
||||
*/
|
||||
class IOResourceManager : public IResourceManager
|
||||
{
|
||||
public:
|
||||
explicit IOResourceManager(IWorkloadEntityStorage & storage_);
|
||||
void updateConfiguration(const Poco::Util::AbstractConfiguration & config) override;
|
||||
ClassifierPtr acquire(const String & workload_name) override;
|
||||
void forEachNode(VisitorFunc visitor) override;
|
||||
|
||||
private:
|
||||
// Forward declarations
|
||||
struct NodeInfo;
|
||||
struct Version;
|
||||
class Resource;
|
||||
struct Workload;
|
||||
class Classifier;
|
||||
|
||||
friend struct Workload;
|
||||
|
||||
using VersionPtr = std::shared_ptr<Version>;
|
||||
using ResourcePtr = std::shared_ptr<Resource>;
|
||||
using WorkloadPtr = std::shared_ptr<Workload>;
|
||||
|
||||
/// Helper for parsing workload AST for a specific resource
|
||||
struct NodeInfo
|
||||
{
|
||||
String name; // Workload name
|
||||
String parent; // Name of parent workload
|
||||
SchedulingSettings settings; // Settings specific for a given resource
|
||||
|
||||
NodeInfo(const ASTPtr & ast, const String & resource_name);
|
||||
};
|
||||
|
||||
/// Ownership control for scheduler nodes, which could be referenced by raw pointers
|
||||
struct Version
|
||||
{
|
||||
std::vector<SchedulerNodePtr> nodes;
|
||||
VersionPtr newer_version;
|
||||
};
|
||||
|
||||
/// Holds a thread and hierarchy of unified scheduler nodes for specific RESOURCE
|
||||
class Resource : public std::enable_shared_from_this<Resource>, boost::noncopyable
|
||||
{
|
||||
public:
|
||||
explicit Resource(const ASTPtr & resource_entity_);
|
||||
~Resource();
|
||||
|
||||
const String & getName() const { return resource_name; }
|
||||
|
||||
/// Hierarchy management
|
||||
void createNode(const NodeInfo & info);
|
||||
void deleteNode(const NodeInfo & info);
|
||||
void updateNode(const NodeInfo & old_info, const NodeInfo & new_info);
|
||||
|
||||
/// Updates a classifier to contain a reference for specified workload
|
||||
std::future<void> attachClassifier(Classifier & classifier, const String & workload_name);
|
||||
|
||||
/// Remove classifier reference. This destroys scheduler nodes in proper scheduler thread
|
||||
std::future<void> detachClassifier(VersionPtr && version);
|
||||
|
||||
/// Introspection
|
||||
void forEachResourceNode(IOResourceManager::VisitorFunc & visitor);
|
||||
|
||||
private:
|
||||
void updateCurrentVersion();
|
||||
|
||||
template <class Task>
|
||||
void executeInSchedulerThread(Task && task)
|
||||
{
|
||||
std::promise<void> promise;
|
||||
auto future = promise.get_future();
|
||||
scheduler.event_queue->enqueue([&]
|
||||
{
|
||||
try
|
||||
{
|
||||
task();
|
||||
promise.set_value();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
promise.set_exception(std::current_exception());
|
||||
}
|
||||
});
|
||||
future.get(); // Blocks until execution is done in the scheduler thread
|
||||
}
|
||||
|
||||
const ASTPtr resource_entity;
|
||||
const String resource_name;
|
||||
SchedulerRoot scheduler;
|
||||
|
||||
// TODO(serxa): consider using resource_manager->mutex + scheduler thread for updates and mutex only for reading to avoid slow acquire/release of classifier
|
||||
/// These field should be accessed only by the scheduler thread
|
||||
std::unordered_map<String, UnifiedSchedulerNodePtr> node_for_workload;
|
||||
UnifiedSchedulerNodePtr root_node;
|
||||
VersionPtr current_version;
|
||||
};
|
||||
|
||||
struct Workload : boost::noncopyable
|
||||
{
|
||||
IOResourceManager * resource_manager;
|
||||
ASTPtr workload_entity;
|
||||
|
||||
Workload(IOResourceManager * resource_manager_, const ASTPtr & workload_entity_);
|
||||
~Workload();
|
||||
|
||||
void updateWorkload(const ASTPtr & new_entity);
|
||||
String getParent() const;
|
||||
};
|
||||
|
||||
class Classifier : public IClassifier
|
||||
{
|
||||
public:
|
||||
~Classifier() override;
|
||||
|
||||
/// Implements IClassifier interface
|
||||
/// NOTE: It is called from query threads (possibly multiple)
|
||||
ResourceLink get(const String & resource_name) override;
|
||||
|
||||
/// Attaches/detaches a specific resource
|
||||
/// NOTE: It is called from scheduler threads (possibly multiple)
|
||||
void attach(const ResourcePtr & resource, const VersionPtr & version, ResourceLink link);
|
||||
void detach(const ResourcePtr & resource);
|
||||
|
||||
private:
|
||||
IOResourceManager * resource_manager;
|
||||
std::mutex mutex;
|
||||
struct Attachment {
|
||||
ResourcePtr resource;
|
||||
VersionPtr version;
|
||||
ResourceLink link;
|
||||
};
|
||||
std::unordered_map<String, Attachment> attachments; // TSA_GUARDED_BY(mutex);
|
||||
};
|
||||
|
||||
void createOrUpdateWorkload(const String & workload_name, const ASTPtr & ast);
|
||||
void deleteWorkload(const String & workload_name);
|
||||
void createResource(const String & resource_name, const ASTPtr & ast);
|
||||
void deleteResource(const String & resource_name);
|
||||
|
||||
// Topological sorting of worklaods
|
||||
void topologicallySortedWorkloadsImpl(Workload * workload, std::unordered_set<Workload *> & visited, std::vector<Workload *> & sorted_workloads);
|
||||
std::vector<Workload *> topologicallySortedWorkloads();
|
||||
|
||||
IWorkloadEntityStorage & storage;
|
||||
scope_guard workload_change_subscription;
|
||||
scope_guard resource_change_subscription;
|
||||
|
||||
std::mutex mutex;
|
||||
std::unordered_map<String, WorkloadPtr> workloads; // TSA_GUARDED_BY(mutex);
|
||||
std::unordered_map<String, ResourcePtr> resources; // TSA_GUARDED_BY(mutex);
|
||||
};
|
||||
|
||||
}
|
@ -39,6 +39,16 @@ public:
|
||||
: ISchedulerNode(event_queue_, config, config_prefix)
|
||||
{}
|
||||
|
||||
explicit PriorityPolicy(EventQueue * event_queue_, const SchedulerNodeInfo & node_info)
|
||||
: ISchedulerNode(event_queue_, node_info)
|
||||
{}
|
||||
|
||||
const String & getTypeName() const override
|
||||
{
|
||||
static String type_name("priority");
|
||||
return type_name;
|
||||
}
|
||||
|
||||
bool equals(ISchedulerNode * other) override
|
||||
{
|
||||
if (!ISchedulerNode::equals(other))
|
||||
|
@ -1,5 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
#include "Common/Scheduler/ISchedulerNode.h"
|
||||
#include <Common/Scheduler/ISchedulerConstraint.h>
|
||||
|
||||
#include <mutex>
|
||||
@ -24,6 +25,18 @@ public:
|
||||
, max_cost(config.getInt64(config_prefix + ".max_cost", config.getInt64(config_prefix + ".max_bytes", default_max_cost)))
|
||||
{}
|
||||
|
||||
SemaphoreConstraint(EventQueue * event_queue_, const SchedulerNodeInfo & info_, Int64 max_requests_, Int64 max_cost_)
|
||||
: ISchedulerConstraint(event_queue_, info_)
|
||||
, max_requests(max_requests_)
|
||||
, max_cost(max_cost_)
|
||||
{}
|
||||
|
||||
const String & getTypeName() const override
|
||||
{
|
||||
static String type_name("inflight_limit");
|
||||
return type_name;
|
||||
}
|
||||
|
||||
bool equals(ISchedulerNode * other) override
|
||||
{
|
||||
if (!ISchedulerNode::equals(other))
|
||||
@ -69,10 +82,7 @@ public:
|
||||
if (!request)
|
||||
return {nullptr, false};
|
||||
|
||||
// Request has reference to the first (closest to leaf) `constraint`, which can have `parent_constraint`.
|
||||
// The former is initialized here dynamically and the latter is initialized once during hierarchy construction.
|
||||
if (!request->constraint)
|
||||
request->constraint = this;
|
||||
request->addConstraint(this);
|
||||
|
||||
// Update state on request arrival
|
||||
std::unique_lock lock(mutex);
|
||||
@ -87,10 +97,6 @@ public:
|
||||
|
||||
void finishRequest(ResourceRequest * request) override
|
||||
{
|
||||
// Recursive traverse of parent flow controls in reverse order
|
||||
if (parent_constraint)
|
||||
parent_constraint->finishRequest(request);
|
||||
|
||||
// Update state on request departure
|
||||
std::unique_lock lock(mutex);
|
||||
bool was_active = active();
|
||||
|
@ -3,8 +3,6 @@
|
||||
#include <Common/Scheduler/ISchedulerConstraint.h>
|
||||
|
||||
#include <chrono>
|
||||
#include <mutex>
|
||||
#include <limits>
|
||||
#include <utility>
|
||||
|
||||
|
||||
@ -28,12 +26,26 @@ public:
|
||||
, tokens(max_burst)
|
||||
{}
|
||||
|
||||
ThrottlerConstraint(EventQueue * event_queue_, const SchedulerNodeInfo & info_, double max_speed_, double max_burst_)
|
||||
: ISchedulerConstraint(event_queue_, info_)
|
||||
, max_speed(max_speed_)
|
||||
, max_burst(max_burst_)
|
||||
, last_update(event_queue_->now())
|
||||
, tokens(max_burst)
|
||||
{}
|
||||
|
||||
~ThrottlerConstraint() override
|
||||
{
|
||||
// We should cancel event on destruction to avoid dangling references from event queue
|
||||
event_queue->cancelPostponed(postponed);
|
||||
}
|
||||
|
||||
const String & getTypeName() const override
|
||||
{
|
||||
static String type_name("bandwidth_limit");
|
||||
return type_name;
|
||||
}
|
||||
|
||||
bool equals(ISchedulerNode * other) override
|
||||
{
|
||||
if (!ISchedulerNode::equals(other))
|
||||
@ -79,10 +91,7 @@ public:
|
||||
if (!request)
|
||||
return {nullptr, false};
|
||||
|
||||
// Request has reference to the first (closest to leaf) `constraint`, which can have `parent_constraint`.
|
||||
// The former is initialized here dynamically and the latter is initialized once during hierarchy construction.
|
||||
if (!request->constraint)
|
||||
request->constraint = this;
|
||||
// We don't do `request->addConstraint(this)` because `finishRequest()` is no-op
|
||||
|
||||
updateBucket(request->cost);
|
||||
|
||||
@ -93,12 +102,8 @@ public:
|
||||
return {request, active()};
|
||||
}
|
||||
|
||||
void finishRequest(ResourceRequest * request) override
|
||||
void finishRequest(ResourceRequest *) override
|
||||
{
|
||||
// Recursive traverse of parent flow controls in reverse order
|
||||
if (parent_constraint)
|
||||
parent_constraint->finishRequest(request);
|
||||
|
||||
// NOTE: Token-bucket constraint does not require any action when consumption ends
|
||||
}
|
||||
|
||||
|
433
src/Common/Scheduler/Nodes/UnifiedSchedulerNode.h
Normal file
433
src/Common/Scheduler/Nodes/UnifiedSchedulerNode.h
Normal file
@ -0,0 +1,433 @@
|
||||
#pragma once
|
||||
|
||||
#include <Common/Priority.h>
|
||||
#include <Common/Scheduler/Nodes/PriorityPolicy.h>
|
||||
#include <Common/Scheduler/Nodes/FairPolicy.h>
|
||||
#include <Common/Scheduler/Nodes/ThrottlerConstraint.h>
|
||||
#include <Common/Scheduler/Nodes/SemaphoreConstraint.h>
|
||||
#include <Common/Scheduler/ISchedulerQueue.h>
|
||||
#include <Common/Scheduler/Nodes/FifoQueue.h>
|
||||
#include <Common/Scheduler/ISchedulerNode.h>
|
||||
#include <Common/Scheduler/SchedulingSettings.h>
|
||||
#include <Common/Exception.h>
|
||||
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int INVALID_SCHEDULER_NODE;
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
class UnifiedSchedulerNode;
|
||||
using UnifiedSchedulerNodePtr = std::shared_ptr<UnifiedSchedulerNode>;
|
||||
|
||||
/*
|
||||
* Unified scheduler node combines multiple nodes internally to provide all available scheduling policies and constraints.
|
||||
* Whole scheduling hierarchy could "logically" consist of unified nodes only. Physically intermediate "internal" nodes
|
||||
* are also present. This approch is easiers for manipulations in runtime than using multiple types of nodes.
|
||||
*
|
||||
* Unified node is capable of updating its internal structure based on:
|
||||
* 1. Number of children (fifo if =0 or fairness/priority if >0).
|
||||
* 2. Priorities of its children (for subtree structure).
|
||||
* 3. `SchedulingSettings` associated with unified node (for throttler and semaphore constraints).
|
||||
*
|
||||
* In general, unified node has "internal" subtree with the following structure:
|
||||
*
|
||||
* THIS <-- UnifiedSchedulerNode object
|
||||
* |
|
||||
* THROTTLER <-- [Optional] Throttling scheduling constraint
|
||||
* |
|
||||
* [If no children]------ SEMAPHORE <-- [Optional] Semaphore constraint
|
||||
* | |
|
||||
* FIFO PRIORITY <-- [Optional] Scheduling policy distinguishing priorities
|
||||
* .-------' '-------.
|
||||
* FAIRNESS[p1] ... FAIRNESS[pN] <-- [Optional] Policies for fairness if priorities are equal
|
||||
* / \ / \
|
||||
* CHILD[p1,w1] ... CHILD[p1,wM] CHILD[pN,w1] ... CHILD[pN,wM] <-- Unified children (UnifiedSchedulerNode objects)
|
||||
*
|
||||
* NOTE: to distinguish different kinds of children we use the following terms:
|
||||
* - immediate child: child of unified object (THROTTLER);
|
||||
* - unified child: leaf of this "internal" subtree (CHILD[p,w]);
|
||||
* - intermediate node: any child that is not UnifiedSchedulerNode (unified child or `this`)
|
||||
*/
|
||||
class UnifiedSchedulerNode : public ISchedulerNode
|
||||
{
|
||||
private:
|
||||
/// Helper function for managing a parent of a node
|
||||
static void reparent(const SchedulerNodePtr & node, const SchedulerNodePtr & new_parent)
|
||||
{
|
||||
reparent(node, new_parent.get());
|
||||
}
|
||||
|
||||
/// Helper function for managing a parent of a node
|
||||
static void reparent(const SchedulerNodePtr & node, ISchedulerNode * new_parent)
|
||||
{
|
||||
chassert(new_parent);
|
||||
if (new_parent == node->parent)
|
||||
return;
|
||||
if (node->parent)
|
||||
node->parent->removeChild(node.get());
|
||||
new_parent->attachChild(node);
|
||||
}
|
||||
|
||||
/// Helper function for managing a parent of a node
|
||||
static void detach(const SchedulerNodePtr & node)
|
||||
{
|
||||
if (node->parent)
|
||||
node->parent->removeChild(node.get());
|
||||
}
|
||||
|
||||
/// A branch of the tree for a specific priority value
|
||||
struct FairnessBranch {
|
||||
SchedulerNodePtr root; /// FairPolicy node is used if multiple children with the same priority are attached
|
||||
std::unordered_map<String, UnifiedSchedulerNodePtr> children; // basename -> child
|
||||
|
||||
SchedulerNodePtr getRoot()
|
||||
{
|
||||
chassert(!children.empty());
|
||||
if (root)
|
||||
return root;
|
||||
return children.begin()->second; // There should be exactly one child
|
||||
}
|
||||
|
||||
/// Attaches a new child.
|
||||
/// Returns root node if it has been changed to a different node, otherwise returns null.
|
||||
[[nodiscard]] SchedulerNodePtr attachUnifiedChild(EventQueue * event_queue_, const UnifiedSchedulerNodePtr & child)
|
||||
{
|
||||
if (auto [it, inserted] = children.emplace(child->basename, child); !inserted)
|
||||
throw Exception(
|
||||
ErrorCodes::INVALID_SCHEDULER_NODE,
|
||||
"Can't add another child with the same path: {}",
|
||||
it->second->getPath());
|
||||
|
||||
if (children.size() == 2)
|
||||
{
|
||||
// Insert fair node if we have just added the second child
|
||||
chassert(!root);
|
||||
root = std::make_shared<FairPolicy>(event_queue_, SchedulerNodeInfo{});
|
||||
root->info.setPriority(child->info.priority);
|
||||
root->basename = fmt::format("p{}_fair", child->info.priority.value);
|
||||
for (auto & [_, node] : children)
|
||||
reparent(node, root);
|
||||
return root; // New root has been created
|
||||
}
|
||||
else if (children.size() == 1)
|
||||
return child; // We have added single child so far and it is the new root
|
||||
else
|
||||
reparent(child, root);
|
||||
return {}; // Root is the same
|
||||
}
|
||||
};
|
||||
|
||||
/// Handles all the children nodes with intermediate fair and/or priority nodes
|
||||
struct ChildrenBranch
|
||||
{
|
||||
SchedulerNodePtr root; /// PriorityPolicy node is used if multiple children with different priority are attached
|
||||
std::unordered_map<Priority::Value, FairnessBranch> branches; /// Branches for different priority values
|
||||
|
||||
/// Attaches a new child.
|
||||
/// Returns root node if it has been changed to a different node, otherwise returns null.
|
||||
[[nodiscard]] SchedulerNodePtr attachUnifiedChild(EventQueue * event_queue_, const UnifiedSchedulerNodePtr & child)
|
||||
{
|
||||
bool existing_branch = branches.contains(child->info.priority);
|
||||
auto & child_branch = branches[child->info.priority];
|
||||
auto branch_root = child_branch.attachUnifiedChild(event_queue_, child);
|
||||
|
||||
if (existing_branch)
|
||||
{
|
||||
if (branch_root)
|
||||
{
|
||||
if (root)
|
||||
reparent(branch_root, root);
|
||||
else
|
||||
return branch_root;
|
||||
}
|
||||
return {};
|
||||
}
|
||||
else
|
||||
{
|
||||
chassert(branch_root);
|
||||
if (branches.size() == 2)
|
||||
{
|
||||
// Insert priority node if we have just added the second branch
|
||||
chassert(!root);
|
||||
root = std::make_shared<PriorityPolicy>(event_queue_, SchedulerNodeInfo{});
|
||||
root->basename = "prio";
|
||||
for (auto & [_, branch] : branches)
|
||||
reparent(branch.getRoot(), root);
|
||||
return root; // New root has been created
|
||||
}
|
||||
else if (branches.size() == 1)
|
||||
return child; // We have added single child so far and it is the new root
|
||||
else
|
||||
reparent(child, root);
|
||||
return {}; // Root is the same
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/// Handles degenerate case of zero children (a fifo queue) or delegate to `ChildrenBranch`.
|
||||
struct QueueOrChildrenBranch
|
||||
{
|
||||
SchedulerNodePtr queue; /// FifoQueue node is used if there are no children
|
||||
ChildrenBranch branch; /// Used if there is at least one child
|
||||
|
||||
// Should be called after constructor, before any other methods
|
||||
[[nodiscard]] SchedulerNodePtr initialize(EventQueue * event_queue_)
|
||||
{
|
||||
createQueue(event_queue_);
|
||||
return queue;
|
||||
}
|
||||
|
||||
/// Attaches a new child.
|
||||
/// Returns root node if it has been changed to a different node, otherwise returns null.
|
||||
[[nodiscard]] SchedulerNodePtr attachUnifiedChild(EventQueue * event_queue_, const UnifiedSchedulerNodePtr & child)
|
||||
{
|
||||
if (queue)
|
||||
removeQueue();
|
||||
return branch.attachUnifiedChild(event_queue_, child);
|
||||
}
|
||||
|
||||
private:
|
||||
void createQueue(EventQueue * event_queue_)
|
||||
{
|
||||
queue = std::make_shared<FifoQueue>(event_queue_, SchedulerNodeInfo{});
|
||||
queue->basename = "fifo";
|
||||
}
|
||||
|
||||
void removeQueue()
|
||||
{
|
||||
// This unified node will not be able to process resource requests any longer
|
||||
// All remaining resource requests are be aborted on queue destruction
|
||||
detach(queue);
|
||||
std::static_pointer_cast<ISchedulerQueue>(queue)->purgeQueue();
|
||||
queue.reset();
|
||||
}
|
||||
};
|
||||
|
||||
/// Handles all the nodes under this unified node
|
||||
/// Specifically handles constraints with `QueueOrChildrenBranch` under it
|
||||
struct ConstraintsBranch
|
||||
{
|
||||
SchedulerNodePtr throttler;
|
||||
SchedulerNodePtr semaphore;
|
||||
QueueOrChildrenBranch branch;
|
||||
SchedulingSettings settings;
|
||||
|
||||
// Should be called after constructor, before any other methods
|
||||
[[nodiscard]] SchedulerNodePtr initialize(EventQueue * event_queue_, const SchedulingSettings & settings_)
|
||||
{
|
||||
settings = settings_;
|
||||
SchedulerNodePtr node = branch.initialize(event_queue_);
|
||||
if (settings.hasSemaphore())
|
||||
{
|
||||
semaphore = std::make_shared<SemaphoreConstraint>(event_queue_, SchedulerNodeInfo{}, settings.max_requests, settings.max_cost);
|
||||
semaphore->basename = "semaphore";
|
||||
reparent(node, semaphore);
|
||||
node = semaphore;
|
||||
}
|
||||
if (settings.hasThrottler())
|
||||
{
|
||||
throttler = std::make_shared<ThrottlerConstraint>(event_queue_, SchedulerNodeInfo{}, settings.max_speed, settings.max_burst);
|
||||
throttler->basename = "throttler";
|
||||
reparent(node, throttler);
|
||||
node = throttler;
|
||||
}
|
||||
return node;
|
||||
}
|
||||
|
||||
/// Attaches a new child.
|
||||
/// Returns root node if it has been changed to a different node, otherwise returns null.
|
||||
[[nodiscard]] SchedulerNodePtr attachUnifiedChild(EventQueue * event_queue_, const UnifiedSchedulerNodePtr & child)
|
||||
{
|
||||
if (auto branch_root = branch.attachUnifiedChild(event_queue_, child))
|
||||
{
|
||||
if (semaphore)
|
||||
reparent(branch_root, semaphore);
|
||||
else if (throttler)
|
||||
reparent(branch_root, throttler);
|
||||
else
|
||||
return branch_root;
|
||||
}
|
||||
return {};
|
||||
}
|
||||
};
|
||||
|
||||
public:
|
||||
explicit UnifiedSchedulerNode(EventQueue * event_queue_, const SchedulingSettings & settings)
|
||||
: ISchedulerNode(event_queue_, SchedulerNodeInfo(settings.weight, settings.priority))
|
||||
{
|
||||
immediate_child = impl.initialize(event_queue, settings);
|
||||
reparent(immediate_child, this);
|
||||
}
|
||||
|
||||
/// Attaches a unified child as a leaf of internal subtree and insert or update all the intermediate nodes
|
||||
/// NOTE: Do not confuse with `attachChild()` which is used only for immediate children
|
||||
void attachUnifiedChild(const UnifiedSchedulerNodePtr & child)
|
||||
{
|
||||
if (auto new_child = impl.attachUnifiedChild(event_queue, child))
|
||||
reparent(new_child, this);
|
||||
}
|
||||
|
||||
/// Detaches unified child and update all the intermediate nodes.
|
||||
/// Detached child could be safely attached to another parent.
|
||||
/// NOTE: Do not confuse with `removeChild()` which is used only for immediate children
|
||||
void detachUnifiedChild(const UnifiedSchedulerNodePtr & child)
|
||||
{
|
||||
UNUSED(child); // TODO(serxa): implement detachUnifiedChild()
|
||||
}
|
||||
|
||||
/// Updates intermediate nodes subtree according with new priority (priority is set by the caller beforehand)
|
||||
/// NOTE: Changing a priority of a unified child may lead to change of its parent.
|
||||
void updateUnifiedChildPriority(const UnifiedSchedulerNodePtr & child, Priority old_priority, Priority new_priority)
|
||||
{
|
||||
UNUSED(child, old_priority, new_priority); // TODO(serxa): implement updateUnifiedChildPriority()
|
||||
}
|
||||
|
||||
/// Updates scheduling settings. Set of constraints might change.
|
||||
/// NOTE: Caller is responsible for calling `updateUnifiedChildPriority` in parent unified node (if any)
|
||||
void updateSchedulingSettings(const SchedulingSettings & new_settings)
|
||||
{
|
||||
UNUSED(new_settings); // TODO(serxa): implement updateSchedulingSettings()
|
||||
info.setPriority(new_settings.priority);
|
||||
info.setWeight(new_settings.weight);
|
||||
}
|
||||
|
||||
/// Returns the queue to be used for resource requests or `nullptr` if it has unified children
|
||||
std::shared_ptr<ISchedulerQueue> getQueue()
|
||||
{
|
||||
return static_pointer_cast<ISchedulerQueue>(impl.branch.queue);
|
||||
}
|
||||
|
||||
/// Collects nodes that could be accessed with raw pointers by resource requests (queue and constraints)
|
||||
/// NOTE: This is a building block for classifier. Note that due to possible movement of a queue, set of constraints
|
||||
/// for that queue might change in future, and `request->constraints` might reference nodes not in
|
||||
/// the initial set of nodes returned by `addRawPointerNodes()`. To avoid destruction of such additional nodes
|
||||
/// classifier must (indirectly) hold nodes return by `addRawPointerNodes()` for all future versions of
|
||||
/// all unified nodes. Such a version control is done by `IOResourceManager`.
|
||||
void addRawPointerNodes(std::vector<SchedulerNodePtr> & nodes)
|
||||
{
|
||||
if (impl.throttler)
|
||||
nodes.push_back(impl.throttler);
|
||||
if (impl.semaphore)
|
||||
nodes.push_back(impl.semaphore);
|
||||
if (impl.branch.queue)
|
||||
nodes.push_back(impl.branch.queue);
|
||||
for (auto & [_, branch] : impl.branch.branch.branches)
|
||||
{
|
||||
for (auto & [_, child] : branch.children)
|
||||
child->addRawPointerNodes(nodes);
|
||||
}
|
||||
}
|
||||
|
||||
bool hasUnifiedChildren() const
|
||||
{
|
||||
return impl.branch.queue == nullptr;
|
||||
}
|
||||
|
||||
/// Introspection. Calls a visitor for self and every internal node. Do not recurse into unified children.
|
||||
void forEachSchedulerNode(std::function<void(ISchedulerNode *)> visitor)
|
||||
{
|
||||
visitor(this);
|
||||
if (impl.throttler)
|
||||
visitor(impl.throttler.get());
|
||||
if (impl.semaphore)
|
||||
visitor(impl.semaphore.get());
|
||||
if (impl.branch.queue)
|
||||
visitor(impl.branch.queue.get());
|
||||
if (impl.branch.branch.root) // priority
|
||||
visitor(impl.branch.branch.root.get());
|
||||
for (auto & [_, branch] : impl.branch.branch.branches)
|
||||
{
|
||||
if (branch.root) // fairness
|
||||
visitor(branch.root.get());
|
||||
}
|
||||
}
|
||||
|
||||
protected: // Hide all the ISchedulerNode interface methods as an implementation details
|
||||
const String & getTypeName() const override
|
||||
{
|
||||
static String type_name("unified");
|
||||
return type_name;
|
||||
}
|
||||
|
||||
bool equals(ISchedulerNode *) override
|
||||
{
|
||||
assert(false);
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Attaches an immediate child (used through `reparent()`)
|
||||
void attachChild(const SchedulerNodePtr & child_) override
|
||||
{
|
||||
immediate_child = child_;
|
||||
immediate_child->setParent(this);
|
||||
|
||||
// Activate if required
|
||||
if (immediate_child->isActive())
|
||||
activateChild(immediate_child.get());
|
||||
}
|
||||
|
||||
/// Removes an immediate child (used through `reparent()`)
|
||||
void removeChild(ISchedulerNode * child) override
|
||||
{
|
||||
if (immediate_child.get() == child)
|
||||
{
|
||||
child_active = false; // deactivate
|
||||
immediate_child->setParent(nullptr); // detach
|
||||
immediate_child.reset();
|
||||
}
|
||||
}
|
||||
|
||||
ISchedulerNode * getChild(const String & child_name) override
|
||||
{
|
||||
if (immediate_child->basename == child_name)
|
||||
return immediate_child.get();
|
||||
else
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
std::pair<ResourceRequest *, bool> dequeueRequest() override
|
||||
{
|
||||
auto [request, child_now_active] = immediate_child->dequeueRequest();
|
||||
if (!request)
|
||||
return {nullptr, false};
|
||||
|
||||
child_active = child_now_active;
|
||||
if (!child_active)
|
||||
busy_periods++;
|
||||
incrementDequeued(request->cost);
|
||||
return {request, child_active};
|
||||
}
|
||||
|
||||
bool isActive() override
|
||||
{
|
||||
return child_active;
|
||||
}
|
||||
|
||||
/// Shows number of immediate active children (for introspection)
|
||||
size_t activeChildren() override
|
||||
{
|
||||
return child_active;
|
||||
}
|
||||
|
||||
/// Activate an immediate child
|
||||
void activateChild(ISchedulerNode * child) override
|
||||
{
|
||||
if (child == immediate_child.get())
|
||||
if (!std::exchange(child_active, true) && parent)
|
||||
parent->activateChild(this);
|
||||
}
|
||||
|
||||
private:
|
||||
ConstraintsBranch impl;
|
||||
SchedulerNodePtr immediate_child; // An immediate child (actually the root of the whole subtree)
|
||||
bool child_active = false;
|
||||
};
|
||||
|
||||
}
|
@ -1,15 +0,0 @@
|
||||
#include <Common/Scheduler/Nodes/registerResourceManagers.h>
|
||||
#include <Common/Scheduler/ResourceManagerFactory.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
void registerDynamicResourceManager(ResourceManagerFactory &);
|
||||
|
||||
void registerResourceManagers()
|
||||
{
|
||||
auto & factory = ResourceManagerFactory::instance();
|
||||
registerDynamicResourceManager(factory);
|
||||
}
|
||||
|
||||
}
|
@ -1,8 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
void registerResourceManagers();
|
||||
|
||||
}
|
@ -1,5 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
#include "Common/Scheduler/SchedulingSettings.h"
|
||||
#include <Common/Scheduler/IResourceManager.h>
|
||||
#include <Common/Scheduler/SchedulerRoot.h>
|
||||
#include <Common/Scheduler/ResourceGuard.h>
|
||||
@ -7,17 +8,21 @@
|
||||
#include <Common/Scheduler/Nodes/PriorityPolicy.h>
|
||||
#include <Common/Scheduler/Nodes/FifoQueue.h>
|
||||
#include <Common/Scheduler/Nodes/SemaphoreConstraint.h>
|
||||
#include <Common/Scheduler/Nodes/UnifiedSchedulerNode.h>
|
||||
#include <Common/Scheduler/Nodes/registerSchedulerNodes.h>
|
||||
#include <Common/Scheduler/Nodes/registerResourceManagers.h>
|
||||
|
||||
#include <Poco/Util/XMLConfiguration.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <barrier>
|
||||
#include <exception>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
#include <mutex>
|
||||
#include <set>
|
||||
#include <sstream>
|
||||
#include <utility>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -26,7 +31,7 @@ struct ResourceTestBase
|
||||
{
|
||||
ResourceTestBase()
|
||||
{
|
||||
[[maybe_unused]] static bool typesRegistered = [] { registerSchedulerNodes(); registerResourceManagers(); return true; }();
|
||||
[[maybe_unused]] static bool typesRegistered = [] { registerSchedulerNodes(); return true; }();
|
||||
}
|
||||
|
||||
template <class TClass>
|
||||
@ -37,10 +42,16 @@ struct ResourceTestBase
|
||||
Poco::AutoPtr config{new Poco::Util::XMLConfiguration(stream)};
|
||||
String config_prefix = "node";
|
||||
|
||||
return add<TClass>(event_queue, root_node, path, std::ref(*config), config_prefix);
|
||||
}
|
||||
|
||||
template <class TClass, class... Args>
|
||||
static TClass * add(EventQueue * event_queue, SchedulerNodePtr & root_node, const String & path, Args... args)
|
||||
{
|
||||
if (path == "/")
|
||||
{
|
||||
EXPECT_TRUE(root_node.get() == nullptr);
|
||||
root_node.reset(new TClass(event_queue, *config, config_prefix));
|
||||
root_node.reset(new TClass(event_queue, std::forward<Args>(args)...));
|
||||
return static_cast<TClass *>(root_node.get());
|
||||
}
|
||||
|
||||
@ -65,7 +76,7 @@ struct ResourceTestBase
|
||||
}
|
||||
|
||||
EXPECT_TRUE(!child_name.empty()); // wrong path
|
||||
SchedulerNodePtr node = std::make_shared<TClass>(event_queue, *config, config_prefix);
|
||||
SchedulerNodePtr node = std::make_shared<TClass>(event_queue, std::forward<Args>(args)...);
|
||||
node->basename = child_name;
|
||||
parent->attachChild(node);
|
||||
return static_cast<TClass *>(node.get());
|
||||
@ -107,25 +118,70 @@ class ResourceTestClass : public ResourceTestBase
|
||||
{
|
||||
struct Request : public ResourceRequest
|
||||
{
|
||||
ResourceTestClass * test;
|
||||
String name;
|
||||
|
||||
Request(ResourceCost cost_, const String & name_)
|
||||
Request(ResourceTestClass * test_, ResourceCost cost_, const String & name_)
|
||||
: ResourceRequest(cost_)
|
||||
, test(test_)
|
||||
, name(name_)
|
||||
{}
|
||||
|
||||
void execute() override
|
||||
{
|
||||
}
|
||||
|
||||
void failed(const std::exception_ptr &) override
|
||||
{
|
||||
test->failed_cost += cost;
|
||||
delete this;
|
||||
}
|
||||
};
|
||||
|
||||
public:
|
||||
~ResourceTestClass()
|
||||
{
|
||||
dequeue(); // Just to avoid any leaks of `Request` object
|
||||
}
|
||||
|
||||
template <class TClass>
|
||||
void add(const String & path, const String & xml = {})
|
||||
{
|
||||
ResourceTestBase::add<TClass>(&event_queue, root_node, path, xml);
|
||||
}
|
||||
|
||||
template <class TClass, class... Args>
|
||||
void addCustom(const String & path, Args... args)
|
||||
{
|
||||
ResourceTestBase::add<TClass>(&event_queue, root_node, path, std::forward<Args>(args)...);
|
||||
}
|
||||
|
||||
UnifiedSchedulerNodePtr createUnifiedNode(const String & basename, const SchedulingSettings & settings = {})
|
||||
{
|
||||
return createUnifiedNode(basename, {}, settings);
|
||||
}
|
||||
|
||||
UnifiedSchedulerNodePtr createUnifiedNode(const String & basename, const UnifiedSchedulerNodePtr & parent, const SchedulingSettings & settings = {})
|
||||
{
|
||||
auto node = std::make_shared<UnifiedSchedulerNode>(&event_queue, settings);
|
||||
node->basename = basename;
|
||||
if (parent)
|
||||
{
|
||||
parent->attachUnifiedChild(node);
|
||||
}
|
||||
else
|
||||
{
|
||||
EXPECT_TRUE(root_node.get() == nullptr);
|
||||
root_node = node;
|
||||
}
|
||||
return node;
|
||||
}
|
||||
|
||||
void enqueue(const UnifiedSchedulerNodePtr & node, const std::vector<ResourceCost> & costs)
|
||||
{
|
||||
enqueueImpl(node->getQueue().get(), costs, node->basename);
|
||||
}
|
||||
|
||||
void enqueue(const String & path, const std::vector<ResourceCost> & costs)
|
||||
{
|
||||
ASSERT_TRUE(root_node.get() != nullptr); // root should be initialized first
|
||||
@ -146,13 +202,14 @@ public:
|
||||
pos = String::npos;
|
||||
}
|
||||
}
|
||||
ISchedulerQueue * queue = dynamic_cast<ISchedulerQueue *>(node);
|
||||
ASSERT_TRUE(queue != nullptr); // not a queue
|
||||
enqueueImpl(dynamic_cast<ISchedulerQueue *>(node), costs);
|
||||
}
|
||||
|
||||
void enqueueImpl(ISchedulerQueue * queue, const std::vector<ResourceCost> & costs, const String & name = {})
|
||||
{
|
||||
ASSERT_TRUE(queue != nullptr); // not a queue
|
||||
for (ResourceCost cost : costs)
|
||||
{
|
||||
queue->enqueueRequest(new Request(cost, queue->basename));
|
||||
}
|
||||
queue->enqueueRequest(new Request(this, cost, name.empty() ? queue->basename : name));
|
||||
processEvents(); // to activate queues
|
||||
}
|
||||
|
||||
@ -208,6 +265,12 @@ public:
|
||||
consumed_cost[name] -= value;
|
||||
}
|
||||
|
||||
void failed(ResourceCost value)
|
||||
{
|
||||
EXPECT_EQ(failed_cost, value);
|
||||
failed_cost -= value;
|
||||
}
|
||||
|
||||
void processEvents()
|
||||
{
|
||||
while (event_queue.tryProcess()) {}
|
||||
@ -217,6 +280,7 @@ private:
|
||||
EventQueue event_queue;
|
||||
SchedulerNodePtr root_node;
|
||||
std::unordered_map<String, ResourceCost> consumed_cost;
|
||||
ResourceCost failed_cost = 0;
|
||||
};
|
||||
|
||||
template <class TManager>
|
||||
|
@ -13,6 +13,12 @@ public:
|
||||
, log(log_)
|
||||
{}
|
||||
|
||||
const String & getTypeName() const override
|
||||
{
|
||||
static String type_name("fake");
|
||||
return type_name;
|
||||
}
|
||||
|
||||
void attachChild(const SchedulerNodePtr & child) override
|
||||
{
|
||||
log += " +" + child->basename;
|
||||
|
@ -101,6 +101,11 @@ struct MyRequest : public ResourceRequest
|
||||
if (on_execute)
|
||||
on_execute();
|
||||
}
|
||||
|
||||
void failed(const std::exception_ptr &) override
|
||||
{
|
||||
FAIL();
|
||||
}
|
||||
};
|
||||
|
||||
TEST(SchedulerRoot, Smoke)
|
||||
|
@ -0,0 +1,495 @@
|
||||
#include <chrono>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <Common/Scheduler/ResourceGuard.h>
|
||||
#include <Common/Scheduler/ResourceLink.h>
|
||||
#include <Common/Scheduler/Nodes/tests/ResourceTest.h>
|
||||
|
||||
#include <Common/Priority.h>
|
||||
#include <Common/Scheduler/Nodes/FairPolicy.h>
|
||||
#include <Common/Scheduler/Nodes/UnifiedSchedulerNode.h>
|
||||
|
||||
using namespace DB;
|
||||
|
||||
using ResourceTest = ResourceTestClass;
|
||||
|
||||
TEST(SchedulerUnifiedNode, Smoke)
|
||||
{
|
||||
ResourceTest t;
|
||||
|
||||
t.addCustom<UnifiedSchedulerNode>("/", SchedulingSettings{});
|
||||
|
||||
t.enqueue("/fifo", {10, 10});
|
||||
t.dequeue(2);
|
||||
t.consumed("fifo", 20);
|
||||
}
|
||||
|
||||
TEST(SchedulerUnifiedNode, FairnessWeight)
|
||||
{
|
||||
ResourceTest t;
|
||||
|
||||
auto all = t.createUnifiedNode("all");
|
||||
auto a = t.createUnifiedNode("A", all, {.weight = 1.0, .priority = Priority{}});
|
||||
auto b = t.createUnifiedNode("B", all, {.weight = 3.0, .priority = Priority{}});
|
||||
|
||||
t.enqueue(a, {10, 10, 10, 10, 10, 10, 10, 10});
|
||||
t.enqueue(b, {10, 10, 10, 10, 10, 10, 10, 10});
|
||||
|
||||
t.dequeue(4);
|
||||
t.consumed("A", 10);
|
||||
t.consumed("B", 30);
|
||||
|
||||
t.dequeue(4);
|
||||
t.consumed("A", 10);
|
||||
t.consumed("B", 30);
|
||||
|
||||
t.dequeue();
|
||||
t.consumed("A", 60);
|
||||
t.consumed("B", 20);
|
||||
}
|
||||
|
||||
TEST(SchedulerUnifiedNode, FairnessActivation)
|
||||
{
|
||||
ResourceTest t;
|
||||
|
||||
auto all = t.createUnifiedNode("all");
|
||||
auto a = t.createUnifiedNode("A", all);
|
||||
auto b = t.createUnifiedNode("B", all);
|
||||
auto c = t.createUnifiedNode("C", all);
|
||||
|
||||
t.enqueue(a, {10, 10, 10, 10, 10, 10, 10, 10});
|
||||
t.enqueue(b, {10});
|
||||
t.enqueue(c, {10, 10});
|
||||
|
||||
t.dequeue(3);
|
||||
t.consumed("A", 10);
|
||||
t.consumed("B", 10);
|
||||
t.consumed("C", 10);
|
||||
|
||||
t.dequeue(4);
|
||||
t.consumed("A", 30);
|
||||
t.consumed("B", 0);
|
||||
t.consumed("C", 10);
|
||||
|
||||
t.enqueue(b, {10, 10});
|
||||
t.dequeue(1);
|
||||
t.consumed("B", 10);
|
||||
|
||||
t.enqueue(c, {10, 10});
|
||||
t.dequeue(1);
|
||||
t.consumed("C", 10);
|
||||
|
||||
t.dequeue(2); // A B or B A
|
||||
t.consumed("A", 10);
|
||||
t.consumed("B", 10);
|
||||
}
|
||||
|
||||
TEST(SchedulerUnifiedNode, FairnessMaxMin)
|
||||
{
|
||||
ResourceTest t;
|
||||
|
||||
auto all = t.createUnifiedNode("all");
|
||||
auto a = t.createUnifiedNode("A", all);
|
||||
auto b = t.createUnifiedNode("B", all);
|
||||
|
||||
t.enqueue(a, {10, 10}); // make sure A is never empty
|
||||
|
||||
for (int i = 0; i < 10; i++)
|
||||
{
|
||||
t.enqueue(a, {10, 10, 10, 10});
|
||||
t.enqueue(b, {10, 10});
|
||||
|
||||
t.dequeue(6);
|
||||
t.consumed("A", 40);
|
||||
t.consumed("B", 20);
|
||||
}
|
||||
|
||||
t.dequeue(2);
|
||||
t.consumed("A", 20);
|
||||
}
|
||||
|
||||
TEST(SchedulerUnifiedNode, FairnessHierarchical)
|
||||
{
|
||||
ResourceTest t;
|
||||
|
||||
|
||||
auto all = t.createUnifiedNode("all");
|
||||
auto x = t.createUnifiedNode("X", all);
|
||||
auto y = t.createUnifiedNode("Y", all);
|
||||
auto a = t.createUnifiedNode("A", x);
|
||||
auto b = t.createUnifiedNode("B", x);
|
||||
auto c = t.createUnifiedNode("C", y);
|
||||
auto d = t.createUnifiedNode("D", y);
|
||||
|
||||
t.enqueue(a, {10, 10, 10, 10, 10, 10, 10, 10});
|
||||
t.enqueue(b, {10, 10, 10, 10, 10, 10, 10, 10});
|
||||
t.enqueue(c, {10, 10, 10, 10, 10, 10, 10, 10});
|
||||
t.enqueue(d, {10, 10, 10, 10, 10, 10, 10, 10});
|
||||
for (int i = 0; i < 4; i++)
|
||||
{
|
||||
t.dequeue(8);
|
||||
t.consumed("A", 20);
|
||||
t.consumed("B", 20);
|
||||
t.consumed("C", 20);
|
||||
t.consumed("D", 20);
|
||||
}
|
||||
|
||||
t.enqueue(a, {10, 10, 10, 10, 10, 10, 10, 10});
|
||||
t.enqueue(a, {10, 10, 10, 10, 10, 10, 10, 10});
|
||||
t.enqueue(c, {10, 10, 10, 10, 10, 10, 10, 10});
|
||||
t.enqueue(d, {10, 10, 10, 10, 10, 10, 10, 10});
|
||||
for (int i = 0; i < 4; i++)
|
||||
{
|
||||
t.dequeue(8);
|
||||
t.consumed("A", 40);
|
||||
t.consumed("C", 20);
|
||||
t.consumed("D", 20);
|
||||
}
|
||||
|
||||
t.enqueue(b, {10, 10, 10, 10, 10, 10, 10, 10});
|
||||
t.enqueue(b, {10, 10, 10, 10, 10, 10, 10, 10});
|
||||
t.enqueue(c, {10, 10, 10, 10, 10, 10, 10, 10});
|
||||
t.enqueue(d, {10, 10, 10, 10, 10, 10, 10, 10});
|
||||
for (int i = 0; i < 4; i++)
|
||||
{
|
||||
t.dequeue(8);
|
||||
t.consumed("B", 40);
|
||||
t.consumed("C", 20);
|
||||
t.consumed("D", 20);
|
||||
}
|
||||
|
||||
t.enqueue(a, {10, 10, 10, 10, 10, 10, 10, 10});
|
||||
t.enqueue(b, {10, 10, 10, 10, 10, 10, 10, 10});
|
||||
t.enqueue(c, {10, 10, 10, 10, 10, 10, 10, 10});
|
||||
t.enqueue(c, {10, 10, 10, 10, 10, 10, 10, 10});
|
||||
for (int i = 0; i < 4; i++)
|
||||
{
|
||||
t.dequeue(8);
|
||||
t.consumed("A", 20);
|
||||
t.consumed("B", 20);
|
||||
t.consumed("C", 40);
|
||||
}
|
||||
|
||||
t.enqueue(a, {10, 10, 10, 10, 10, 10, 10, 10});
|
||||
t.enqueue(b, {10, 10, 10, 10, 10, 10, 10, 10});
|
||||
t.enqueue(d, {10, 10, 10, 10, 10, 10, 10, 10});
|
||||
t.enqueue(d, {10, 10, 10, 10, 10, 10, 10, 10});
|
||||
for (int i = 0; i < 4; i++)
|
||||
{
|
||||
t.dequeue(8);
|
||||
t.consumed("A", 20);
|
||||
t.consumed("B", 20);
|
||||
t.consumed("D", 40);
|
||||
}
|
||||
|
||||
t.enqueue(a, {10, 10, 10, 10, 10, 10, 10, 10});
|
||||
t.enqueue(a, {10, 10, 10, 10, 10, 10, 10, 10});
|
||||
t.enqueue(d, {10, 10, 10, 10, 10, 10, 10, 10});
|
||||
t.enqueue(d, {10, 10, 10, 10, 10, 10, 10, 10});
|
||||
for (int i = 0; i < 4; i++)
|
||||
{
|
||||
t.dequeue(8);
|
||||
t.consumed("A", 40);
|
||||
t.consumed("D", 40);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(SchedulerUnifiedNode, Priority)
|
||||
{
|
||||
ResourceTest t;
|
||||
|
||||
auto all = t.createUnifiedNode("all");
|
||||
auto a = t.createUnifiedNode("A", all, {.priority = Priority{3}});
|
||||
auto b = t.createUnifiedNode("B", all, {.priority = Priority{2}});
|
||||
auto c = t.createUnifiedNode("C", all, {.priority = Priority{1}});
|
||||
|
||||
t.enqueue(a, {10, 10, 10});
|
||||
t.enqueue(b, {10, 10, 10});
|
||||
t.enqueue(c, {10, 10, 10});
|
||||
|
||||
t.dequeue(2);
|
||||
t.consumed("A", 0);
|
||||
t.consumed("B", 0);
|
||||
t.consumed("C", 20);
|
||||
|
||||
t.dequeue(2);
|
||||
t.consumed("A", 0);
|
||||
t.consumed("B", 10);
|
||||
t.consumed("C", 10);
|
||||
|
||||
t.dequeue(2);
|
||||
t.consumed("A", 0);
|
||||
t.consumed("B", 20);
|
||||
t.consumed("C", 0);
|
||||
|
||||
t.dequeue();
|
||||
t.consumed("A", 30);
|
||||
t.consumed("B", 0);
|
||||
t.consumed("C", 0);
|
||||
}
|
||||
|
||||
TEST(SchedulerUnifiedNode, PriorityActivation)
|
||||
{
|
||||
ResourceTest t;
|
||||
|
||||
auto all = t.createUnifiedNode("all");
|
||||
auto a = t.createUnifiedNode("A", all, {.priority = Priority{3}});
|
||||
auto b = t.createUnifiedNode("B", all, {.priority = Priority{2}});
|
||||
auto c = t.createUnifiedNode("C", all, {.priority = Priority{1}});
|
||||
|
||||
t.enqueue(a, {10, 10, 10, 10, 10, 10});
|
||||
t.enqueue(b, {10});
|
||||
t.enqueue(c, {10, 10});
|
||||
|
||||
t.dequeue(3);
|
||||
t.consumed("A", 0);
|
||||
t.consumed("B", 10);
|
||||
t.consumed("C", 20);
|
||||
|
||||
t.dequeue(2);
|
||||
t.consumed("A", 20);
|
||||
t.consumed("B", 0);
|
||||
t.consumed("C", 0);
|
||||
|
||||
t.enqueue(b, {10, 10, 10});
|
||||
t.dequeue(2);
|
||||
t.consumed("A", 0);
|
||||
t.consumed("B", 20);
|
||||
t.consumed("C", 0);
|
||||
|
||||
t.enqueue(c, {10, 10});
|
||||
t.dequeue(3);
|
||||
t.consumed("A", 0);
|
||||
t.consumed("B", 10);
|
||||
t.consumed("C", 20);
|
||||
|
||||
t.dequeue(2);
|
||||
t.consumed("A", 20);
|
||||
t.consumed("B", 0);
|
||||
t.consumed("C", 0);
|
||||
}
|
||||
|
||||
TEST(SchedulerUnifiedNode, List)
|
||||
{
|
||||
ResourceTest t;
|
||||
|
||||
std::list<UnifiedSchedulerNodePtr> list;
|
||||
list.push_back(t.createUnifiedNode("all"));
|
||||
|
||||
for (int length = 1; length < 5; length++)
|
||||
{
|
||||
String name = fmt::format("L{}", length);
|
||||
list.push_back(t.createUnifiedNode(name, list.back()));
|
||||
|
||||
for (int i = 0; i < 3; i++)
|
||||
{
|
||||
t.enqueue(list.back(), {10, 10});
|
||||
t.dequeue(1);
|
||||
t.consumed(name, 10);
|
||||
|
||||
for (int j = 0; j < 3; j++)
|
||||
{
|
||||
t.enqueue(list.back(), {10, 10, 10});
|
||||
t.dequeue(1);
|
||||
t.consumed(name, 10);
|
||||
t.dequeue(1);
|
||||
t.consumed(name, 10);
|
||||
t.dequeue(1);
|
||||
t.consumed(name, 10);
|
||||
}
|
||||
|
||||
t.dequeue(1);
|
||||
t.consumed(name, 10);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST(SchedulerUnifiedNode, ThrottlerLeakyBucket)
|
||||
{
|
||||
ResourceTest t;
|
||||
EventQueue::TimePoint start = std::chrono::system_clock::now();
|
||||
t.process(start, 0);
|
||||
|
||||
auto all = t.createUnifiedNode("all", {.priority = Priority{}, .max_speed = 10.0, .max_burst = 20.0});
|
||||
|
||||
t.enqueue(all, {10, 10, 10, 10, 10, 10, 10, 10});
|
||||
|
||||
t.process(start + std::chrono::seconds(0));
|
||||
t.consumed("all", 30); // It is allowed to go below zero for exactly one resource request
|
||||
|
||||
t.process(start + std::chrono::seconds(1));
|
||||
t.consumed("all", 10);
|
||||
|
||||
t.process(start + std::chrono::seconds(2));
|
||||
t.consumed("all", 10);
|
||||
|
||||
t.process(start + std::chrono::seconds(3));
|
||||
t.consumed("all", 10);
|
||||
|
||||
t.process(start + std::chrono::seconds(4));
|
||||
t.consumed("all", 10);
|
||||
|
||||
t.process(start + std::chrono::seconds(100500));
|
||||
t.consumed("all", 10);
|
||||
}
|
||||
|
||||
TEST(SchedulerUnifiedNode, ThrottlerPacing)
|
||||
{
|
||||
ResourceTest t;
|
||||
EventQueue::TimePoint start = std::chrono::system_clock::now();
|
||||
t.process(start, 0);
|
||||
|
||||
// Zero burst allows you to send one request of any `size` and than throttle for `size/max_speed` seconds.
|
||||
// Useful if outgoing traffic should be "paced", i.e. have the least possible burstiness.
|
||||
auto all = t.createUnifiedNode("all", {.priority = Priority{}, .max_speed = 1.0, .max_burst = 0.0});
|
||||
|
||||
t.enqueue(all, {1, 2, 3, 1, 2, 1});
|
||||
int output[] = {1, 2, 0, 3, 0, 0, 1, 2, 0, 1, 0};
|
||||
for (int i = 0; i < std::size(output); i++)
|
||||
{
|
||||
t.process(start + std::chrono::seconds(i));
|
||||
t.consumed("all", output[i]);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(SchedulerUnifiedNode, ThrottlerBucketFilling)
|
||||
{
|
||||
ResourceTest t;
|
||||
EventQueue::TimePoint start = std::chrono::system_clock::now();
|
||||
t.process(start, 0);
|
||||
|
||||
auto all = t.createUnifiedNode("all", {.priority = Priority{}, .max_speed = 10.0, .max_burst = 100.0});
|
||||
|
||||
t.enqueue(all, {100});
|
||||
|
||||
t.process(start + std::chrono::seconds(0));
|
||||
t.consumed("all", 100); // consume all tokens, but it is still active (not negative)
|
||||
|
||||
t.process(start + std::chrono::seconds(5));
|
||||
t.consumed("all", 0); // There was nothing to consume
|
||||
|
||||
t.enqueue(all, {10, 10, 10, 10, 10, 10, 10, 10, 10, 10});
|
||||
t.process(start + std::chrono::seconds(5));
|
||||
t.consumed("all", 60); // 5 sec * 10 tokens/sec = 50 tokens + 1 extra request to go below zero
|
||||
|
||||
t.process(start + std::chrono::seconds(100));
|
||||
t.consumed("all", 40); // Consume rest
|
||||
|
||||
t.process(start + std::chrono::seconds(200));
|
||||
|
||||
t.enqueue(all, {95, 1, 1, 1, 1, 1, 1, 1, 1, 1});
|
||||
t.process(start + std::chrono::seconds(200));
|
||||
t.consumed("all", 101); // check we cannot consume more than max_burst + 1 request
|
||||
|
||||
t.process(start + std::chrono::seconds(100500));
|
||||
t.consumed("all", 3);
|
||||
}
|
||||
|
||||
TEST(SchedulerUnifiedNode, ThrottlerAndFairness)
|
||||
{
|
||||
ResourceTest t;
|
||||
EventQueue::TimePoint start = std::chrono::system_clock::now();
|
||||
t.process(start, 0);
|
||||
|
||||
auto all = t.createUnifiedNode("all", {.priority = Priority{}, .max_speed = 10.0, .max_burst = 100.0});
|
||||
auto a = t.createUnifiedNode("A", all, {.weight = 10.0, .priority = Priority{}});
|
||||
auto b = t.createUnifiedNode("B", all, {.weight = 90.0, .priority = Priority{}});
|
||||
|
||||
ResourceCost req_cost = 1;
|
||||
ResourceCost total_cost = 2000;
|
||||
for (int i = 0; i < total_cost / req_cost; i++)
|
||||
{
|
||||
t.enqueue(a, {req_cost});
|
||||
t.enqueue(b, {req_cost});
|
||||
}
|
||||
|
||||
double shareA = 0.1;
|
||||
double shareB = 0.9;
|
||||
|
||||
// Bandwidth-latency coupling due to fairness: worst latency is inversely proportional to share
|
||||
auto max_latencyA = static_cast<ResourceCost>(req_cost * (1.0 + 1.0 / shareA));
|
||||
auto max_latencyB = static_cast<ResourceCost>(req_cost * (1.0 + 1.0 / shareB));
|
||||
|
||||
double consumedA = 0;
|
||||
double consumedB = 0;
|
||||
for (int seconds = 0; seconds < 100; seconds++)
|
||||
{
|
||||
t.process(start + std::chrono::seconds(seconds));
|
||||
double arrival_curve = 100.0 + 10.0 * seconds + req_cost;
|
||||
t.consumed("A", static_cast<ResourceCost>(arrival_curve * shareA - consumedA), max_latencyA);
|
||||
t.consumed("B", static_cast<ResourceCost>(arrival_curve * shareB - consumedB), max_latencyB);
|
||||
consumedA = arrival_curve * shareA;
|
||||
consumedB = arrival_curve * shareB;
|
||||
}
|
||||
}
|
||||
|
||||
TEST(SchedulerUnifiedNode, QueueWithRequestsDestruction)
|
||||
{
|
||||
ResourceTest t;
|
||||
|
||||
auto all = t.createUnifiedNode("all");
|
||||
|
||||
t.enqueue(all, {10, 10}); // enqueue reqeuests to be canceled
|
||||
|
||||
// This will destory the queue and fail both requests
|
||||
auto a = t.createUnifiedNode("A", all);
|
||||
t.failed(20);
|
||||
|
||||
// Check that everything works fine after destruction
|
||||
auto b = t.createUnifiedNode("B", all);
|
||||
t.enqueue(a, {10, 10}); // make sure A is never empty
|
||||
for (int i = 0; i < 10; i++)
|
||||
{
|
||||
t.enqueue(a, {10, 10, 10, 10});
|
||||
t.enqueue(b, {10, 10});
|
||||
|
||||
t.dequeue(6);
|
||||
t.consumed("A", 40);
|
||||
t.consumed("B", 20);
|
||||
}
|
||||
t.dequeue(2);
|
||||
t.consumed("A", 20);
|
||||
}
|
||||
|
||||
|
||||
TEST(SchedulerUnifiedNode, ResourceGuardException)
|
||||
{
|
||||
ResourceTest t;
|
||||
|
||||
auto all = t.createUnifiedNode("all");
|
||||
|
||||
t.enqueue(all, {10, 10}); // enqueue reqeuests to be canceled
|
||||
|
||||
std::thread consumer([queue = all->getQueue()]
|
||||
{
|
||||
ResourceLink link{.queue = queue.get()};
|
||||
try
|
||||
{
|
||||
ResourceGuard rg(ResourceGuard::Metrics::getIOWrite(), link);
|
||||
FAIL();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
}
|
||||
});
|
||||
|
||||
// This will destory the queue and fail both requests
|
||||
auto a = t.createUnifiedNode("A", all);
|
||||
t.failed(20);
|
||||
consumer.join();
|
||||
|
||||
// Check that everything works fine after destruction
|
||||
auto b = t.createUnifiedNode("B", all);
|
||||
t.enqueue(a, {10, 10}); // make sure A is never empty
|
||||
for (int i = 0; i < 10; i++)
|
||||
{
|
||||
t.enqueue(a, {10, 10, 10, 10});
|
||||
t.enqueue(b, {10, 10});
|
||||
|
||||
t.dequeue(6);
|
||||
t.consumed("A", 40);
|
||||
t.consumed("B", 20);
|
||||
}
|
||||
t.dequeue(2);
|
||||
t.consumed("A", 20);
|
||||
}
|
@ -12,6 +12,7 @@
|
||||
#include <Common/CurrentMetrics.h>
|
||||
|
||||
#include <condition_variable>
|
||||
#include <exception>
|
||||
#include <mutex>
|
||||
|
||||
|
||||
@ -34,6 +35,11 @@ namespace CurrentMetrics
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int RESOURCE_ACCESS_DENIED;
|
||||
}
|
||||
|
||||
/*
|
||||
* Scoped resource guard.
|
||||
* Waits for resource to be available in constructor and releases resource in destructor
|
||||
@ -109,12 +115,25 @@ public:
|
||||
dequeued_cv.notify_one();
|
||||
}
|
||||
|
||||
// This function is executed inside scheduler thread and wakes thread issued this `request`.
|
||||
// That thread will throw an exception.
|
||||
void failed(const std::exception_ptr & ptr) override
|
||||
{
|
||||
std::unique_lock lock(mutex);
|
||||
chassert(state == Enqueued);
|
||||
state = Dequeued;
|
||||
exception = ptr;
|
||||
dequeued_cv.notify_one();
|
||||
}
|
||||
|
||||
void wait()
|
||||
{
|
||||
CurrentMetrics::Increment scheduled(metrics->scheduled_count);
|
||||
auto timer = CurrentThread::getProfileEvents().timer(metrics->wait_microseconds);
|
||||
std::unique_lock lock(mutex);
|
||||
dequeued_cv.wait(lock, [this] { return state == Dequeued; });
|
||||
if (exception)
|
||||
throw Exception(ErrorCodes::RESOURCE_ACCESS_DENIED, "Resource request failed: {}", getExceptionMessage(exception, /* with_stacktrace = */ false));
|
||||
}
|
||||
|
||||
void finish(ResourceCost real_cost_, ResourceLink link_)
|
||||
@ -151,6 +170,7 @@ public:
|
||||
std::mutex mutex;
|
||||
std::condition_variable dequeued_cv;
|
||||
RequestState state = Finished;
|
||||
std::exception_ptr exception;
|
||||
};
|
||||
|
||||
/// Creates pending request for resource; blocks while resource is not available (unless `Lock::Defer`)
|
||||
|
@ -1,55 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <Common/ErrorCodes.h>
|
||||
#include <Common/Exception.h>
|
||||
|
||||
#include <Common/Scheduler/IResourceManager.h>
|
||||
|
||||
#include <boost/noncopyable.hpp>
|
||||
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <unordered_map>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int INVALID_SCHEDULER_NODE;
|
||||
}
|
||||
|
||||
class ResourceManagerFactory : private boost::noncopyable
|
||||
{
|
||||
public:
|
||||
static ResourceManagerFactory & instance()
|
||||
{
|
||||
static ResourceManagerFactory ret;
|
||||
return ret;
|
||||
}
|
||||
|
||||
ResourceManagerPtr get(const String & name)
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
if (auto iter = methods.find(name); iter != methods.end())
|
||||
return iter->second();
|
||||
throw Exception(ErrorCodes::INVALID_SCHEDULER_NODE, "Unknown scheduler node type: {}", name);
|
||||
}
|
||||
|
||||
template <class TDerived>
|
||||
void registerMethod(const String & name)
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
methods[name] = [] ()
|
||||
{
|
||||
return std::make_shared<TDerived>();
|
||||
};
|
||||
}
|
||||
|
||||
private:
|
||||
std::mutex mutex;
|
||||
using Method = std::function<ResourceManagerPtr()>;
|
||||
std::unordered_map<String, Method> methods;
|
||||
};
|
||||
|
||||
}
|
@ -1,13 +1,42 @@
|
||||
#include <Common/Scheduler/ResourceRequest.h>
|
||||
#include <Common/Scheduler/ISchedulerConstraint.h>
|
||||
|
||||
#include <Common/Exception.h>
|
||||
|
||||
#include <ranges>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
void ResourceRequest::finish()
|
||||
{
|
||||
if (constraint)
|
||||
constraint->finishRequest(this);
|
||||
// Iterate over constraints in reverse order
|
||||
for (ISchedulerConstraint * constraint : std::ranges::reverse_view(constraints))
|
||||
{
|
||||
if (constraint)
|
||||
constraint->finishRequest(this);
|
||||
}
|
||||
}
|
||||
|
||||
void ResourceRequest::addConstraint(ISchedulerConstraint * new_constraint)
|
||||
{
|
||||
for (auto & constraint : constraints)
|
||||
{
|
||||
if (!constraint)
|
||||
{
|
||||
constraint = new_constraint;
|
||||
return;
|
||||
}
|
||||
}
|
||||
// TODO(serxa): is it possible to validate it during enqueue of resource request to avoid LOGICAL_ERRORs in the scheduler thread? possible but will not cover case of moving queue with requests inside to invalid position
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
||||
"Max number of simultaneous workload constraints exceeded ({}). Remove extra constraints before using this workload.",
|
||||
ResourceMaxConstraints);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -2,7 +2,9 @@
|
||||
|
||||
#include <boost/intrusive/list.hpp>
|
||||
#include <base/types.h>
|
||||
#include <array>
|
||||
#include <limits>
|
||||
#include <exception>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -15,6 +17,10 @@ class ISchedulerConstraint;
|
||||
using ResourceCost = Int64;
|
||||
constexpr ResourceCost ResourceCostMax = std::numeric_limits<int>::max();
|
||||
|
||||
// TODO(serxa): validate hierarchy to avoid too many constrants
|
||||
/// Max number of constraints for a request to pass though (depth of constaints chain)
|
||||
constexpr size_t ResourceMaxConstraints = 8;
|
||||
|
||||
/*
|
||||
* Request for a resource consumption. The main moving part of the scheduling subsystem.
|
||||
* Resource requests processing workflow:
|
||||
@ -49,9 +55,10 @@ public:
|
||||
/// NOTE: If cost is not known in advance, ResourceBudget should be used (note that every ISchedulerQueue has it)
|
||||
ResourceCost cost;
|
||||
|
||||
/// Scheduler node to be notified on consumption finish
|
||||
/// Auto-filled during request enqueue/dequeue
|
||||
ISchedulerConstraint * constraint;
|
||||
/// Scheduler nodes to be notified on consumption finish
|
||||
/// Auto-filled during request dequeue
|
||||
/// Vector is not used to avoid allocations in the scheduler thread
|
||||
std::array<ISchedulerConstraint *, ResourceMaxConstraints> constraints;
|
||||
|
||||
explicit ResourceRequest(ResourceCost cost_ = 1)
|
||||
{
|
||||
@ -62,7 +69,8 @@ public:
|
||||
void reset(ResourceCost cost_)
|
||||
{
|
||||
cost = cost_;
|
||||
constraint = nullptr;
|
||||
for (auto & constraint : constraints)
|
||||
constraint = nullptr;
|
||||
// Note that list_base_hook should be reset independently (by intrusive list)
|
||||
}
|
||||
|
||||
@ -74,11 +82,17 @@ public:
|
||||
/// (e.g. setting an std::promise or creating a job in a thread pool)
|
||||
virtual void execute() = 0;
|
||||
|
||||
/// Callback to trigger an error in case if resource is unavailable.
|
||||
virtual void failed(const std::exception_ptr & ptr) = 0;
|
||||
|
||||
/// Stop resource consumption and notify resource scheduler.
|
||||
/// Should be called when resource consumption is finished by consumer.
|
||||
/// ResourceRequest should not be destructed or reset before calling to `finish()`.
|
||||
/// WARNING: this function MUST not be called if request was canceled.
|
||||
/// WARNING: this function MUST not be called if request was canceled or failed.
|
||||
void finish();
|
||||
|
||||
/// Is called from the scheduler thread to fill `constraints` chain
|
||||
void addConstraint(ISchedulerConstraint * new_constraint);
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -95,6 +95,12 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
const String & getTypeName() const override
|
||||
{
|
||||
static String type_name("scheduler");
|
||||
return type_name;
|
||||
}
|
||||
|
||||
bool equals(ISchedulerNode * other) override
|
||||
{
|
||||
if (!ISchedulerNode::equals(other))
|
||||
|
38
src/Common/Scheduler/SchedulingSettings.h
Normal file
38
src/Common/Scheduler/SchedulingSettings.h
Normal file
@ -0,0 +1,38 @@
|
||||
#pragma once
|
||||
|
||||
#include <base/types.h>
|
||||
|
||||
#include <Common/Priority.h>
|
||||
|
||||
#include <limits>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
struct SchedulingSettings
|
||||
{
|
||||
/// Priority and weight among siblings
|
||||
double weight = 1.0;
|
||||
Priority priority;
|
||||
|
||||
/// Throttling constraints.
|
||||
/// Up to 2 independent throttlers: one for average speed and one for peek speed.
|
||||
static constexpr double default_burst_seconds = 1.0;
|
||||
double max_speed = 0; // Zero means unlimited
|
||||
double max_burst = 0; // default is `default_burst_seconds * max_speed`
|
||||
|
||||
/// Limits total number of concurrent resource requests that are allowed to consume
|
||||
static constexpr Int64 default_max_requests = std::numeric_limits<Int64>::max();
|
||||
Int64 max_requests = default_max_requests;
|
||||
|
||||
/// Limits total cost of concurrent resource requests that are allowed to consume
|
||||
static constexpr Int64 default_max_cost = std::numeric_limits<Int64>::max();
|
||||
Int64 max_cost = default_max_cost;
|
||||
|
||||
bool hasThrottler() const { return max_speed != 0; }
|
||||
bool hasSemaphore() const { return max_requests != default_max_requests || max_cost != default_max_cost; }
|
||||
|
||||
// TODO(serxa): add helper functions for parsing, printing and validating
|
||||
};
|
||||
|
||||
}
|
93
src/Common/Scheduler/Workload/IWorkloadEntityStorage.h
Normal file
93
src/Common/Scheduler/Workload/IWorkloadEntityStorage.h
Normal file
@ -0,0 +1,93 @@
|
||||
#pragma once
|
||||
|
||||
#include <base/types.h>
|
||||
#include <base/scope_guard.h>
|
||||
|
||||
#include <Interpreters/Context_fwd.h>
|
||||
|
||||
#include <Parsers/IAST_fwd.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class IAST;
|
||||
struct Settings;
|
||||
|
||||
enum class WorkloadEntityType : uint8_t
|
||||
{
|
||||
Workload,
|
||||
Resource,
|
||||
|
||||
MAX
|
||||
};
|
||||
|
||||
/// Interface for a storage of workload entities (WORKLOAD and RESOURCE).
|
||||
class IWorkloadEntityStorage
|
||||
{
|
||||
public:
|
||||
virtual ~IWorkloadEntityStorage() = default;
|
||||
|
||||
/// Whether this storage can replicate entities to another node.
|
||||
virtual bool isReplicated() const { return false; }
|
||||
virtual String getReplicationID() const { return ""; }
|
||||
|
||||
/// Loads all entities. Can be called once - if entities are already loaded the function does nothing.
|
||||
virtual void loadEntities() = 0;
|
||||
|
||||
/// Get entity by name. If no entity stored with entity_name throws exception.
|
||||
virtual ASTPtr get(const String & entity_name) const = 0;
|
||||
|
||||
/// Get entity by name. If no entity stored with entity_name return nullptr.
|
||||
virtual ASTPtr tryGet(const String & entity_name) const = 0;
|
||||
|
||||
/// Check if entity with entity_name is stored.
|
||||
virtual bool has(const String & entity_name) const = 0;
|
||||
|
||||
/// Get all entity names.
|
||||
virtual std::vector<String> getAllEntityNames() const = 0;
|
||||
|
||||
/// Get all entity names of specified type.
|
||||
virtual std::vector<String> getAllEntityNames(WorkloadEntityType entity_type) const = 0;
|
||||
|
||||
/// Get all entities.
|
||||
virtual std::vector<std::pair<String, ASTPtr>> getAllEntities() const = 0;
|
||||
|
||||
/// Check whether any entity have been stored.
|
||||
virtual bool empty() const = 0;
|
||||
|
||||
/// Stops watching.
|
||||
virtual void stopWatching() {}
|
||||
|
||||
/// Immediately reloads all entities, throws an exception if failed.
|
||||
virtual void reloadEntities() = 0;
|
||||
|
||||
/// Stores an entity.
|
||||
virtual bool storeEntity(
|
||||
const ContextPtr & current_context,
|
||||
WorkloadEntityType entity_type,
|
||||
const String & entity_name,
|
||||
ASTPtr create_entity_query,
|
||||
bool throw_if_exists,
|
||||
bool replace_if_exists,
|
||||
const Settings & settings) = 0;
|
||||
|
||||
/// Removes an entity.
|
||||
virtual bool removeEntity(
|
||||
const ContextPtr & current_context,
|
||||
WorkloadEntityType entity_type,
|
||||
const String & entity_name,
|
||||
bool throw_if_not_exists) = 0;
|
||||
|
||||
using OnChangedHandler = std::function<void(
|
||||
WorkloadEntityType /* entity_type */,
|
||||
const String & /* entity_name */,
|
||||
const ASTPtr & /* new or changed entity, null if removed */)>;
|
||||
|
||||
/// Subscribes for all changes.
|
||||
virtual scope_guard subscribeForChanges(
|
||||
WorkloadEntityType entity_type,
|
||||
const OnChangedHandler & handler) = 0;
|
||||
};
|
||||
|
||||
}
|
299
src/Common/Scheduler/Workload/WorkloadEntityDiskStorage.cpp
Normal file
299
src/Common/Scheduler/Workload/WorkloadEntityDiskStorage.cpp
Normal file
@ -0,0 +1,299 @@
|
||||
#include <Common/Scheduler/Workload/WorkloadEntityDiskStorage.h>
|
||||
|
||||
#include <Common/StringUtils.h>
|
||||
#include <Common/atomicRename.h>
|
||||
#include <Common/escapeForFileName.h>
|
||||
#include <Common/logger_useful.h>
|
||||
#include <Common/quoteString.h>
|
||||
|
||||
#include <Core/Settings.h>
|
||||
|
||||
#include <IO/ReadBufferFromFile.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteBufferFromFile.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
|
||||
#include <Interpreters/Context.h>
|
||||
|
||||
#include <Parsers/parseQuery.h>
|
||||
#include <Parsers/formatAST.h>
|
||||
#include <Parsers/ParserCreateWorkloadQuery.h>
|
||||
#include <Parsers/ParserCreateResourceQuery.h>
|
||||
|
||||
#include <Poco/DirectoryIterator.h>
|
||||
#include <Poco/Logger.h>
|
||||
|
||||
#include <filesystem>
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int DIRECTORY_DOESNT_EXIST;
|
||||
extern const int WORKLOAD_ENTITY_ALREADY_EXISTS;
|
||||
extern const int UNKNOWN_WORKLOAD_ENTITY;
|
||||
}
|
||||
|
||||
|
||||
namespace
|
||||
{
|
||||
/// Converts a path to an absolute path and append it with a separator.
|
||||
String makeDirectoryPathCanonical(const String & directory_path)
|
||||
{
|
||||
auto canonical_directory_path = std::filesystem::weakly_canonical(directory_path);
|
||||
if (canonical_directory_path.has_filename())
|
||||
canonical_directory_path += std::filesystem::path::preferred_separator;
|
||||
return canonical_directory_path;
|
||||
}
|
||||
}
|
||||
|
||||
WorkloadEntityDiskStorage::WorkloadEntityDiskStorage(const ContextPtr & global_context_, const String & dir_path_)
|
||||
: WorkloadEntityStorageBase(global_context_)
|
||||
, dir_path{makeDirectoryPathCanonical(dir_path_)}
|
||||
, log{getLogger("WorkloadEntityDiskStorage")}
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
ASTPtr WorkloadEntityDiskStorage::tryLoadEntity(WorkloadEntityType entity_type, const String & entity_name)
|
||||
{
|
||||
return tryLoadEntity(entity_type, entity_name, getFilePath(entity_type, entity_name), /* check_file_exists= */ true);
|
||||
}
|
||||
|
||||
|
||||
ASTPtr WorkloadEntityDiskStorage::tryLoadEntity(WorkloadEntityType entity_type, const String & entity_name, const String & path, bool check_file_exists)
|
||||
{
|
||||
LOG_DEBUG(log, "Loading workload entity {} from file {}", backQuote(entity_name), path);
|
||||
|
||||
try
|
||||
{
|
||||
if (check_file_exists && !fs::exists(path))
|
||||
return nullptr;
|
||||
|
||||
/// There is .sql file with workload entity creation statement.
|
||||
ReadBufferFromFile in(path);
|
||||
|
||||
String entity_create_query;
|
||||
readStringUntilEOF(entity_create_query, in);
|
||||
|
||||
switch (entity_type)
|
||||
{
|
||||
case WorkloadEntityType::Workload:
|
||||
{
|
||||
ParserCreateWorkloadQuery parser;
|
||||
ASTPtr ast = parseQuery(
|
||||
parser,
|
||||
entity_create_query.data(),
|
||||
entity_create_query.data() + entity_create_query.size(),
|
||||
"",
|
||||
0,
|
||||
global_context->getSettingsRef().max_parser_depth,
|
||||
global_context->getSettingsRef().max_parser_backtracks);
|
||||
return ast;
|
||||
}
|
||||
case WorkloadEntityType::Resource:
|
||||
{
|
||||
ParserCreateResourceQuery parser;
|
||||
ASTPtr ast = parseQuery(
|
||||
parser,
|
||||
entity_create_query.data(),
|
||||
entity_create_query.data() + entity_create_query.size(),
|
||||
"",
|
||||
0,
|
||||
global_context->getSettingsRef().max_parser_depth,
|
||||
global_context->getSettingsRef().max_parser_backtracks);
|
||||
return ast;
|
||||
}
|
||||
case WorkloadEntityType::MAX: return nullptr;
|
||||
}
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(log, fmt::format("while loading workload entity {} from path {}", backQuote(entity_name), path));
|
||||
return nullptr; /// Failed to load this entity, will ignore it
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void WorkloadEntityDiskStorage::loadEntities()
|
||||
{
|
||||
if (!entities_loaded)
|
||||
loadEntitiesImpl();
|
||||
}
|
||||
|
||||
|
||||
void WorkloadEntityDiskStorage::reloadEntities()
|
||||
{
|
||||
// TODO(serxa): it does not send notifications, maybe better to remove this method completely
|
||||
loadEntitiesImpl();
|
||||
}
|
||||
|
||||
|
||||
void WorkloadEntityDiskStorage::loadEntitiesImpl()
|
||||
{
|
||||
LOG_INFO(log, "Loading workload entities from {}", dir_path);
|
||||
|
||||
if (!std::filesystem::exists(dir_path))
|
||||
{
|
||||
LOG_DEBUG(log, "The directory for workload entities ({}) does not exist: nothing to load", dir_path);
|
||||
return;
|
||||
}
|
||||
|
||||
std::vector<std::pair<String, ASTPtr>> entities_name_and_queries;
|
||||
|
||||
Poco::DirectoryIterator dir_end;
|
||||
for (Poco::DirectoryIterator it(dir_path); it != dir_end; ++it)
|
||||
{
|
||||
if (it->isDirectory())
|
||||
continue;
|
||||
|
||||
const String & file_name = it.name();
|
||||
|
||||
if (startsWith(file_name, "workload_") && endsWith(file_name, ".sql"))
|
||||
{
|
||||
size_t prefix_length = strlen("workload_");
|
||||
size_t suffix_length = strlen(".sql");
|
||||
String name = unescapeForFileName(file_name.substr(prefix_length, file_name.length() - prefix_length - suffix_length));
|
||||
|
||||
if (name.empty())
|
||||
continue;
|
||||
|
||||
ASTPtr ast = tryLoadEntity(WorkloadEntityType::Workload, name, dir_path + it.name(), /* check_file_exists= */ false);
|
||||
if (ast)
|
||||
entities_name_and_queries.emplace_back(name, ast);
|
||||
}
|
||||
|
||||
if (startsWith(file_name, "resource_") && endsWith(file_name, ".sql"))
|
||||
{
|
||||
size_t prefix_length = strlen("resource_");
|
||||
size_t suffix_length = strlen(".sql");
|
||||
String name = unescapeForFileName(file_name.substr(prefix_length, file_name.length() - prefix_length - suffix_length));
|
||||
|
||||
if (name.empty())
|
||||
continue;
|
||||
|
||||
ASTPtr ast = tryLoadEntity(WorkloadEntityType::Resource, name, dir_path + it.name(), /* check_file_exists= */ false);
|
||||
if (ast)
|
||||
entities_name_and_queries.emplace_back(name, ast);
|
||||
}
|
||||
}
|
||||
|
||||
setAllEntities(entities_name_and_queries);
|
||||
entities_loaded = true;
|
||||
|
||||
LOG_DEBUG(log, "Workload entities loaded");
|
||||
}
|
||||
|
||||
|
||||
void WorkloadEntityDiskStorage::createDirectory()
|
||||
{
|
||||
std::error_code create_dir_error_code;
|
||||
fs::create_directories(dir_path, create_dir_error_code);
|
||||
if (!fs::exists(dir_path) || !fs::is_directory(dir_path) || create_dir_error_code)
|
||||
throw Exception(ErrorCodes::DIRECTORY_DOESNT_EXIST, "Couldn't create directory {} reason: '{}'",
|
||||
dir_path, create_dir_error_code.message());
|
||||
}
|
||||
|
||||
|
||||
bool WorkloadEntityDiskStorage::storeEntityImpl(
|
||||
const ContextPtr & /*current_context*/,
|
||||
WorkloadEntityType entity_type,
|
||||
const String & entity_name,
|
||||
ASTPtr create_entity_query,
|
||||
bool throw_if_exists,
|
||||
bool replace_if_exists,
|
||||
const Settings & settings)
|
||||
{
|
||||
createDirectory();
|
||||
String file_path = getFilePath(entity_type, entity_name);
|
||||
LOG_DEBUG(log, "Storing workload entity {} to file {}", backQuote(entity_name), file_path);
|
||||
|
||||
if (fs::exists(file_path))
|
||||
{
|
||||
if (throw_if_exists)
|
||||
throw Exception(ErrorCodes::WORKLOAD_ENTITY_ALREADY_EXISTS, "Workload entity '{}' already exists", entity_name);
|
||||
else if (!replace_if_exists)
|
||||
return false;
|
||||
}
|
||||
|
||||
WriteBufferFromOwnString create_statement_buf;
|
||||
formatAST(*create_entity_query, create_statement_buf, false);
|
||||
writeChar('\n', create_statement_buf);
|
||||
String create_statement = create_statement_buf.str();
|
||||
|
||||
String temp_file_path = file_path + ".tmp";
|
||||
|
||||
try
|
||||
{
|
||||
WriteBufferFromFile out(temp_file_path, create_statement.size());
|
||||
writeString(create_statement, out);
|
||||
out.next();
|
||||
if (settings.fsync_metadata)
|
||||
out.sync();
|
||||
out.close();
|
||||
|
||||
if (replace_if_exists)
|
||||
fs::rename(temp_file_path, file_path);
|
||||
else
|
||||
renameNoReplace(temp_file_path, file_path);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
fs::remove(temp_file_path);
|
||||
throw;
|
||||
}
|
||||
|
||||
LOG_TRACE(log, "Entity {} stored", backQuote(entity_name));
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool WorkloadEntityDiskStorage::removeEntityImpl(
|
||||
const ContextPtr & /*current_context*/,
|
||||
WorkloadEntityType entity_type,
|
||||
const String & entity_name,
|
||||
bool throw_if_not_exists)
|
||||
{
|
||||
String file_path = getFilePath(entity_type, entity_name);
|
||||
LOG_DEBUG(log, "Removing workload entity {} stored in file {}", backQuote(entity_name), file_path);
|
||||
|
||||
bool existed = fs::remove(file_path);
|
||||
|
||||
if (!existed)
|
||||
{
|
||||
if (throw_if_not_exists)
|
||||
throw Exception(ErrorCodes::UNKNOWN_WORKLOAD_ENTITY, "Workload entity '{}' doesn't exist", entity_name);
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
LOG_TRACE(log, "Entity {} removed", backQuote(entity_name));
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
String WorkloadEntityDiskStorage::getFilePath(WorkloadEntityType entity_type, const String & entity_name) const
|
||||
{
|
||||
String file_path;
|
||||
switch (entity_type)
|
||||
{
|
||||
case WorkloadEntityType::Workload:
|
||||
{
|
||||
file_path = dir_path + "workload_" + escapeForFileName(entity_name) + ".sql";
|
||||
break;
|
||||
}
|
||||
case WorkloadEntityType::Resource:
|
||||
{
|
||||
file_path = dir_path + "resource_" + escapeForFileName(entity_name) + ".sql";
|
||||
break;
|
||||
}
|
||||
case WorkloadEntityType::MAX: break;
|
||||
}
|
||||
return file_path;
|
||||
}
|
||||
|
||||
}
|
48
src/Common/Scheduler/Workload/WorkloadEntityDiskStorage.h
Normal file
48
src/Common/Scheduler/Workload/WorkloadEntityDiskStorage.h
Normal file
@ -0,0 +1,48 @@
|
||||
#pragma once
|
||||
|
||||
#include <Common/Scheduler/Workload/WorkloadEntityStorageBase.h>
|
||||
#include <Interpreters/Context_fwd.h>
|
||||
#include <Parsers/IAST_fwd.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Loads workload entities from a specified folder.
|
||||
class WorkloadEntityDiskStorage : public WorkloadEntityStorageBase
|
||||
{
|
||||
public:
|
||||
WorkloadEntityDiskStorage(const ContextPtr & global_context_, const String & dir_path_);
|
||||
|
||||
void loadEntities() override;
|
||||
|
||||
void reloadEntities() override;
|
||||
|
||||
private:
|
||||
bool storeEntityImpl(
|
||||
const ContextPtr & current_context,
|
||||
WorkloadEntityType entity_type,
|
||||
const String & entity_name,
|
||||
ASTPtr create_entity_query,
|
||||
bool throw_if_exists,
|
||||
bool replace_if_exists,
|
||||
const Settings & settings) override;
|
||||
|
||||
bool removeEntityImpl(
|
||||
const ContextPtr & current_context,
|
||||
WorkloadEntityType entity_type,
|
||||
const String & entity_name,
|
||||
bool throw_if_not_exists) override;
|
||||
|
||||
void createDirectory();
|
||||
void loadEntitiesImpl();
|
||||
ASTPtr tryLoadEntity(WorkloadEntityType entity_type, const String & entity_name);
|
||||
ASTPtr tryLoadEntity(WorkloadEntityType entity_type, const String & entity_name, const String & file_path, bool check_file_exists);
|
||||
String getFilePath(WorkloadEntityType entity_type, const String & entity_name) const;
|
||||
|
||||
String dir_path;
|
||||
LoggerPtr log;
|
||||
std::atomic<bool> entities_loaded = false;
|
||||
};
|
||||
|
||||
}
|
315
src/Common/Scheduler/Workload/WorkloadEntityStorageBase.cpp
Normal file
315
src/Common/Scheduler/Workload/WorkloadEntityStorageBase.cpp
Normal file
@ -0,0 +1,315 @@
|
||||
#include <Common/Scheduler/Workload/WorkloadEntityStorageBase.h>
|
||||
|
||||
#include <boost/container/flat_set.hpp>
|
||||
#include <boost/range/algorithm/copy.hpp>
|
||||
|
||||
#include <Core/Settings.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Parsers/ASTCreateWorkloadQuery.h>
|
||||
#include <Parsers/ASTCreateResourceQuery.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int WORKLOAD_ENTITY_ALREADY_EXISTS;
|
||||
extern const int UNKNOWN_WORKLOAD_ENTITY;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
ASTPtr normalizeCreateWorkloadEntityQuery(const IAST & create_query, const ContextPtr & context)
|
||||
{
|
||||
UNUSED(context);
|
||||
auto ptr = create_query.clone();
|
||||
if (auto * res = typeid_cast<ASTCreateWorkloadQuery *>(ptr.get()))
|
||||
{
|
||||
res->if_not_exists = false;
|
||||
res->or_replace = false;
|
||||
}
|
||||
if (auto * res = typeid_cast<ASTCreateResourceQuery *>(ptr.get()))
|
||||
{
|
||||
res->if_not_exists = false;
|
||||
res->or_replace = false;
|
||||
}
|
||||
return ptr;
|
||||
}
|
||||
|
||||
WorkloadEntityType getEntityType(const ASTPtr & ptr)
|
||||
{
|
||||
if (auto * res = typeid_cast<ASTCreateWorkloadQuery *>(ptr.get()))
|
||||
return WorkloadEntityType::Workload;
|
||||
if (auto * res = typeid_cast<ASTCreateResourceQuery *>(ptr.get()))
|
||||
return WorkloadEntityType::Resource;
|
||||
chassert(false);
|
||||
return WorkloadEntityType::MAX;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
WorkloadEntityStorageBase::WorkloadEntityStorageBase(ContextPtr global_context_)
|
||||
: global_context(std::move(global_context_))
|
||||
{}
|
||||
|
||||
ASTPtr WorkloadEntityStorageBase::get(const String & entity_name) const
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
|
||||
auto it = entities.find(entity_name);
|
||||
if (it == entities.end())
|
||||
throw Exception(ErrorCodes::UNKNOWN_WORKLOAD_ENTITY,
|
||||
"The workload entity name '{}' is not saved",
|
||||
entity_name);
|
||||
|
||||
return it->second;
|
||||
}
|
||||
|
||||
ASTPtr WorkloadEntityStorageBase::tryGet(const std::string & entity_name) const
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
|
||||
auto it = entities.find(entity_name);
|
||||
if (it == entities.end())
|
||||
return nullptr;
|
||||
|
||||
return it->second;
|
||||
}
|
||||
|
||||
bool WorkloadEntityStorageBase::has(const String & entity_name) const
|
||||
{
|
||||
return tryGet(entity_name) != nullptr;
|
||||
}
|
||||
|
||||
std::vector<std::string> WorkloadEntityStorageBase::getAllEntityNames() const
|
||||
{
|
||||
std::vector<std::string> entity_names;
|
||||
|
||||
std::lock_guard lock(mutex);
|
||||
entity_names.reserve(entities.size());
|
||||
|
||||
for (const auto & [name, _] : entities)
|
||||
entity_names.emplace_back(name);
|
||||
|
||||
return entity_names;
|
||||
}
|
||||
|
||||
std::vector<std::string> WorkloadEntityStorageBase::getAllEntityNames(WorkloadEntityType entity_type) const
|
||||
{
|
||||
std::vector<std::string> entity_names;
|
||||
|
||||
std::lock_guard lock(mutex);
|
||||
for (const auto & [name, entity] : entities)
|
||||
{
|
||||
if (getEntityType(entity) == entity_type)
|
||||
entity_names.emplace_back(name);
|
||||
}
|
||||
|
||||
return entity_names;
|
||||
}
|
||||
|
||||
bool WorkloadEntityStorageBase::empty() const
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
return entities.empty();
|
||||
}
|
||||
|
||||
bool WorkloadEntityStorageBase::storeEntity(
|
||||
const ContextPtr & current_context,
|
||||
WorkloadEntityType entity_type,
|
||||
const String & entity_name,
|
||||
ASTPtr create_entity_query,
|
||||
bool throw_if_exists,
|
||||
bool replace_if_exists,
|
||||
const Settings & settings)
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
|
||||
create_entity_query = normalizeCreateWorkloadEntityQuery(*create_entity_query, global_context);
|
||||
|
||||
auto it = entities.find(entity_name);
|
||||
if (it != entities.end())
|
||||
{
|
||||
if (throw_if_exists)
|
||||
throw Exception(ErrorCodes::WORKLOAD_ENTITY_ALREADY_EXISTS, "Workload entity '{}' already exists", entity_name);
|
||||
else if (!replace_if_exists)
|
||||
return false;
|
||||
}
|
||||
|
||||
bool stored = storeEntityImpl(
|
||||
current_context,
|
||||
entity_type,
|
||||
entity_name,
|
||||
create_entity_query,
|
||||
throw_if_exists,
|
||||
replace_if_exists,
|
||||
settings);
|
||||
|
||||
if (stored)
|
||||
{
|
||||
entities[entity_name] = create_entity_query;
|
||||
onEntityAdded(entity_type, entity_name, create_entity_query);
|
||||
}
|
||||
|
||||
sendNotifications();
|
||||
|
||||
return stored;
|
||||
}
|
||||
|
||||
bool WorkloadEntityStorageBase::removeEntity(
|
||||
const ContextPtr & current_context,
|
||||
WorkloadEntityType entity_type,
|
||||
const String & entity_name,
|
||||
bool throw_if_not_exists)
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
auto it = entities.find(entity_name);
|
||||
if (it == entities.end())
|
||||
{
|
||||
if (throw_if_not_exists)
|
||||
throw Exception(ErrorCodes::UNKNOWN_WORKLOAD_ENTITY, "Workload entity '{}' doesn't exist", entity_name);
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
bool removed = removeEntityImpl(
|
||||
current_context,
|
||||
entity_type,
|
||||
entity_name,
|
||||
throw_if_not_exists);
|
||||
|
||||
if (removed)
|
||||
{
|
||||
entities.erase(entity_name);
|
||||
onEntityRemoved(entity_type, entity_name);
|
||||
}
|
||||
|
||||
sendNotifications();
|
||||
|
||||
return removed;
|
||||
}
|
||||
|
||||
scope_guard WorkloadEntityStorageBase::subscribeForChanges(
|
||||
WorkloadEntityType entity_type,
|
||||
const OnChangedHandler & handler)
|
||||
{
|
||||
std::lock_guard lock{handlers->mutex};
|
||||
auto & list = handlers->by_type[static_cast<size_t>(entity_type)];
|
||||
list.push_back(handler);
|
||||
auto handler_it = std::prev(list.end());
|
||||
|
||||
return [my_handlers = handlers, entity_type, handler_it]
|
||||
{
|
||||
std::lock_guard lock2{my_handlers->mutex};
|
||||
auto & list2 = my_handlers->by_type[static_cast<size_t>(entity_type)];
|
||||
list2.erase(handler_it);
|
||||
};
|
||||
}
|
||||
|
||||
void WorkloadEntityStorageBase::onEntityAdded(WorkloadEntityType entity_type, const String & entity_name, const ASTPtr & new_entity)
|
||||
{
|
||||
std::lock_guard lock{queue_mutex};
|
||||
Event event;
|
||||
event.name = entity_name;
|
||||
event.type = entity_type;
|
||||
event.entity = new_entity;
|
||||
queue.push(std::move(event));
|
||||
}
|
||||
|
||||
void WorkloadEntityStorageBase::onEntityUpdated(WorkloadEntityType entity_type, const String & entity_name, const ASTPtr & changed_entity)
|
||||
{
|
||||
std::lock_guard lock{queue_mutex};
|
||||
Event event;
|
||||
event.name = entity_name;
|
||||
event.type = entity_type;
|
||||
event.entity = changed_entity;
|
||||
queue.push(std::move(event));
|
||||
}
|
||||
|
||||
void WorkloadEntityStorageBase::onEntityRemoved(WorkloadEntityType entity_type, const String & entity_name)
|
||||
{
|
||||
std::lock_guard lock{queue_mutex};
|
||||
Event event;
|
||||
event.name = entity_name;
|
||||
event.type = entity_type;
|
||||
queue.push(std::move(event));
|
||||
}
|
||||
|
||||
void WorkloadEntityStorageBase::sendNotifications()
|
||||
{
|
||||
/// Only one thread can send notification at any time.
|
||||
std::lock_guard sending_notifications_lock{sending_notifications};
|
||||
|
||||
std::unique_lock queue_lock{queue_mutex};
|
||||
while (!queue.empty())
|
||||
{
|
||||
auto event = std::move(queue.front());
|
||||
queue.pop();
|
||||
queue_lock.unlock();
|
||||
|
||||
std::vector<OnChangedHandler> current_handlers;
|
||||
{
|
||||
std::lock_guard handlers_lock{handlers->mutex};
|
||||
boost::range::copy(handlers->by_type[static_cast<size_t>(event.type)], std::back_inserter(current_handlers));
|
||||
}
|
||||
|
||||
for (const auto & handler : current_handlers)
|
||||
{
|
||||
try
|
||||
{
|
||||
handler(event.type, event.name, event.entity);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
}
|
||||
}
|
||||
|
||||
queue_lock.lock();
|
||||
}
|
||||
}
|
||||
|
||||
std::unique_lock<std::recursive_mutex> WorkloadEntityStorageBase::getLock() const
|
||||
{
|
||||
return std::unique_lock{mutex};
|
||||
}
|
||||
|
||||
void WorkloadEntityStorageBase::setAllEntities(const std::vector<std::pair<String, ASTPtr>> & new_entities)
|
||||
{
|
||||
std::unordered_map<String, ASTPtr> normalized_entities;
|
||||
for (const auto & [entity_name, create_query] : new_entities)
|
||||
normalized_entities[entity_name] = normalizeCreateWorkloadEntityQuery(*create_query, global_context);
|
||||
|
||||
// TODO(serxa): do validation and throw LOGICAL_ERROR if failed
|
||||
|
||||
// Note that notifications are not sent, because it is hard to send notifications in right order to maintain invariants.
|
||||
// Another code path using getAllEntities() should be used for initialization
|
||||
|
||||
std::lock_guard lock(mutex);
|
||||
entities = std::move(normalized_entities);
|
||||
}
|
||||
|
||||
std::vector<std::pair<String, ASTPtr>> WorkloadEntityStorageBase::getAllEntities() const
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
std::vector<std::pair<String, ASTPtr>> all_entities;
|
||||
all_entities.reserve(entities.size());
|
||||
std::copy(entities.begin(), entities.end(), std::back_inserter(all_entities));
|
||||
return all_entities;
|
||||
}
|
||||
|
||||
// TODO(serxa): add notifications or remove this function
|
||||
void WorkloadEntityStorageBase::removeAllEntitiesExcept(const Strings & entity_names_to_keep)
|
||||
{
|
||||
boost::container::flat_set<std::string_view> names_set_to_keep{entity_names_to_keep.begin(), entity_names_to_keep.end()};
|
||||
std::lock_guard lock(mutex);
|
||||
for (auto it = entities.begin(); it != entities.end();)
|
||||
{
|
||||
auto current = it++;
|
||||
if (!names_set_to_keep.contains(current->first))
|
||||
entities.erase(current);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
109
src/Common/Scheduler/Workload/WorkloadEntityStorageBase.h
Normal file
109
src/Common/Scheduler/Workload/WorkloadEntityStorageBase.h
Normal file
@ -0,0 +1,109 @@
|
||||
#pragma once
|
||||
|
||||
#include <unordered_map>
|
||||
#include <list>
|
||||
#include <mutex>
|
||||
#include <queue>
|
||||
|
||||
#include <Common/Scheduler/Workload/IWorkloadEntityStorage.h>
|
||||
#include <Interpreters/Context_fwd.h>
|
||||
|
||||
#include <Parsers/IAST.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class WorkloadEntityStorageBase : public IWorkloadEntityStorage
|
||||
{
|
||||
public:
|
||||
explicit WorkloadEntityStorageBase(ContextPtr global_context_);
|
||||
ASTPtr get(const String & entity_name) const override;
|
||||
|
||||
ASTPtr tryGet(const String & entity_name) const override;
|
||||
|
||||
bool has(const String & entity_name) const override;
|
||||
|
||||
std::vector<String> getAllEntityNames() const override;
|
||||
std::vector<String> getAllEntityNames(WorkloadEntityType entity_type) const override;
|
||||
|
||||
std::vector<std::pair<String, ASTPtr>> getAllEntities() const override;
|
||||
|
||||
bool empty() const override;
|
||||
|
||||
bool storeEntity(
|
||||
const ContextPtr & current_context,
|
||||
WorkloadEntityType entity_type,
|
||||
const String & entity_name,
|
||||
ASTPtr create_entity_query,
|
||||
bool throw_if_exists,
|
||||
bool replace_if_exists,
|
||||
const Settings & settings) override;
|
||||
|
||||
bool removeEntity(
|
||||
const ContextPtr & current_context,
|
||||
WorkloadEntityType entity_type,
|
||||
const String & entity_name,
|
||||
bool throw_if_not_exists) override;
|
||||
|
||||
virtual scope_guard subscribeForChanges(
|
||||
WorkloadEntityType entity_type,
|
||||
const OnChangedHandler & handler) override;
|
||||
|
||||
protected:
|
||||
virtual bool storeEntityImpl(
|
||||
const ContextPtr & current_context,
|
||||
WorkloadEntityType entity_type,
|
||||
const String & entity_name,
|
||||
ASTPtr create_entity_query,
|
||||
bool throw_if_exists,
|
||||
bool replace_if_exists,
|
||||
const Settings & settings) = 0;
|
||||
|
||||
virtual bool removeEntityImpl(
|
||||
const ContextPtr & current_context,
|
||||
WorkloadEntityType entity_type,
|
||||
const String & entity_name,
|
||||
bool throw_if_not_exists) = 0;
|
||||
|
||||
std::unique_lock<std::recursive_mutex> getLock() const;
|
||||
void setAllEntities(const std::vector<std::pair<String, ASTPtr>> & new_entities);
|
||||
void removeAllEntitiesExcept(const Strings & entity_names_to_keep);
|
||||
|
||||
/// Called by derived class after a new workload entity has been added.
|
||||
void onEntityAdded(WorkloadEntityType entity_type, const String & entity_name, const ASTPtr & new_entity);
|
||||
|
||||
/// Called by derived class after an workload entity has been changed.
|
||||
void onEntityUpdated(WorkloadEntityType entity_type, const String & entity_name, const ASTPtr & changed_entity);
|
||||
|
||||
/// Called by derived class after an workload entity has been removed.
|
||||
void onEntityRemoved(WorkloadEntityType entity_type, const String & entity_name);
|
||||
|
||||
/// Sends notifications to subscribers about changes in workload entities
|
||||
/// (added with previous calls onEntityAdded(), onEntityUpdated(), onEntityRemoved()).
|
||||
void sendNotifications();
|
||||
|
||||
struct Handlers
|
||||
{
|
||||
std::mutex mutex;
|
||||
std::list<OnChangedHandler> by_type[static_cast<size_t>(WorkloadEntityType::MAX)];
|
||||
};
|
||||
/// shared_ptr is here for safety because WorkloadEntityStorageBase can be destroyed before all subscriptions are removed.
|
||||
std::shared_ptr<Handlers> handlers;
|
||||
|
||||
struct Event
|
||||
{
|
||||
WorkloadEntityType type;
|
||||
String name;
|
||||
ASTPtr entity;
|
||||
};
|
||||
std::queue<Event> queue;
|
||||
std::mutex queue_mutex;
|
||||
std::mutex sending_notifications;
|
||||
|
||||
mutable std::recursive_mutex mutex;
|
||||
std::unordered_map<String, ASTPtr> entities; // Maps entity name into CREATE entity query
|
||||
|
||||
ContextPtr global_context;
|
||||
};
|
||||
|
||||
}
|
@ -0,0 +1,48 @@
|
||||
#include <Common/Scheduler/Workload/createWorkloadEntityStorage.h>
|
||||
#include <Common/Scheduler/Workload/WorkloadEntityDiskStorage.h>
|
||||
#include <Common/Scheduler/Workload/WorkloadEntityKeeperStorage.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Poco/Util/AbstractConfiguration.h>
|
||||
#include <filesystem>
|
||||
#include <memory>
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int INVALID_CONFIG_PARAMETER;
|
||||
}
|
||||
|
||||
std::unique_ptr<IWorkloadEntityStorage> createWorkloadEntityStorage(const ContextMutablePtr & global_context)
|
||||
{
|
||||
const String zookeeper_path_key = "workload_zookeeper_path";
|
||||
const String disk_path_key = "workload_path";
|
||||
|
||||
const auto & config = global_context->getConfigRef();
|
||||
if (config.has(zookeeper_path_key))
|
||||
{
|
||||
if (config.has(disk_path_key))
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::INVALID_CONFIG_PARAMETER,
|
||||
"'{}' and '{}' must not be both specified in the config",
|
||||
zookeeper_path_key,
|
||||
disk_path_key);
|
||||
}
|
||||
abort(); // TODO(serxa): crate WorkloadEntityKeeperStorage object
|
||||
//return std::make_unique<WorkloadEntityKeeperStorage>(global_context, config.getString(zookeeper_path_key));
|
||||
}
|
||||
else
|
||||
{
|
||||
String default_path = fs::path{global_context->getPath()} / "workload" / "";
|
||||
String path = config.getString(disk_path_key, default_path);
|
||||
return std::make_unique<WorkloadEntityDiskStorage>(global_context, path);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
11
src/Common/Scheduler/Workload/createWorkloadEntityStorage.h
Normal file
11
src/Common/Scheduler/Workload/createWorkloadEntityStorage.h
Normal file
@ -0,0 +1,11 @@
|
||||
#pragma once
|
||||
|
||||
#include <Interpreters/Context_fwd.h>
|
||||
#include <Common/Scheduler/Workload/IWorkloadEntityStorage.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
std::unique_ptr<IWorkloadEntityStorage> createWorkloadEntityStorage(const ContextMutablePtr & global_context);
|
||||
|
||||
}
|
17
src/Common/Scheduler/createResourceManager.cpp
Normal file
17
src/Common/Scheduler/createResourceManager.cpp
Normal file
@ -0,0 +1,17 @@
|
||||
#include <Common/Scheduler/createResourceManager.h>
|
||||
#include <Common/Scheduler/Nodes/DynamicResourceManager.h>
|
||||
#include <Common/Scheduler/Nodes/IOResourceManager.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Poco/Util/AbstractConfiguration.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
ResourceManagerPtr createResourceManager(const ContextMutablePtr & global_context)
|
||||
{
|
||||
// TODO(serxa): combine DynamicResourceManager and IOResourceManaged to work together, because now old ResourceManager is disabled
|
||||
// const auto & config = global_context->getConfigRef();
|
||||
return std::make_shared<IOResourceManager>(global_context->getWorkloadEntityStorage());
|
||||
}
|
||||
|
||||
}
|
11
src/Common/Scheduler/createResourceManager.h
Normal file
11
src/Common/Scheduler/createResourceManager.h
Normal file
@ -0,0 +1,11 @@
|
||||
#pragma once
|
||||
|
||||
#include <Interpreters/Context_fwd.h>
|
||||
#include <Common/Scheduler/IResourceManager.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
ResourceManagerPtr createResourceManager(const ContextMutablePtr & global_context);
|
||||
|
||||
}
|
@ -181,12 +181,6 @@ void SetACLRequest::addRootPath(const String & root_path) { Coordination::addRoo
|
||||
void GetACLRequest::addRootPath(const String & root_path) { Coordination::addRootPath(path, root_path); }
|
||||
void SyncRequest::addRootPath(const String & root_path) { Coordination::addRootPath(path, root_path); }
|
||||
|
||||
void MultiRequest::addRootPath(const String & root_path)
|
||||
{
|
||||
for (auto & request : requests)
|
||||
request->addRootPath(root_path);
|
||||
}
|
||||
|
||||
void CreateResponse::removeRootPath(const String & root_path) { Coordination::removeRootPath(path_created, root_path); }
|
||||
void WatchResponse::removeRootPath(const String & root_path) { Coordination::removeRootPath(path, root_path); }
|
||||
|
||||
|
@ -408,11 +408,17 @@ struct ReconfigResponse : virtual Response
|
||||
size_t bytesSize() const override { return value.size() + sizeof(stat); }
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct MultiRequest : virtual Request
|
||||
{
|
||||
Requests requests;
|
||||
std::vector<T> requests;
|
||||
|
||||
void addRootPath(const String & root_path) override
|
||||
{
|
||||
for (auto & request : requests)
|
||||
request->addRootPath(root_path);
|
||||
}
|
||||
|
||||
void addRootPath(const String & root_path) override;
|
||||
String getPath() const override { return {}; }
|
||||
|
||||
size_t bytesSize() const override
|
||||
|
@ -184,7 +184,7 @@ struct TestKeeperReconfigRequest final : ReconfigRequest, TestKeeperRequest
|
||||
std::pair<ResponsePtr, Undo> process(TestKeeper::Container & container, int64_t zxid) const override;
|
||||
};
|
||||
|
||||
struct TestKeeperMultiRequest final : MultiRequest, TestKeeperRequest
|
||||
struct TestKeeperMultiRequest final : MultiRequest<RequestPtr>, TestKeeperRequest
|
||||
{
|
||||
explicit TestKeeperMultiRequest(const Requests & generic_requests)
|
||||
: TestKeeperMultiRequest(std::span(generic_requests))
|
||||
|
@ -18,14 +18,16 @@ using namespace DB;
|
||||
|
||||
void ZooKeeperResponse::write(WriteBuffer & out) const
|
||||
{
|
||||
/// Excessive copy to calculate length.
|
||||
WriteBufferFromOwnString buf;
|
||||
Coordination::write(xid, buf);
|
||||
Coordination::write(zxid, buf);
|
||||
Coordination::write(error, buf);
|
||||
auto response_size = Coordination::size(xid) + Coordination::size(zxid) + Coordination::size(error);
|
||||
if (error == Error::ZOK)
|
||||
writeImpl(buf);
|
||||
Coordination::write(buf.str(), out);
|
||||
response_size += sizeImpl();
|
||||
|
||||
Coordination::write(static_cast<int32_t>(response_size), out);
|
||||
Coordination::write(xid, out);
|
||||
Coordination::write(zxid, out);
|
||||
Coordination::write(error, out);
|
||||
if (error == Error::ZOK)
|
||||
writeImpl(out);
|
||||
}
|
||||
|
||||
std::string ZooKeeperRequest::toString(bool short_format) const
|
||||
@ -41,12 +43,12 @@ std::string ZooKeeperRequest::toString(bool short_format) const
|
||||
|
||||
void ZooKeeperRequest::write(WriteBuffer & out) const
|
||||
{
|
||||
/// Excessive copy to calculate length.
|
||||
WriteBufferFromOwnString buf;
|
||||
Coordination::write(xid, buf);
|
||||
Coordination::write(getOpNum(), buf);
|
||||
writeImpl(buf);
|
||||
Coordination::write(buf.str(), out);
|
||||
auto request_size = Coordination::size(xid) + Coordination::size(getOpNum()) + sizeImpl();
|
||||
|
||||
Coordination::write(static_cast<int32_t>(request_size), out);
|
||||
Coordination::write(xid, out);
|
||||
Coordination::write(getOpNum(), out);
|
||||
writeImpl(out);
|
||||
}
|
||||
|
||||
void ZooKeeperSyncRequest::writeImpl(WriteBuffer & out) const
|
||||
@ -54,6 +56,11 @@ void ZooKeeperSyncRequest::writeImpl(WriteBuffer & out) const
|
||||
Coordination::write(path, out);
|
||||
}
|
||||
|
||||
size_t ZooKeeperSyncRequest::sizeImpl() const
|
||||
{
|
||||
return Coordination::size(path);
|
||||
}
|
||||
|
||||
void ZooKeeperSyncRequest::readImpl(ReadBuffer & in)
|
||||
{
|
||||
Coordination::read(path, in);
|
||||
@ -74,6 +81,11 @@ void ZooKeeperSyncResponse::writeImpl(WriteBuffer & out) const
|
||||
Coordination::write(path, out);
|
||||
}
|
||||
|
||||
size_t ZooKeeperSyncResponse::sizeImpl() const
|
||||
{
|
||||
return Coordination::size(path);
|
||||
}
|
||||
|
||||
void ZooKeeperReconfigRequest::writeImpl(WriteBuffer & out) const
|
||||
{
|
||||
Coordination::write(joining, out);
|
||||
@ -82,6 +94,11 @@ void ZooKeeperReconfigRequest::writeImpl(WriteBuffer & out) const
|
||||
Coordination::write(version, out);
|
||||
}
|
||||
|
||||
size_t ZooKeeperReconfigRequest::sizeImpl() const
|
||||
{
|
||||
return Coordination::size(joining) + Coordination::size(leaving) + Coordination::size(new_members) + Coordination::size(version);
|
||||
}
|
||||
|
||||
void ZooKeeperReconfigRequest::readImpl(ReadBuffer & in)
|
||||
{
|
||||
Coordination::read(joining, in);
|
||||
@ -109,6 +126,11 @@ void ZooKeeperReconfigResponse::writeImpl(WriteBuffer & out) const
|
||||
Coordination::write(stat, out);
|
||||
}
|
||||
|
||||
size_t ZooKeeperReconfigResponse::sizeImpl() const
|
||||
{
|
||||
return Coordination::size(value) + Coordination::size(stat);
|
||||
}
|
||||
|
||||
void ZooKeeperWatchResponse::readImpl(ReadBuffer & in)
|
||||
{
|
||||
Coordination::read(type, in);
|
||||
@ -123,6 +145,11 @@ void ZooKeeperWatchResponse::writeImpl(WriteBuffer & out) const
|
||||
Coordination::write(path, out);
|
||||
}
|
||||
|
||||
size_t ZooKeeperWatchResponse::sizeImpl() const
|
||||
{
|
||||
return Coordination::size(type) + Coordination::size(state) + Coordination::size(path);
|
||||
}
|
||||
|
||||
void ZooKeeperWatchResponse::write(WriteBuffer & out) const
|
||||
{
|
||||
if (error == Error::ZOK)
|
||||
@ -137,6 +164,11 @@ void ZooKeeperAuthRequest::writeImpl(WriteBuffer & out) const
|
||||
Coordination::write(data, out);
|
||||
}
|
||||
|
||||
size_t ZooKeeperAuthRequest::sizeImpl() const
|
||||
{
|
||||
return Coordination::size(type) + Coordination::size(scheme) + Coordination::size(data);
|
||||
}
|
||||
|
||||
void ZooKeeperAuthRequest::readImpl(ReadBuffer & in)
|
||||
{
|
||||
Coordination::read(type, in);
|
||||
@ -175,6 +207,12 @@ void ZooKeeperCreateRequest::writeImpl(WriteBuffer & out) const
|
||||
Coordination::write(flags, out);
|
||||
}
|
||||
|
||||
size_t ZooKeeperCreateRequest::sizeImpl() const
|
||||
{
|
||||
int32_t flags = 0;
|
||||
return Coordination::size(path) + Coordination::size(data) + Coordination::size(acls) + Coordination::size(flags);
|
||||
}
|
||||
|
||||
void ZooKeeperCreateRequest::readImpl(ReadBuffer & in)
|
||||
{
|
||||
Coordination::read(path, in);
|
||||
@ -211,12 +249,22 @@ void ZooKeeperCreateResponse::writeImpl(WriteBuffer & out) const
|
||||
Coordination::write(path_created, out);
|
||||
}
|
||||
|
||||
size_t ZooKeeperCreateResponse::sizeImpl() const
|
||||
{
|
||||
return Coordination::size(path_created);
|
||||
}
|
||||
|
||||
void ZooKeeperRemoveRequest::writeImpl(WriteBuffer & out) const
|
||||
{
|
||||
Coordination::write(path, out);
|
||||
Coordination::write(version, out);
|
||||
}
|
||||
|
||||
size_t ZooKeeperRemoveRequest::sizeImpl() const
|
||||
{
|
||||
return Coordination::size(path) + Coordination::size(version);
|
||||
}
|
||||
|
||||
std::string ZooKeeperRemoveRequest::toStringImpl(bool /*short_format*/) const
|
||||
{
|
||||
return fmt::format(
|
||||
@ -244,6 +292,11 @@ void ZooKeeperRemoveRecursiveRequest::readImpl(ReadBuffer & in)
|
||||
Coordination::read(remove_nodes_limit, in);
|
||||
}
|
||||
|
||||
size_t ZooKeeperRemoveRecursiveRequest::sizeImpl() const
|
||||
{
|
||||
return Coordination::size(path) + Coordination::size(remove_nodes_limit);
|
||||
}
|
||||
|
||||
std::string ZooKeeperRemoveRecursiveRequest::toStringImpl(bool /*short_format*/) const
|
||||
{
|
||||
return fmt::format(
|
||||
@ -259,6 +312,11 @@ void ZooKeeperExistsRequest::writeImpl(WriteBuffer & out) const
|
||||
Coordination::write(has_watch, out);
|
||||
}
|
||||
|
||||
size_t ZooKeeperExistsRequest::sizeImpl() const
|
||||
{
|
||||
return Coordination::size(path) + Coordination::size(has_watch);
|
||||
}
|
||||
|
||||
void ZooKeeperExistsRequest::readImpl(ReadBuffer & in)
|
||||
{
|
||||
Coordination::read(path, in);
|
||||
@ -280,12 +338,22 @@ void ZooKeeperExistsResponse::writeImpl(WriteBuffer & out) const
|
||||
Coordination::write(stat, out);
|
||||
}
|
||||
|
||||
size_t ZooKeeperExistsResponse::sizeImpl() const
|
||||
{
|
||||
return Coordination::size(stat);
|
||||
}
|
||||
|
||||
void ZooKeeperGetRequest::writeImpl(WriteBuffer & out) const
|
||||
{
|
||||
Coordination::write(path, out);
|
||||
Coordination::write(has_watch, out);
|
||||
}
|
||||
|
||||
size_t ZooKeeperGetRequest::sizeImpl() const
|
||||
{
|
||||
return Coordination::size(path) + Coordination::size(has_watch);
|
||||
}
|
||||
|
||||
void ZooKeeperGetRequest::readImpl(ReadBuffer & in)
|
||||
{
|
||||
Coordination::read(path, in);
|
||||
@ -309,6 +377,11 @@ void ZooKeeperGetResponse::writeImpl(WriteBuffer & out) const
|
||||
Coordination::write(stat, out);
|
||||
}
|
||||
|
||||
size_t ZooKeeperGetResponse::sizeImpl() const
|
||||
{
|
||||
return Coordination::size(data) + Coordination::size(stat);
|
||||
}
|
||||
|
||||
void ZooKeeperSetRequest::writeImpl(WriteBuffer & out) const
|
||||
{
|
||||
Coordination::write(path, out);
|
||||
@ -316,6 +389,11 @@ void ZooKeeperSetRequest::writeImpl(WriteBuffer & out) const
|
||||
Coordination::write(version, out);
|
||||
}
|
||||
|
||||
size_t ZooKeeperSetRequest::sizeImpl() const
|
||||
{
|
||||
return Coordination::size(path) + Coordination::size(data) + Coordination::size(version);
|
||||
}
|
||||
|
||||
void ZooKeeperSetRequest::readImpl(ReadBuffer & in)
|
||||
{
|
||||
Coordination::read(path, in);
|
||||
@ -342,12 +420,22 @@ void ZooKeeperSetResponse::writeImpl(WriteBuffer & out) const
|
||||
Coordination::write(stat, out);
|
||||
}
|
||||
|
||||
size_t ZooKeeperSetResponse::sizeImpl() const
|
||||
{
|
||||
return Coordination::size(stat);
|
||||
}
|
||||
|
||||
void ZooKeeperListRequest::writeImpl(WriteBuffer & out) const
|
||||
{
|
||||
Coordination::write(path, out);
|
||||
Coordination::write(has_watch, out);
|
||||
}
|
||||
|
||||
size_t ZooKeeperListRequest::sizeImpl() const
|
||||
{
|
||||
return Coordination::size(path) + Coordination::size(has_watch);
|
||||
}
|
||||
|
||||
void ZooKeeperListRequest::readImpl(ReadBuffer & in)
|
||||
{
|
||||
Coordination::read(path, in);
|
||||
@ -366,6 +454,11 @@ void ZooKeeperFilteredListRequest::writeImpl(WriteBuffer & out) const
|
||||
Coordination::write(static_cast<uint8_t>(list_request_type), out);
|
||||
}
|
||||
|
||||
size_t ZooKeeperFilteredListRequest::sizeImpl() const
|
||||
{
|
||||
return Coordination::size(path) + Coordination::size(has_watch) + Coordination::size(static_cast<uint8_t>(list_request_type));
|
||||
}
|
||||
|
||||
void ZooKeeperFilteredListRequest::readImpl(ReadBuffer & in)
|
||||
{
|
||||
Coordination::read(path, in);
|
||||
@ -397,6 +490,11 @@ void ZooKeeperListResponse::writeImpl(WriteBuffer & out) const
|
||||
Coordination::write(stat, out);
|
||||
}
|
||||
|
||||
size_t ZooKeeperListResponse::sizeImpl() const
|
||||
{
|
||||
return Coordination::size(names) + Coordination::size(stat);
|
||||
}
|
||||
|
||||
void ZooKeeperSimpleListResponse::readImpl(ReadBuffer & in)
|
||||
{
|
||||
Coordination::read(names, in);
|
||||
@ -407,6 +505,11 @@ void ZooKeeperSimpleListResponse::writeImpl(WriteBuffer & out) const
|
||||
Coordination::write(names, out);
|
||||
}
|
||||
|
||||
size_t ZooKeeperSimpleListResponse::sizeImpl() const
|
||||
{
|
||||
return Coordination::size(names);
|
||||
}
|
||||
|
||||
void ZooKeeperSetACLRequest::writeImpl(WriteBuffer & out) const
|
||||
{
|
||||
Coordination::write(path, out);
|
||||
@ -414,6 +517,11 @@ void ZooKeeperSetACLRequest::writeImpl(WriteBuffer & out) const
|
||||
Coordination::write(version, out);
|
||||
}
|
||||
|
||||
size_t ZooKeeperSetACLRequest::sizeImpl() const
|
||||
{
|
||||
return Coordination::size(path) + Coordination::size(acls) + Coordination::size(version);
|
||||
}
|
||||
|
||||
void ZooKeeperSetACLRequest::readImpl(ReadBuffer & in)
|
||||
{
|
||||
Coordination::read(path, in);
|
||||
@ -431,6 +539,11 @@ void ZooKeeperSetACLResponse::writeImpl(WriteBuffer & out) const
|
||||
Coordination::write(stat, out);
|
||||
}
|
||||
|
||||
size_t ZooKeeperSetACLResponse::sizeImpl() const
|
||||
{
|
||||
return Coordination::size(stat);
|
||||
}
|
||||
|
||||
void ZooKeeperSetACLResponse::readImpl(ReadBuffer & in)
|
||||
{
|
||||
Coordination::read(stat, in);
|
||||
@ -446,6 +559,11 @@ void ZooKeeperGetACLRequest::writeImpl(WriteBuffer & out) const
|
||||
Coordination::write(path, out);
|
||||
}
|
||||
|
||||
size_t ZooKeeperGetACLRequest::sizeImpl() const
|
||||
{
|
||||
return Coordination::size(path);
|
||||
}
|
||||
|
||||
std::string ZooKeeperGetACLRequest::toStringImpl(bool /*short_format*/) const
|
||||
{
|
||||
return fmt::format("path = {}", path);
|
||||
@ -457,6 +575,11 @@ void ZooKeeperGetACLResponse::writeImpl(WriteBuffer & out) const
|
||||
Coordination::write(stat, out);
|
||||
}
|
||||
|
||||
size_t ZooKeeperGetACLResponse::sizeImpl() const
|
||||
{
|
||||
return Coordination::size(acl) + Coordination::size(stat);
|
||||
}
|
||||
|
||||
void ZooKeeperGetACLResponse::readImpl(ReadBuffer & in)
|
||||
{
|
||||
Coordination::read(acl, in);
|
||||
@ -469,6 +592,11 @@ void ZooKeeperCheckRequest::writeImpl(WriteBuffer & out) const
|
||||
Coordination::write(version, out);
|
||||
}
|
||||
|
||||
size_t ZooKeeperCheckRequest::sizeImpl() const
|
||||
{
|
||||
return Coordination::size(path) + Coordination::size(version);
|
||||
}
|
||||
|
||||
void ZooKeeperCheckRequest::readImpl(ReadBuffer & in)
|
||||
{
|
||||
Coordination::read(path, in);
|
||||
@ -494,6 +622,11 @@ void ZooKeeperErrorResponse::writeImpl(WriteBuffer & out) const
|
||||
Coordination::write(error, out);
|
||||
}
|
||||
|
||||
size_t ZooKeeperErrorResponse::sizeImpl() const
|
||||
{
|
||||
return Coordination::size(error);
|
||||
}
|
||||
|
||||
void ZooKeeperMultiRequest::checkOperationType(OperationType type)
|
||||
{
|
||||
chassert(!operation_type.has_value() || *operation_type == type);
|
||||
@ -596,6 +729,27 @@ void ZooKeeperMultiRequest::writeImpl(WriteBuffer & out) const
|
||||
Coordination::write(error, out);
|
||||
}
|
||||
|
||||
size_t ZooKeeperMultiRequest::sizeImpl() const
|
||||
{
|
||||
size_t total_size = 0;
|
||||
for (const auto & request : requests)
|
||||
{
|
||||
const auto & zk_request = dynamic_cast<const ZooKeeperRequest &>(*request);
|
||||
|
||||
bool done = false;
|
||||
int32_t error = -1;
|
||||
|
||||
total_size
|
||||
+= Coordination::size(zk_request.getOpNum()) + Coordination::size(done) + Coordination::size(error) + zk_request.sizeImpl();
|
||||
}
|
||||
|
||||
OpNum op_num = OpNum::Error;
|
||||
bool done = true;
|
||||
int32_t error = -1;
|
||||
|
||||
return total_size + Coordination::size(op_num) + Coordination::size(done) + Coordination::size(error);
|
||||
}
|
||||
|
||||
void ZooKeeperMultiRequest::readImpl(ReadBuffer & in)
|
||||
{
|
||||
while (true)
|
||||
@ -729,31 +883,54 @@ void ZooKeeperMultiResponse::writeImpl(WriteBuffer & out) const
|
||||
}
|
||||
}
|
||||
|
||||
ZooKeeperResponsePtr ZooKeeperHeartbeatRequest::makeResponse() const { return setTime(std::make_shared<ZooKeeperHeartbeatResponse>()); }
|
||||
ZooKeeperResponsePtr ZooKeeperSyncRequest::makeResponse() const { return setTime(std::make_shared<ZooKeeperSyncResponse>()); }
|
||||
ZooKeeperResponsePtr ZooKeeperAuthRequest::makeResponse() const { return setTime(std::make_shared<ZooKeeperAuthResponse>()); }
|
||||
ZooKeeperResponsePtr ZooKeeperRemoveRequest::makeResponse() const { return setTime(std::make_shared<ZooKeeperRemoveResponse>()); }
|
||||
ZooKeeperResponsePtr ZooKeeperRemoveRecursiveRequest::makeResponse() const { return setTime(std::make_shared<ZooKeeperRemoveRecursiveResponse>()); }
|
||||
ZooKeeperResponsePtr ZooKeeperExistsRequest::makeResponse() const { return setTime(std::make_shared<ZooKeeperExistsResponse>()); }
|
||||
ZooKeeperResponsePtr ZooKeeperGetRequest::makeResponse() const { return setTime(std::make_shared<ZooKeeperGetResponse>()); }
|
||||
ZooKeeperResponsePtr ZooKeeperSetRequest::makeResponse() const { return setTime(std::make_shared<ZooKeeperSetResponse>()); }
|
||||
ZooKeeperResponsePtr ZooKeeperReconfigRequest::makeResponse() const { return setTime(std::make_shared<ZooKeeperReconfigResponse>()); }
|
||||
ZooKeeperResponsePtr ZooKeeperListRequest::makeResponse() const { return setTime(std::make_shared<ZooKeeperListResponse>()); }
|
||||
ZooKeeperResponsePtr ZooKeeperSimpleListRequest::makeResponse() const { return setTime(std::make_shared<ZooKeeperSimpleListResponse>()); }
|
||||
size_t ZooKeeperMultiResponse::sizeImpl() const
|
||||
{
|
||||
size_t total_size = 0;
|
||||
for (const auto & response : responses)
|
||||
{
|
||||
const ZooKeeperResponse & zk_response = dynamic_cast<const ZooKeeperResponse &>(*response);
|
||||
OpNum op_num = zk_response.getOpNum();
|
||||
bool done = false;
|
||||
Error op_error = zk_response.error;
|
||||
|
||||
total_size += Coordination::size(op_num) + Coordination::size(done) + Coordination::size(op_error);
|
||||
if (op_error == Error::ZOK || op_num == OpNum::Error)
|
||||
total_size += zk_response.sizeImpl();
|
||||
}
|
||||
|
||||
/// Footer.
|
||||
OpNum op_num = OpNum::Error;
|
||||
bool done = true;
|
||||
int32_t error_read = - 1;
|
||||
|
||||
return total_size + Coordination::size(op_num) + Coordination::size(done) + Coordination::size(error_read);
|
||||
}
|
||||
|
||||
ZooKeeperResponsePtr ZooKeeperHeartbeatRequest::makeResponse() const { return std::make_shared<ZooKeeperHeartbeatResponse>(); }
|
||||
ZooKeeperResponsePtr ZooKeeperSyncRequest::makeResponse() const { return std::make_shared<ZooKeeperSyncResponse>(); }
|
||||
ZooKeeperResponsePtr ZooKeeperAuthRequest::makeResponse() const { return std::make_shared<ZooKeeperAuthResponse>(); }
|
||||
ZooKeeperResponsePtr ZooKeeperRemoveRequest::makeResponse() const { return std::make_shared<ZooKeeperRemoveResponse>(); }
|
||||
ZooKeeperResponsePtr ZooKeeperRemoveRecursiveRequest::makeResponse() const { return std::make_shared<ZooKeeperRemoveRecursiveResponse>(); }
|
||||
ZooKeeperResponsePtr ZooKeeperExistsRequest::makeResponse() const { return std::make_shared<ZooKeeperExistsResponse>(); }
|
||||
ZooKeeperResponsePtr ZooKeeperGetRequest::makeResponse() const { return std::make_shared<ZooKeeperGetResponse>(); }
|
||||
ZooKeeperResponsePtr ZooKeeperSetRequest::makeResponse() const { return std::make_shared<ZooKeeperSetResponse>(); }
|
||||
ZooKeeperResponsePtr ZooKeeperReconfigRequest::makeResponse() const { return std::make_shared<ZooKeeperReconfigResponse>(); }
|
||||
ZooKeeperResponsePtr ZooKeeperListRequest::makeResponse() const { return std::make_shared<ZooKeeperListResponse>(); }
|
||||
ZooKeeperResponsePtr ZooKeeperSimpleListRequest::makeResponse() const { return std::make_shared<ZooKeeperSimpleListResponse>(); }
|
||||
|
||||
ZooKeeperResponsePtr ZooKeeperCreateRequest::makeResponse() const
|
||||
{
|
||||
if (not_exists)
|
||||
return setTime(std::make_shared<ZooKeeperCreateIfNotExistsResponse>());
|
||||
return setTime(std::make_shared<ZooKeeperCreateResponse>());
|
||||
return std::make_shared<ZooKeeperCreateIfNotExistsResponse>();
|
||||
return std::make_shared<ZooKeeperCreateResponse>();
|
||||
}
|
||||
|
||||
ZooKeeperResponsePtr ZooKeeperCheckRequest::makeResponse() const
|
||||
{
|
||||
if (not_exists)
|
||||
return setTime(std::make_shared<ZooKeeperCheckNotExistsResponse>());
|
||||
return std::make_shared<ZooKeeperCheckNotExistsResponse>();
|
||||
|
||||
return setTime(std::make_shared<ZooKeeperCheckResponse>());
|
||||
return std::make_shared<ZooKeeperCheckResponse>();
|
||||
}
|
||||
|
||||
ZooKeeperResponsePtr ZooKeeperMultiRequest::makeResponse() const
|
||||
@ -764,11 +941,12 @@ ZooKeeperResponsePtr ZooKeeperMultiRequest::makeResponse() const
|
||||
else
|
||||
response = std::make_shared<ZooKeeperMultiReadResponse>(requests);
|
||||
|
||||
return setTime(std::move(response));
|
||||
return std::move(response);
|
||||
}
|
||||
ZooKeeperResponsePtr ZooKeeperCloseRequest::makeResponse() const { return setTime(std::make_shared<ZooKeeperCloseResponse>()); }
|
||||
ZooKeeperResponsePtr ZooKeeperSetACLRequest::makeResponse() const { return setTime(std::make_shared<ZooKeeperSetACLResponse>()); }
|
||||
ZooKeeperResponsePtr ZooKeeperGetACLRequest::makeResponse() const { return setTime(std::make_shared<ZooKeeperGetACLResponse>()); }
|
||||
|
||||
ZooKeeperResponsePtr ZooKeeperCloseRequest::makeResponse() const { return std::make_shared<ZooKeeperCloseResponse>(); }
|
||||
ZooKeeperResponsePtr ZooKeeperSetACLRequest::makeResponse() const { return std::make_shared<ZooKeeperSetACLResponse>(); }
|
||||
ZooKeeperResponsePtr ZooKeeperGetACLRequest::makeResponse() const { return std::make_shared<ZooKeeperGetACLResponse>(); }
|
||||
|
||||
void ZooKeeperSessionIDRequest::writeImpl(WriteBuffer & out) const
|
||||
{
|
||||
@ -777,6 +955,11 @@ void ZooKeeperSessionIDRequest::writeImpl(WriteBuffer & out) const
|
||||
Coordination::write(server_id, out);
|
||||
}
|
||||
|
||||
size_t ZooKeeperSessionIDRequest::sizeImpl() const
|
||||
{
|
||||
return Coordination::size(internal_id) + Coordination::size(session_timeout_ms) + Coordination::size(server_id);
|
||||
}
|
||||
|
||||
void ZooKeeperSessionIDRequest::readImpl(ReadBuffer & in)
|
||||
{
|
||||
Coordination::read(internal_id, in);
|
||||
@ -803,6 +986,11 @@ void ZooKeeperSessionIDResponse::writeImpl(WriteBuffer & out) const
|
||||
Coordination::write(server_id, out);
|
||||
}
|
||||
|
||||
size_t ZooKeeperSessionIDResponse::sizeImpl() const
|
||||
{
|
||||
return Coordination::size(internal_id) + Coordination::size(session_id) + Coordination::size(server_id);
|
||||
}
|
||||
|
||||
|
||||
void ZooKeeperRequest::createLogElements(LogElements & elems) const
|
||||
{
|
||||
@ -960,40 +1148,6 @@ std::shared_ptr<ZooKeeperRequest> ZooKeeperRequest::read(ReadBuffer & in)
|
||||
return request;
|
||||
}
|
||||
|
||||
ZooKeeperRequest::~ZooKeeperRequest()
|
||||
{
|
||||
if (!request_created_time_ns)
|
||||
return;
|
||||
UInt64 elapsed_ns = clock_gettime_ns() - request_created_time_ns;
|
||||
constexpr UInt64 max_request_time_ns = 1000000000ULL; /// 1 sec
|
||||
if (max_request_time_ns < elapsed_ns)
|
||||
{
|
||||
LOG_TEST(getLogger(__PRETTY_FUNCTION__), "Processing of request xid={} took {} ms", xid, elapsed_ns / 1000000UL);
|
||||
}
|
||||
}
|
||||
|
||||
ZooKeeperResponsePtr ZooKeeperRequest::setTime(ZooKeeperResponsePtr response) const
|
||||
{
|
||||
if (request_created_time_ns)
|
||||
{
|
||||
response->response_created_time_ns = clock_gettime_ns();
|
||||
}
|
||||
return response;
|
||||
}
|
||||
|
||||
ZooKeeperResponse::~ZooKeeperResponse()
|
||||
{
|
||||
if (!response_created_time_ns)
|
||||
return;
|
||||
UInt64 elapsed_ns = clock_gettime_ns() - response_created_time_ns;
|
||||
constexpr UInt64 max_request_time_ns = 1000000000ULL; /// 1 sec
|
||||
if (max_request_time_ns < elapsed_ns)
|
||||
{
|
||||
LOG_TEST(getLogger(__PRETTY_FUNCTION__), "Processing of response xid={} took {} ms", xid, elapsed_ns / 1000000UL);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
ZooKeeperRequestPtr ZooKeeperRequestFactory::get(OpNum op_num) const
|
||||
{
|
||||
auto it = op_num_to_request.find(op_num);
|
||||
@ -1015,7 +1169,6 @@ void registerZooKeeperRequest(ZooKeeperRequestFactory & factory)
|
||||
factory.registerRequest(num, []
|
||||
{
|
||||
auto res = std::make_shared<RequestT>();
|
||||
res->request_created_time_ns = clock_gettime_ns();
|
||||
|
||||
if constexpr (num == OpNum::MultiRead)
|
||||
res->operation_type = ZooKeeperMultiRequest::OperationType::Read;
|
||||
|
@ -7,13 +7,11 @@
|
||||
#include <boost/noncopyable.hpp>
|
||||
#include <IO/ReadBuffer.h>
|
||||
#include <IO/WriteBuffer.h>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include <cstdint>
|
||||
#include <optional>
|
||||
#include <functional>
|
||||
#include <span>
|
||||
|
||||
|
||||
namespace Coordination
|
||||
@ -25,13 +23,11 @@ struct ZooKeeperResponse : virtual Response
|
||||
{
|
||||
XID xid = 0;
|
||||
|
||||
UInt64 response_created_time_ns = 0;
|
||||
|
||||
ZooKeeperResponse() = default;
|
||||
ZooKeeperResponse(const ZooKeeperResponse &) = default;
|
||||
~ZooKeeperResponse() override;
|
||||
virtual void readImpl(ReadBuffer &) = 0;
|
||||
virtual void writeImpl(WriteBuffer &) const = 0;
|
||||
virtual size_t sizeImpl() const = 0;
|
||||
virtual void write(WriteBuffer & out) const;
|
||||
virtual OpNum getOpNum() const = 0;
|
||||
virtual void fillLogElements(LogElements & elems, size_t idx) const;
|
||||
@ -51,13 +47,11 @@ struct ZooKeeperRequest : virtual Request
|
||||
|
||||
bool restored_from_zookeeper_log = false;
|
||||
|
||||
UInt64 request_created_time_ns = 0;
|
||||
UInt64 thread_id = 0;
|
||||
String query_id;
|
||||
|
||||
ZooKeeperRequest() = default;
|
||||
ZooKeeperRequest(const ZooKeeperRequest &) = default;
|
||||
~ZooKeeperRequest() override;
|
||||
|
||||
virtual OpNum getOpNum() const = 0;
|
||||
|
||||
@ -66,6 +60,7 @@ struct ZooKeeperRequest : virtual Request
|
||||
std::string toString(bool short_format = false) const;
|
||||
|
||||
virtual void writeImpl(WriteBuffer &) const = 0;
|
||||
virtual size_t sizeImpl() const = 0;
|
||||
virtual void readImpl(ReadBuffer &) = 0;
|
||||
|
||||
virtual std::string toStringImpl(bool /*short_format*/) const { return ""; }
|
||||
@ -73,7 +68,6 @@ struct ZooKeeperRequest : virtual Request
|
||||
static std::shared_ptr<ZooKeeperRequest> read(ReadBuffer & in);
|
||||
|
||||
virtual ZooKeeperResponsePtr makeResponse() const = 0;
|
||||
ZooKeeperResponsePtr setTime(ZooKeeperResponsePtr response) const;
|
||||
virtual bool isReadRequest() const = 0;
|
||||
|
||||
virtual void createLogElements(LogElements & elems) const;
|
||||
@ -86,6 +80,7 @@ struct ZooKeeperHeartbeatRequest final : ZooKeeperRequest
|
||||
String getPath() const override { return {}; }
|
||||
OpNum getOpNum() const override { return OpNum::Heartbeat; }
|
||||
void writeImpl(WriteBuffer &) const override {}
|
||||
size_t sizeImpl() const override { return 0; }
|
||||
void readImpl(ReadBuffer &) override {}
|
||||
ZooKeeperResponsePtr makeResponse() const override;
|
||||
bool isReadRequest() const override { return false; }
|
||||
@ -97,6 +92,7 @@ struct ZooKeeperSyncRequest final : ZooKeeperRequest
|
||||
String getPath() const override { return path; }
|
||||
OpNum getOpNum() const override { return OpNum::Sync; }
|
||||
void writeImpl(WriteBuffer & out) const override;
|
||||
size_t sizeImpl() const override;
|
||||
void readImpl(ReadBuffer & in) override;
|
||||
std::string toStringImpl(bool short_format) const override;
|
||||
ZooKeeperResponsePtr makeResponse() const override;
|
||||
@ -109,6 +105,7 @@ struct ZooKeeperSyncResponse final : SyncResponse, ZooKeeperResponse
|
||||
{
|
||||
void readImpl(ReadBuffer & in) override;
|
||||
void writeImpl(WriteBuffer & out) const override;
|
||||
size_t sizeImpl() const override;
|
||||
OpNum getOpNum() const override { return OpNum::Sync; }
|
||||
};
|
||||
|
||||
@ -122,6 +119,7 @@ struct ZooKeeperReconfigRequest final : ZooKeeperRequest
|
||||
String getPath() const override { return keeper_config_path; }
|
||||
OpNum getOpNum() const override { return OpNum::Reconfig; }
|
||||
void writeImpl(WriteBuffer & out) const override;
|
||||
size_t sizeImpl() const override;
|
||||
void readImpl(ReadBuffer & in) override;
|
||||
std::string toStringImpl(bool short_format) const override;
|
||||
ZooKeeperResponsePtr makeResponse() const override;
|
||||
@ -138,6 +136,7 @@ struct ZooKeeperReconfigResponse final : ReconfigResponse, ZooKeeperResponse
|
||||
{
|
||||
void readImpl(ReadBuffer & in) override;
|
||||
void writeImpl(WriteBuffer & out) const override;
|
||||
size_t sizeImpl() const override;
|
||||
OpNum getOpNum() const override { return OpNum::Reconfig; }
|
||||
};
|
||||
|
||||
@ -145,6 +144,7 @@ struct ZooKeeperHeartbeatResponse final : ZooKeeperResponse
|
||||
{
|
||||
void readImpl(ReadBuffer &) override {}
|
||||
void writeImpl(WriteBuffer &) const override {}
|
||||
size_t sizeImpl() const override { return 0; }
|
||||
OpNum getOpNum() const override { return OpNum::Heartbeat; }
|
||||
};
|
||||
|
||||
@ -153,6 +153,7 @@ struct ZooKeeperWatchResponse final : WatchResponse, ZooKeeperResponse
|
||||
void readImpl(ReadBuffer & in) override;
|
||||
|
||||
void writeImpl(WriteBuffer & out) const override;
|
||||
size_t sizeImpl() const override;
|
||||
|
||||
void write(WriteBuffer & out) const override;
|
||||
|
||||
@ -175,6 +176,7 @@ struct ZooKeeperAuthRequest final : ZooKeeperRequest
|
||||
String getPath() const override { return {}; }
|
||||
OpNum getOpNum() const override { return OpNum::Auth; }
|
||||
void writeImpl(WriteBuffer & out) const override;
|
||||
size_t sizeImpl() const override;
|
||||
void readImpl(ReadBuffer & in) override;
|
||||
std::string toStringImpl(bool short_format) const override;
|
||||
|
||||
@ -189,6 +191,7 @@ struct ZooKeeperAuthResponse final : ZooKeeperResponse
|
||||
{
|
||||
void readImpl(ReadBuffer &) override {}
|
||||
void writeImpl(WriteBuffer &) const override {}
|
||||
size_t sizeImpl() const override { return 0; }
|
||||
|
||||
OpNum getOpNum() const override { return OpNum::Auth; }
|
||||
|
||||
@ -200,6 +203,7 @@ struct ZooKeeperCloseRequest final : ZooKeeperRequest
|
||||
String getPath() const override { return {}; }
|
||||
OpNum getOpNum() const override { return OpNum::Close; }
|
||||
void writeImpl(WriteBuffer &) const override {}
|
||||
size_t sizeImpl() const override { return 0; }
|
||||
void readImpl(ReadBuffer &) override {}
|
||||
|
||||
ZooKeeperResponsePtr makeResponse() const override;
|
||||
@ -214,6 +218,7 @@ struct ZooKeeperCloseResponse final : ZooKeeperResponse
|
||||
}
|
||||
|
||||
void writeImpl(WriteBuffer &) const override {}
|
||||
size_t sizeImpl() const override { return 0; }
|
||||
|
||||
OpNum getOpNum() const override { return OpNum::Close; }
|
||||
};
|
||||
@ -228,6 +233,7 @@ struct ZooKeeperCreateRequest final : public CreateRequest, ZooKeeperRequest
|
||||
|
||||
OpNum getOpNum() const override { return not_exists ? OpNum::CreateIfNotExists : OpNum::Create; }
|
||||
void writeImpl(WriteBuffer & out) const override;
|
||||
size_t sizeImpl() const override;
|
||||
void readImpl(ReadBuffer & in) override;
|
||||
std::string toStringImpl(bool short_format) const override;
|
||||
|
||||
@ -244,6 +250,7 @@ struct ZooKeeperCreateResponse : CreateResponse, ZooKeeperResponse
|
||||
void readImpl(ReadBuffer & in) override;
|
||||
|
||||
void writeImpl(WriteBuffer & out) const override;
|
||||
size_t sizeImpl() const override;
|
||||
|
||||
OpNum getOpNum() const override { return OpNum::Create; }
|
||||
|
||||
@ -265,6 +272,7 @@ struct ZooKeeperRemoveRequest final : RemoveRequest, ZooKeeperRequest
|
||||
|
||||
OpNum getOpNum() const override { return OpNum::Remove; }
|
||||
void writeImpl(WriteBuffer & out) const override;
|
||||
size_t sizeImpl() const override;
|
||||
void readImpl(ReadBuffer & in) override;
|
||||
std::string toStringImpl(bool short_format) const override;
|
||||
|
||||
@ -280,6 +288,7 @@ struct ZooKeeperRemoveResponse final : RemoveResponse, ZooKeeperResponse
|
||||
{
|
||||
void readImpl(ReadBuffer &) override {}
|
||||
void writeImpl(WriteBuffer &) const override {}
|
||||
size_t sizeImpl() const override { return 0; }
|
||||
OpNum getOpNum() const override { return OpNum::Remove; }
|
||||
|
||||
size_t bytesSize() const override { return RemoveResponse::bytesSize() + sizeof(xid) + sizeof(zxid); }
|
||||
@ -293,6 +302,7 @@ struct ZooKeeperRemoveRecursiveRequest final : RemoveRecursiveRequest, ZooKeeper
|
||||
OpNum getOpNum() const override { return OpNum::RemoveRecursive; }
|
||||
void writeImpl(WriteBuffer & out) const override;
|
||||
void readImpl(ReadBuffer & in) override;
|
||||
size_t sizeImpl() const override;
|
||||
std::string toStringImpl(bool short_format) const override;
|
||||
|
||||
ZooKeeperResponsePtr makeResponse() const override;
|
||||
@ -305,6 +315,7 @@ struct ZooKeeperRemoveRecursiveResponse : RemoveRecursiveResponse, ZooKeeperResp
|
||||
{
|
||||
void readImpl(ReadBuffer &) override {}
|
||||
void writeImpl(WriteBuffer &) const override {}
|
||||
size_t sizeImpl() const override { return 0; }
|
||||
OpNum getOpNum() const override { return OpNum::RemoveRecursive; }
|
||||
|
||||
size_t bytesSize() const override { return RemoveRecursiveResponse::bytesSize() + sizeof(xid) + sizeof(zxid); }
|
||||
@ -317,6 +328,7 @@ struct ZooKeeperExistsRequest final : ExistsRequest, ZooKeeperRequest
|
||||
|
||||
OpNum getOpNum() const override { return OpNum::Exists; }
|
||||
void writeImpl(WriteBuffer & out) const override;
|
||||
size_t sizeImpl() const override;
|
||||
void readImpl(ReadBuffer & in) override;
|
||||
std::string toStringImpl(bool short_format) const override;
|
||||
|
||||
@ -330,6 +342,7 @@ struct ZooKeeperExistsResponse final : ExistsResponse, ZooKeeperResponse
|
||||
{
|
||||
void readImpl(ReadBuffer & in) override;
|
||||
void writeImpl(WriteBuffer & out) const override;
|
||||
size_t sizeImpl() const override;
|
||||
OpNum getOpNum() const override { return OpNum::Exists; }
|
||||
|
||||
size_t bytesSize() const override { return ExistsResponse::bytesSize() + sizeof(xid) + sizeof(zxid); }
|
||||
@ -344,6 +357,7 @@ struct ZooKeeperGetRequest final : GetRequest, ZooKeeperRequest
|
||||
|
||||
OpNum getOpNum() const override { return OpNum::Get; }
|
||||
void writeImpl(WriteBuffer & out) const override;
|
||||
size_t sizeImpl() const override;
|
||||
void readImpl(ReadBuffer & in) override;
|
||||
std::string toStringImpl(bool short_format) const override;
|
||||
|
||||
@ -357,6 +371,7 @@ struct ZooKeeperGetResponse final : GetResponse, ZooKeeperResponse
|
||||
{
|
||||
void readImpl(ReadBuffer & in) override;
|
||||
void writeImpl(WriteBuffer & out) const override;
|
||||
size_t sizeImpl() const override;
|
||||
OpNum getOpNum() const override { return OpNum::Get; }
|
||||
|
||||
size_t bytesSize() const override { return GetResponse::bytesSize() + sizeof(xid) + sizeof(zxid); }
|
||||
@ -371,6 +386,7 @@ struct ZooKeeperSetRequest final : SetRequest, ZooKeeperRequest
|
||||
|
||||
OpNum getOpNum() const override { return OpNum::Set; }
|
||||
void writeImpl(WriteBuffer & out) const override;
|
||||
size_t sizeImpl() const override;
|
||||
void readImpl(ReadBuffer & in) override;
|
||||
std::string toStringImpl(bool short_format) const override;
|
||||
ZooKeeperResponsePtr makeResponse() const override;
|
||||
@ -385,6 +401,7 @@ struct ZooKeeperSetResponse final : SetResponse, ZooKeeperResponse
|
||||
{
|
||||
void readImpl(ReadBuffer & in) override;
|
||||
void writeImpl(WriteBuffer & out) const override;
|
||||
size_t sizeImpl() const override;
|
||||
OpNum getOpNum() const override { return OpNum::Set; }
|
||||
|
||||
size_t bytesSize() const override { return SetResponse::bytesSize() + sizeof(xid) + sizeof(zxid); }
|
||||
@ -399,6 +416,7 @@ struct ZooKeeperListRequest : ListRequest, ZooKeeperRequest
|
||||
|
||||
OpNum getOpNum() const override { return OpNum::List; }
|
||||
void writeImpl(WriteBuffer & out) const override;
|
||||
size_t sizeImpl() const override;
|
||||
void readImpl(ReadBuffer & in) override;
|
||||
std::string toStringImpl(bool short_format) const override;
|
||||
ZooKeeperResponsePtr makeResponse() const override;
|
||||
@ -419,6 +437,7 @@ struct ZooKeeperFilteredListRequest final : ZooKeeperListRequest
|
||||
|
||||
OpNum getOpNum() const override { return OpNum::FilteredList; }
|
||||
void writeImpl(WriteBuffer & out) const override;
|
||||
size_t sizeImpl() const override;
|
||||
void readImpl(ReadBuffer & in) override;
|
||||
std::string toStringImpl(bool short_format) const override;
|
||||
|
||||
@ -429,6 +448,7 @@ struct ZooKeeperListResponse : ListResponse, ZooKeeperResponse
|
||||
{
|
||||
void readImpl(ReadBuffer & in) override;
|
||||
void writeImpl(WriteBuffer & out) const override;
|
||||
size_t sizeImpl() const override;
|
||||
OpNum getOpNum() const override { return OpNum::List; }
|
||||
|
||||
size_t bytesSize() const override { return ListResponse::bytesSize() + sizeof(xid) + sizeof(zxid); }
|
||||
@ -440,6 +460,7 @@ struct ZooKeeperSimpleListResponse final : ZooKeeperListResponse
|
||||
{
|
||||
void readImpl(ReadBuffer & in) override;
|
||||
void writeImpl(WriteBuffer & out) const override;
|
||||
size_t sizeImpl() const override;
|
||||
OpNum getOpNum() const override { return OpNum::SimpleList; }
|
||||
|
||||
size_t bytesSize() const override { return ZooKeeperListResponse::bytesSize() - sizeof(stat); }
|
||||
@ -452,6 +473,7 @@ struct ZooKeeperCheckRequest : CheckRequest, ZooKeeperRequest
|
||||
|
||||
OpNum getOpNum() const override { return not_exists ? OpNum::CheckNotExists : OpNum::Check; }
|
||||
void writeImpl(WriteBuffer & out) const override;
|
||||
size_t sizeImpl() const override;
|
||||
void readImpl(ReadBuffer & in) override;
|
||||
std::string toStringImpl(bool short_format) const override;
|
||||
|
||||
@ -467,6 +489,7 @@ struct ZooKeeperCheckResponse : CheckResponse, ZooKeeperResponse
|
||||
{
|
||||
void readImpl(ReadBuffer &) override {}
|
||||
void writeImpl(WriteBuffer &) const override {}
|
||||
size_t sizeImpl() const override { return 0; }
|
||||
OpNum getOpNum() const override { return OpNum::Check; }
|
||||
|
||||
size_t bytesSize() const override { return CheckResponse::bytesSize() + sizeof(xid) + sizeof(zxid); }
|
||||
@ -483,6 +506,7 @@ struct ZooKeeperErrorResponse final : ErrorResponse, ZooKeeperResponse
|
||||
{
|
||||
void readImpl(ReadBuffer & in) override;
|
||||
void writeImpl(WriteBuffer & out) const override;
|
||||
size_t sizeImpl() const override;
|
||||
|
||||
OpNum getOpNum() const override { return OpNum::Error; }
|
||||
|
||||
@ -493,6 +517,7 @@ struct ZooKeeperSetACLRequest final : SetACLRequest, ZooKeeperRequest
|
||||
{
|
||||
OpNum getOpNum() const override { return OpNum::SetACL; }
|
||||
void writeImpl(WriteBuffer & out) const override;
|
||||
size_t sizeImpl() const override;
|
||||
void readImpl(ReadBuffer & in) override;
|
||||
std::string toStringImpl(bool short_format) const override;
|
||||
ZooKeeperResponsePtr makeResponse() const override;
|
||||
@ -505,6 +530,7 @@ struct ZooKeeperSetACLResponse final : SetACLResponse, ZooKeeperResponse
|
||||
{
|
||||
void readImpl(ReadBuffer & in) override;
|
||||
void writeImpl(WriteBuffer & out) const override;
|
||||
size_t sizeImpl() const override;
|
||||
OpNum getOpNum() const override { return OpNum::SetACL; }
|
||||
|
||||
size_t bytesSize() const override { return SetACLResponse::bytesSize() + sizeof(xid) + sizeof(zxid); }
|
||||
@ -514,6 +540,7 @@ struct ZooKeeperGetACLRequest final : GetACLRequest, ZooKeeperRequest
|
||||
{
|
||||
OpNum getOpNum() const override { return OpNum::GetACL; }
|
||||
void writeImpl(WriteBuffer & out) const override;
|
||||
size_t sizeImpl() const override;
|
||||
void readImpl(ReadBuffer & in) override;
|
||||
std::string toStringImpl(bool short_format) const override;
|
||||
ZooKeeperResponsePtr makeResponse() const override;
|
||||
@ -526,12 +553,13 @@ struct ZooKeeperGetACLResponse final : GetACLResponse, ZooKeeperResponse
|
||||
{
|
||||
void readImpl(ReadBuffer & in) override;
|
||||
void writeImpl(WriteBuffer & out) const override;
|
||||
size_t sizeImpl() const override;
|
||||
OpNum getOpNum() const override { return OpNum::GetACL; }
|
||||
|
||||
size_t bytesSize() const override { return GetACLResponse::bytesSize() + sizeof(xid) + sizeof(zxid); }
|
||||
};
|
||||
|
||||
struct ZooKeeperMultiRequest final : MultiRequest, ZooKeeperRequest
|
||||
struct ZooKeeperMultiRequest final : MultiRequest<ZooKeeperRequestPtr>, ZooKeeperRequest
|
||||
{
|
||||
OpNum getOpNum() const override;
|
||||
ZooKeeperMultiRequest() = default;
|
||||
@ -540,6 +568,7 @@ struct ZooKeeperMultiRequest final : MultiRequest, ZooKeeperRequest
|
||||
ZooKeeperMultiRequest(std::span<const Coordination::RequestPtr> generic_requests, const ACLs & default_acls);
|
||||
|
||||
void writeImpl(WriteBuffer & out) const override;
|
||||
size_t sizeImpl() const override;
|
||||
void readImpl(ReadBuffer & in) override;
|
||||
std::string toStringImpl(bool short_format) const override;
|
||||
|
||||
@ -563,12 +592,14 @@ private:
|
||||
|
||||
struct ZooKeeperMultiResponse : MultiResponse, ZooKeeperResponse
|
||||
{
|
||||
explicit ZooKeeperMultiResponse(const Requests & requests)
|
||||
ZooKeeperMultiResponse() = default;
|
||||
|
||||
explicit ZooKeeperMultiResponse(const std::vector<ZooKeeperRequestPtr> & requests)
|
||||
{
|
||||
responses.reserve(requests.size());
|
||||
|
||||
for (const auto & request : requests)
|
||||
responses.emplace_back(dynamic_cast<const ZooKeeperRequest &>(*request).makeResponse());
|
||||
responses.emplace_back(request->makeResponse());
|
||||
}
|
||||
|
||||
explicit ZooKeeperMultiResponse(const Responses & responses_)
|
||||
@ -579,6 +610,7 @@ struct ZooKeeperMultiResponse : MultiResponse, ZooKeeperResponse
|
||||
void readImpl(ReadBuffer & in) override;
|
||||
|
||||
void writeImpl(WriteBuffer & out) const override;
|
||||
size_t sizeImpl() const override;
|
||||
|
||||
size_t bytesSize() const override { return MultiResponse::bytesSize() + sizeof(xid) + sizeof(zxid); }
|
||||
|
||||
@ -609,6 +641,7 @@ struct ZooKeeperSessionIDRequest final : ZooKeeperRequest
|
||||
Coordination::OpNum getOpNum() const override { return OpNum::SessionID; }
|
||||
String getPath() const override { return {}; }
|
||||
void writeImpl(WriteBuffer & out) const override;
|
||||
size_t sizeImpl() const override;
|
||||
void readImpl(ReadBuffer & in) override;
|
||||
|
||||
Coordination::ZooKeeperResponsePtr makeResponse() const override;
|
||||
@ -627,6 +660,7 @@ struct ZooKeeperSessionIDResponse final : ZooKeeperResponse
|
||||
void readImpl(ReadBuffer & in) override;
|
||||
|
||||
void writeImpl(WriteBuffer & out) const override;
|
||||
size_t sizeImpl() const override;
|
||||
|
||||
Coordination::OpNum getOpNum() const override { return OpNum::SessionID; }
|
||||
};
|
||||
|
@ -42,6 +42,32 @@ void write(const Error & x, WriteBuffer & out)
|
||||
write(static_cast<int32_t>(x), out);
|
||||
}
|
||||
|
||||
size_t size(OpNum x)
|
||||
{
|
||||
return size(static_cast<int32_t>(x));
|
||||
}
|
||||
|
||||
size_t size(const std::string & s)
|
||||
{
|
||||
return size(static_cast<int32_t>(s.size())) + s.size();
|
||||
}
|
||||
|
||||
size_t size(const ACL & acl)
|
||||
{
|
||||
return size(acl.permissions) + size(acl.scheme) + size(acl.id);
|
||||
}
|
||||
|
||||
size_t size(const Stat & stat)
|
||||
{
|
||||
return size(stat.czxid) + size(stat.mzxid) + size(stat.ctime) + size(stat.mtime) + size(stat.version) + size(stat.cversion)
|
||||
+ size(stat.aversion) + size(stat.ephemeralOwner) + size(stat.dataLength) + size(stat.numChildren) + size(stat.pzxid);
|
||||
}
|
||||
|
||||
size_t size(const Error & x)
|
||||
{
|
||||
return size(static_cast<int32_t>(x));
|
||||
}
|
||||
|
||||
void read(OpNum & x, ReadBuffer & in)
|
||||
{
|
||||
int32_t raw_op_num;
|
||||
|
@ -43,6 +43,36 @@ void write(const std::vector<T> & arr, WriteBuffer & out)
|
||||
write(elem, out);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
requires is_arithmetic_v<T>
|
||||
size_t size(T x)
|
||||
{
|
||||
return sizeof(x);
|
||||
}
|
||||
|
||||
size_t size(OpNum x);
|
||||
size_t size(const std::string & s);
|
||||
size_t size(const ACL & acl);
|
||||
size_t size(const Stat & stat);
|
||||
size_t size(const Error & x);
|
||||
|
||||
template <size_t N>
|
||||
size_t size(const std::array<char, N>)
|
||||
{
|
||||
return size(static_cast<int32_t>(N)) + N;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
size_t size(const std::vector<T> & arr)
|
||||
{
|
||||
size_t total_size = size(static_cast<int32_t>(arr.size()));
|
||||
for (const auto & elem : arr)
|
||||
total_size += size(elem);
|
||||
|
||||
return total_size;
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
requires is_arithmetic_v<T>
|
||||
void read(T & x, ReadBuffer & in)
|
||||
|
@ -45,6 +45,7 @@ uint64_t ACLMap::convertACLs(const Coordination::ACLs & acls)
|
||||
if (acls.empty())
|
||||
return 0;
|
||||
|
||||
std::lock_guard lock(map_mutex);
|
||||
if (acl_to_num.contains(acls))
|
||||
return acl_to_num[acls];
|
||||
|
||||
@ -62,6 +63,7 @@ Coordination::ACLs ACLMap::convertNumber(uint64_t acls_id) const
|
||||
if (acls_id == 0)
|
||||
return Coordination::ACLs{};
|
||||
|
||||
std::lock_guard lock(map_mutex);
|
||||
if (!num_to_acl.contains(acls_id))
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown ACL id {}. It's a bug", acls_id);
|
||||
|
||||
@ -70,6 +72,7 @@ Coordination::ACLs ACLMap::convertNumber(uint64_t acls_id) const
|
||||
|
||||
void ACLMap::addMapping(uint64_t acls_id, const Coordination::ACLs & acls)
|
||||
{
|
||||
std::lock_guard lock(map_mutex);
|
||||
num_to_acl[acls_id] = acls;
|
||||
acl_to_num[acls] = acls_id;
|
||||
max_acl_id = std::max(acls_id + 1, max_acl_id); /// max_acl_id pointer next slot
|
||||
@ -77,11 +80,13 @@ void ACLMap::addMapping(uint64_t acls_id, const Coordination::ACLs & acls)
|
||||
|
||||
void ACLMap::addUsage(uint64_t acl_id)
|
||||
{
|
||||
std::lock_guard lock(map_mutex);
|
||||
usage_counter[acl_id]++;
|
||||
}
|
||||
|
||||
void ACLMap::removeUsage(uint64_t acl_id)
|
||||
{
|
||||
std::lock_guard lock(map_mutex);
|
||||
if (!usage_counter.contains(acl_id))
|
||||
return;
|
||||
|
||||
|
@ -32,6 +32,8 @@ private:
|
||||
NumToACLMap num_to_acl;
|
||||
UsageCounter usage_counter;
|
||||
uint64_t max_acl_id{1};
|
||||
|
||||
mutable std::mutex map_mutex;
|
||||
public:
|
||||
|
||||
/// Convert ACL to number. If it's new ACL than adds it to map
|
||||
|
@ -301,11 +301,13 @@ String MonitorCommand::run()
|
||||
|
||||
print(ret, "server_state", keeper_info.getRole());
|
||||
|
||||
print(ret, "znode_count", state_machine.getNodesCount());
|
||||
print(ret, "watch_count", state_machine.getTotalWatchesCount());
|
||||
print(ret, "ephemerals_count", state_machine.getTotalEphemeralNodesCount());
|
||||
print(ret, "approximate_data_size", state_machine.getApproximateDataSize());
|
||||
print(ret, "key_arena_size", state_machine.getKeyArenaSize());
|
||||
const auto & storage_stats = state_machine.getStorageStats();
|
||||
|
||||
print(ret, "znode_count", storage_stats.nodes_count.load(std::memory_order_relaxed));
|
||||
print(ret, "watch_count", storage_stats.total_watches_count.load(std::memory_order_relaxed));
|
||||
print(ret, "ephemerals_count", storage_stats.total_emphemeral_nodes_count.load(std::memory_order_relaxed));
|
||||
print(ret, "approximate_data_size", storage_stats.approximate_data_size.load(std::memory_order_relaxed));
|
||||
print(ret, "key_arena_size", 0);
|
||||
print(ret, "latest_snapshot_size", state_machine.getLatestSnapshotSize());
|
||||
|
||||
#if defined(OS_LINUX) || defined(OS_DARWIN)
|
||||
@ -387,6 +389,7 @@ String ServerStatCommand::run()
|
||||
|
||||
auto & stats = keeper_dispatcher.getKeeperConnectionStats();
|
||||
Keeper4LWInfo keeper_info = keeper_dispatcher.getKeeper4LWInfo();
|
||||
const auto & storage_stats = keeper_dispatcher.getStateMachine().getStorageStats();
|
||||
|
||||
write("ClickHouse Keeper version", String(VERSION_DESCRIBE) + "-" + VERSION_GITHASH);
|
||||
|
||||
@ -398,9 +401,9 @@ String ServerStatCommand::run()
|
||||
write("Sent", toString(stats.getPacketsSent()));
|
||||
write("Connections", toString(keeper_info.alive_connections_count));
|
||||
write("Outstanding", toString(keeper_info.outstanding_requests_count));
|
||||
write("Zxid", formatZxid(keeper_info.last_zxid));
|
||||
write("Zxid", formatZxid(storage_stats.last_zxid.load(std::memory_order_relaxed)));
|
||||
write("Mode", keeper_info.getRole());
|
||||
write("Node count", toString(keeper_info.total_nodes_count));
|
||||
write("Node count", toString(storage_stats.nodes_count.load(std::memory_order_relaxed)));
|
||||
|
||||
return buf.str();
|
||||
}
|
||||
@ -416,6 +419,7 @@ String StatCommand::run()
|
||||
|
||||
auto & stats = keeper_dispatcher.getKeeperConnectionStats();
|
||||
Keeper4LWInfo keeper_info = keeper_dispatcher.getKeeper4LWInfo();
|
||||
const auto & storage_stats = keeper_dispatcher.getStateMachine().getStorageStats();
|
||||
|
||||
write("ClickHouse Keeper version", String(VERSION_DESCRIBE) + "-" + VERSION_GITHASH);
|
||||
|
||||
@ -431,9 +435,9 @@ String StatCommand::run()
|
||||
write("Sent", toString(stats.getPacketsSent()));
|
||||
write("Connections", toString(keeper_info.alive_connections_count));
|
||||
write("Outstanding", toString(keeper_info.outstanding_requests_count));
|
||||
write("Zxid", formatZxid(keeper_info.last_zxid));
|
||||
write("Zxid", formatZxid(storage_stats.last_zxid.load(std::memory_order_relaxed)));
|
||||
write("Mode", keeper_info.getRole());
|
||||
write("Node count", toString(keeper_info.total_nodes_count));
|
||||
write("Node count", toString(storage_stats.nodes_count.load(std::memory_order_relaxed)));
|
||||
|
||||
return buf.str();
|
||||
}
|
||||
|
@ -1,7 +1,5 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
|
||||
#include <base/types.h>
|
||||
#include <Common/Exception.h>
|
||||
|
||||
@ -30,9 +28,6 @@ struct Keeper4LWInfo
|
||||
uint64_t follower_count;
|
||||
uint64_t synced_follower_count;
|
||||
|
||||
uint64_t total_nodes_count;
|
||||
int64_t last_zxid;
|
||||
|
||||
String getRole() const
|
||||
{
|
||||
if (is_standalone)
|
||||
|
@ -38,15 +38,16 @@ void updateKeeperInformation(KeeperDispatcher & keeper_dispatcher, AsynchronousM
|
||||
is_follower = static_cast<size_t>(keeper_info.is_follower);
|
||||
is_exceeding_mem_soft_limit = static_cast<size_t>(keeper_info.is_exceeding_mem_soft_limit);
|
||||
|
||||
zxid = keeper_info.last_zxid;
|
||||
const auto & state_machine = keeper_dispatcher.getStateMachine();
|
||||
znode_count = state_machine.getNodesCount();
|
||||
watch_count = state_machine.getTotalWatchesCount();
|
||||
ephemerals_count = state_machine.getTotalEphemeralNodesCount();
|
||||
approximate_data_size = state_machine.getApproximateDataSize();
|
||||
key_arena_size = state_machine.getKeyArenaSize();
|
||||
session_with_watches = state_machine.getSessionsWithWatchesCount();
|
||||
paths_watched = state_machine.getWatchedPathsCount();
|
||||
const auto & storage_stats = state_machine.getStorageStats();
|
||||
zxid = storage_stats.last_zxid.load(std::memory_order_relaxed);
|
||||
znode_count = storage_stats.nodes_count.load(std::memory_order_relaxed);
|
||||
watch_count = storage_stats.total_watches_count.load(std::memory_order_relaxed);
|
||||
ephemerals_count = storage_stats.total_emphemeral_nodes_count.load(std::memory_order_relaxed);
|
||||
approximate_data_size = storage_stats.approximate_data_size.load(std::memory_order_relaxed);
|
||||
key_arena_size = 0;
|
||||
session_with_watches = storage_stats.sessions_with_watches_count.load(std::memory_order_relaxed);
|
||||
paths_watched = storage_stats.watched_paths_count.load(std::memory_order_relaxed);
|
||||
|
||||
# if defined(__linux__) || defined(__APPLE__)
|
||||
open_file_descriptor_count = getCurrentProcessFDCount();
|
||||
|
@ -305,7 +305,7 @@ void KeeperDispatcher::requestThread()
|
||||
if (has_read_request)
|
||||
{
|
||||
if (server->isLeaderAlive())
|
||||
server->putLocalReadRequest(request);
|
||||
server->putLocalReadRequest({request});
|
||||
else
|
||||
addErrorResponses({request}, Coordination::Error::ZCONNECTIONLOSS);
|
||||
}
|
||||
|
@ -1207,8 +1207,6 @@ Keeper4LWInfo KeeperServer::getPartiallyFilled4LWInfo() const
|
||||
result.synced_follower_count = getSyncedFollowerCount();
|
||||
}
|
||||
result.is_exceeding_mem_soft_limit = isExceedingMemorySoftLimit();
|
||||
result.total_nodes_count = getKeeperStateMachine()->getNodesCount();
|
||||
result.last_zxid = getKeeperStateMachine()->getLastProcessedZxid();
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -78,20 +78,20 @@ namespace
|
||||
writeBinary(false, out);
|
||||
|
||||
/// Serialize stat
|
||||
writeBinary(node.czxid, out);
|
||||
writeBinary(node.mzxid, out);
|
||||
writeBinary(node.ctime(), out);
|
||||
writeBinary(node.mtime, out);
|
||||
writeBinary(node.version, out);
|
||||
writeBinary(node.cversion, out);
|
||||
writeBinary(node.aversion, out);
|
||||
writeBinary(node.ephemeralOwner(), out);
|
||||
writeBinary(node.stats.czxid, out);
|
||||
writeBinary(node.stats.mzxid, out);
|
||||
writeBinary(node.stats.ctime(), out);
|
||||
writeBinary(node.stats.mtime, out);
|
||||
writeBinary(node.stats.version, out);
|
||||
writeBinary(node.stats.cversion, out);
|
||||
writeBinary(node.stats.aversion, out);
|
||||
writeBinary(node.stats.ephemeralOwner(), out);
|
||||
if (version < SnapshotVersion::V6)
|
||||
writeBinary(static_cast<int32_t>(node.getData().size()), out);
|
||||
writeBinary(node.numChildren(), out);
|
||||
writeBinary(node.pzxid, out);
|
||||
writeBinary(static_cast<int32_t>(node.stats.data_size), out);
|
||||
writeBinary(node.stats.numChildren(), out);
|
||||
writeBinary(node.stats.pzxid, out);
|
||||
|
||||
writeBinary(node.seqNum(), out);
|
||||
writeBinary(node.stats.seqNum(), out);
|
||||
|
||||
if (version >= SnapshotVersion::V4 && version <= SnapshotVersion::V5)
|
||||
writeBinary(node.sizeInBytes(), out);
|
||||
@ -100,11 +100,11 @@ namespace
|
||||
template<typename Node>
|
||||
void readNode(Node & node, ReadBuffer & in, SnapshotVersion version, ACLMap & acl_map)
|
||||
{
|
||||
readVarUInt(node.data_size, in);
|
||||
if (node.data_size != 0)
|
||||
readVarUInt(node.stats.data_size, in);
|
||||
if (node.stats.data_size != 0)
|
||||
{
|
||||
node.data = std::unique_ptr<char[]>(new char[node.data_size]);
|
||||
in.readStrict(node.data.get(), node.data_size);
|
||||
node.data = std::unique_ptr<char[]>(new char[node.stats.data_size]);
|
||||
in.readStrict(node.data.get(), node.stats.data_size);
|
||||
}
|
||||
|
||||
if (version >= SnapshotVersion::V1)
|
||||
@ -141,19 +141,19 @@ namespace
|
||||
}
|
||||
|
||||
/// Deserialize stat
|
||||
readBinary(node.czxid, in);
|
||||
readBinary(node.mzxid, in);
|
||||
readBinary(node.stats.czxid, in);
|
||||
readBinary(node.stats.mzxid, in);
|
||||
int64_t ctime;
|
||||
readBinary(ctime, in);
|
||||
node.setCtime(ctime);
|
||||
readBinary(node.mtime, in);
|
||||
readBinary(node.version, in);
|
||||
readBinary(node.cversion, in);
|
||||
readBinary(node.aversion, in);
|
||||
node.stats.setCtime(ctime);
|
||||
readBinary(node.stats.mtime, in);
|
||||
readBinary(node.stats.version, in);
|
||||
readBinary(node.stats.cversion, in);
|
||||
readBinary(node.stats.aversion, in);
|
||||
int64_t ephemeral_owner = 0;
|
||||
readBinary(ephemeral_owner, in);
|
||||
if (ephemeral_owner != 0)
|
||||
node.setEphemeralOwner(ephemeral_owner);
|
||||
node.stats.setEphemeralOwner(ephemeral_owner);
|
||||
|
||||
if (version < SnapshotVersion::V6)
|
||||
{
|
||||
@ -163,14 +163,14 @@ namespace
|
||||
int32_t num_children = 0;
|
||||
readBinary(num_children, in);
|
||||
if (ephemeral_owner == 0)
|
||||
node.setNumChildren(num_children);
|
||||
node.stats.setNumChildren(num_children);
|
||||
|
||||
readBinary(node.pzxid, in);
|
||||
readBinary(node.stats.pzxid, in);
|
||||
|
||||
int32_t seq_num = 0;
|
||||
readBinary(seq_num, in);
|
||||
if (ephemeral_owner == 0)
|
||||
node.setSeqNum(seq_num);
|
||||
node.stats.setSeqNum(seq_num);
|
||||
|
||||
if (version >= SnapshotVersion::V4 && version <= SnapshotVersion::V5)
|
||||
{
|
||||
@ -256,7 +256,7 @@ void KeeperStorageSnapshot<Storage>::serialize(const KeeperStorageSnapshot<Stora
|
||||
/// Benign race condition possible while taking snapshot: NuRaft decide to create snapshot at some log id
|
||||
/// and only after some time we lock storage and enable snapshot mode. So snapshot_container_size can be
|
||||
/// slightly bigger than required.
|
||||
if (node.mzxid > snapshot.zxid)
|
||||
if (node.stats.mzxid > snapshot.zxid)
|
||||
break;
|
||||
writeBinary(path, out);
|
||||
writeNode(node, snapshot.version, out);
|
||||
@ -306,7 +306,7 @@ void KeeperStorageSnapshot<Storage>::serialize(const KeeperStorageSnapshot<Stora
|
||||
}
|
||||
|
||||
template<typename Storage>
|
||||
void KeeperStorageSnapshot<Storage>::deserialize(SnapshotDeserializationResult<Storage> & deserialization_result, ReadBuffer & in, KeeperContextPtr keeper_context)
|
||||
void KeeperStorageSnapshot<Storage>::deserialize(SnapshotDeserializationResult<Storage> & deserialization_result, ReadBuffer & in, KeeperContextPtr keeper_context) TSA_NO_THREAD_SAFETY_ANALYSIS
|
||||
{
|
||||
uint8_t version;
|
||||
readBinary(version, in);
|
||||
@ -435,13 +435,13 @@ void KeeperStorageSnapshot<Storage>::deserialize(SnapshotDeserializationResult<S
|
||||
}
|
||||
}
|
||||
|
||||
auto ephemeral_owner = node.ephemeralOwner();
|
||||
auto ephemeral_owner = node.stats.ephemeralOwner();
|
||||
if constexpr (!use_rocksdb)
|
||||
if (!node.isEphemeral() && node.numChildren() > 0)
|
||||
node.getChildren().reserve(node.numChildren());
|
||||
if (!node.stats.isEphemeral() && node.stats.numChildren() > 0)
|
||||
node.getChildren().reserve(node.stats.numChildren());
|
||||
|
||||
if (ephemeral_owner != 0)
|
||||
storage.ephemerals[node.ephemeralOwner()].insert(std::string{path});
|
||||
storage.committed_ephemerals[node.stats.ephemeralOwner()].insert(std::string{path});
|
||||
|
||||
if (recalculate_digest)
|
||||
storage.nodes_digest += node.getDigest(path);
|
||||
@ -467,16 +467,25 @@ void KeeperStorageSnapshot<Storage>::deserialize(SnapshotDeserializationResult<S
|
||||
{
|
||||
if (itr.key != "/")
|
||||
{
|
||||
if (itr.value.numChildren() != static_cast<int32_t>(itr.value.getChildren().size()))
|
||||
if (itr.value.stats.numChildren() != static_cast<int32_t>(itr.value.getChildren().size()))
|
||||
{
|
||||
#ifdef NDEBUG
|
||||
/// TODO (alesapin) remove this, it should be always CORRUPTED_DATA.
|
||||
LOG_ERROR(getLogger("KeeperSnapshotManager"), "Children counter in stat.numChildren {}"
|
||||
" is different from actual children size {} for node {}", itr.value.numChildren(), itr.value.getChildren().size(), itr.key);
|
||||
LOG_ERROR(
|
||||
getLogger("KeeperSnapshotManager"),
|
||||
"Children counter in stat.numChildren {}"
|
||||
" is different from actual children size {} for node {}",
|
||||
itr.value.stats.numChildren(),
|
||||
itr.value.getChildren().size(),
|
||||
itr.key);
|
||||
#else
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Children counter in stat.numChildren {}"
|
||||
" is different from actual children size {} for node {}",
|
||||
itr.value.numChildren(), itr.value.getChildren().size(), itr.key);
|
||||
throw Exception(
|
||||
ErrorCodes::LOGICAL_ERROR,
|
||||
"Children counter in stat.numChildren {}"
|
||||
" is different from actual children size {} for node {}",
|
||||
itr.value.stats.numChildren(),
|
||||
itr.value.getChildren().size(),
|
||||
itr.key);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
@ -511,7 +520,7 @@ void KeeperStorageSnapshot<Storage>::deserialize(SnapshotDeserializationResult<S
|
||||
session_auth_counter++;
|
||||
}
|
||||
if (!ids.empty())
|
||||
storage.session_and_auth[active_session_id] = ids;
|
||||
storage.committed_session_and_auth[active_session_id] = ids;
|
||||
}
|
||||
current_session_size++;
|
||||
}
|
||||
@ -527,6 +536,8 @@ void KeeperStorageSnapshot<Storage>::deserialize(SnapshotDeserializationResult<S
|
||||
buffer->pos(0);
|
||||
deserialization_result.cluster_config = ClusterConfig::deserialize(*buffer);
|
||||
}
|
||||
|
||||
storage.updateStats();
|
||||
}
|
||||
|
||||
template<typename Storage>
|
||||
@ -544,7 +555,7 @@ KeeperStorageSnapshot<Storage>::KeeperStorageSnapshot(Storage * storage_, uint64
|
||||
begin = storage->getSnapshotIteratorBegin();
|
||||
session_and_timeout = storage->getActiveSessions();
|
||||
acl_map = storage->acl_map.getMapping();
|
||||
session_and_auth = storage->session_and_auth;
|
||||
session_and_auth = storage->committed_session_and_auth;
|
||||
}
|
||||
|
||||
template<typename Storage>
|
||||
@ -563,7 +574,7 @@ KeeperStorageSnapshot<Storage>::KeeperStorageSnapshot(
|
||||
begin = storage->getSnapshotIteratorBegin();
|
||||
session_and_timeout = storage->getActiveSessions();
|
||||
acl_map = storage->acl_map.getMapping();
|
||||
session_and_auth = storage->session_and_auth;
|
||||
session_and_auth = storage->committed_session_and_auth;
|
||||
}
|
||||
|
||||
template<typename Storage>
|
||||
|
@ -36,6 +36,11 @@ namespace ProfileEvents
|
||||
extern const Event KeeperStorageLockWaitMicroseconds;
|
||||
}
|
||||
|
||||
namespace CurrentMetrics
|
||||
{
|
||||
extern const Metric KeeperAliveConnections;
|
||||
}
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
@ -56,6 +61,7 @@ IKeeperStateMachine::IKeeperStateMachine(
|
||||
, snapshots_queue(snapshots_queue_)
|
||||
, min_request_size_to_cache(keeper_context_->getCoordinationSettings()->min_request_size_for_cache)
|
||||
, log(getLogger("KeeperStateMachine"))
|
||||
, read_pool(CurrentMetrics::KeeperAliveConnections, CurrentMetrics::KeeperAliveConnections, CurrentMetrics::KeeperAliveConnections, 100, 10000, 10000)
|
||||
, superdigest(superdigest_)
|
||||
, keeper_context(keeper_context_)
|
||||
, snapshot_manager_s3(snapshot_manager_s3_)
|
||||
@ -175,18 +181,20 @@ void assertDigest(
|
||||
}
|
||||
}
|
||||
|
||||
struct TSA_SCOPED_LOCKABLE LockGuardWithStats final
|
||||
template <bool shared = false>
|
||||
struct LockGuardWithStats final
|
||||
{
|
||||
std::unique_lock<std::mutex> lock;
|
||||
explicit LockGuardWithStats(std::mutex & mutex) TSA_ACQUIRE(mutex)
|
||||
using LockType = std::conditional_t<shared, std::shared_lock<SharedMutex>, std::unique_lock<SharedMutex>>;
|
||||
LockType lock;
|
||||
explicit LockGuardWithStats(SharedMutex & mutex)
|
||||
{
|
||||
Stopwatch watch;
|
||||
std::unique_lock l(mutex);
|
||||
LockType l(mutex);
|
||||
ProfileEvents::increment(ProfileEvents::KeeperStorageLockWaitMicroseconds, watch.elapsedMicroseconds());
|
||||
lock = std::move(l);
|
||||
}
|
||||
|
||||
~LockGuardWithStats() TSA_RELEASE() = default;
|
||||
~LockGuardWithStats() = default;
|
||||
};
|
||||
|
||||
}
|
||||
@ -312,13 +320,12 @@ bool KeeperStateMachine<Storage>::preprocess(const KeeperStorageBase::RequestFor
|
||||
if (op_num == Coordination::OpNum::SessionID || op_num == Coordination::OpNum::Reconfig)
|
||||
return true;
|
||||
|
||||
LockGuardWithStats lock(storage_and_responses_lock);
|
||||
|
||||
if (storage->isFinalized())
|
||||
return false;
|
||||
|
||||
try
|
||||
{
|
||||
LockGuardWithStats<true> lock(storage_mutex);
|
||||
storage->preprocessRequest(
|
||||
request_for_session.request,
|
||||
request_for_session.session_id,
|
||||
@ -335,7 +342,12 @@ bool KeeperStateMachine<Storage>::preprocess(const KeeperStorageBase::RequestFor
|
||||
}
|
||||
|
||||
if (keeper_context->digestEnabled() && request_for_session.digest)
|
||||
assertDigest(*request_for_session.digest, storage->getNodesDigest(false), *request_for_session.request, request_for_session.log_idx, false);
|
||||
assertDigest(
|
||||
*request_for_session.digest,
|
||||
storage->getNodesDigest(false, /*lock_transaction_mutex=*/true),
|
||||
*request_for_session.request,
|
||||
request_for_session.log_idx,
|
||||
false);
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -343,7 +355,7 @@ bool KeeperStateMachine<Storage>::preprocess(const KeeperStorageBase::RequestFor
|
||||
template<typename Storage>
|
||||
void KeeperStateMachine<Storage>::reconfigure(const KeeperStorageBase::RequestForSession& request_for_session)
|
||||
{
|
||||
LockGuardWithStats lock(storage_and_responses_lock);
|
||||
LockGuardWithStats lock(storage_mutex);
|
||||
KeeperStorageBase::ResponseForSession response = processReconfiguration(request_for_session);
|
||||
if (!responses_queue.push(response))
|
||||
{
|
||||
@ -461,7 +473,7 @@ nuraft::ptr<nuraft::buffer> KeeperStateMachine<Storage>::commit(const uint64_t l
|
||||
response_for_session.response = response;
|
||||
response_for_session.request = request_for_session->request;
|
||||
|
||||
LockGuardWithStats lock(storage_and_responses_lock);
|
||||
LockGuardWithStats lock(storage_mutex);
|
||||
session_id = storage->getSessionID(session_id_request.session_timeout_ms);
|
||||
LOG_DEBUG(log, "Session ID response {} with timeout {}", session_id, session_id_request.session_timeout_ms);
|
||||
response->session_id = session_id;
|
||||
@ -472,24 +484,31 @@ nuraft::ptr<nuraft::buffer> KeeperStateMachine<Storage>::commit(const uint64_t l
|
||||
if (op_num == Coordination::OpNum::Close)
|
||||
|
||||
{
|
||||
std::lock_guard lock(request_cache_mutex);
|
||||
std::lock_guard cache_lock(request_cache_mutex);
|
||||
parsed_request_cache.erase(request_for_session->session_id);
|
||||
}
|
||||
|
||||
LockGuardWithStats lock(storage_and_responses_lock);
|
||||
KeeperStorageBase::ResponsesForSessions responses_for_sessions
|
||||
= storage->processRequest(request_for_session->request, request_for_session->session_id, request_for_session->zxid);
|
||||
|
||||
for (auto & response_for_session : responses_for_sessions)
|
||||
{
|
||||
if (response_for_session.response->xid != Coordination::WATCH_XID)
|
||||
response_for_session.request = request_for_session->request;
|
||||
LockGuardWithStats<true> lock(storage_mutex);
|
||||
std::lock_guard response_lock(process_and_responses_lock);
|
||||
KeeperStorageBase::ResponsesForSessions responses_for_sessions
|
||||
= storage->processRequest(request_for_session->request, request_for_session->session_id, request_for_session->zxid);
|
||||
for (auto & response_for_session : responses_for_sessions)
|
||||
{
|
||||
if (response_for_session.response->xid != Coordination::WATCH_XID)
|
||||
response_for_session.request = request_for_session->request;
|
||||
|
||||
try_push(response_for_session);
|
||||
try_push(response_for_session);
|
||||
}
|
||||
}
|
||||
|
||||
if (keeper_context->digestEnabled() && request_for_session->digest)
|
||||
assertDigest(*request_for_session->digest, storage->getNodesDigest(true), *request_for_session->request, request_for_session->log_idx, true);
|
||||
assertDigest(
|
||||
*request_for_session->digest,
|
||||
storage->getNodesDigest(true, /*lock_transaction_mutex=*/true),
|
||||
*request_for_session->request,
|
||||
request_for_session->log_idx,
|
||||
true);
|
||||
}
|
||||
|
||||
ProfileEvents::increment(ProfileEvents::KeeperCommits);
|
||||
@ -534,8 +553,6 @@ bool KeeperStateMachine<Storage>::apply_snapshot(nuraft::snapshot & s)
|
||||
}
|
||||
|
||||
{ /// deserialize and apply snapshot to storage
|
||||
LockGuardWithStats lock(storage_and_responses_lock);
|
||||
|
||||
SnapshotDeserializationResult<Storage> snapshot_deserialization_result;
|
||||
if (latest_snapshot_ptr)
|
||||
snapshot_deserialization_result = snapshot_manager.deserializeSnapshotFromBuffer(latest_snapshot_ptr);
|
||||
@ -543,6 +560,7 @@ bool KeeperStateMachine<Storage>::apply_snapshot(nuraft::snapshot & s)
|
||||
snapshot_deserialization_result
|
||||
= snapshot_manager.deserializeSnapshotFromBuffer(snapshot_manager.deserializeSnapshotBufferFromDisk(s.get_last_log_idx()));
|
||||
|
||||
LockGuardWithStats storage_lock(storage_mutex);
|
||||
/// maybe some logs were preprocessed with log idx larger than the snapshot idx
|
||||
/// we have to apply them to the new storage
|
||||
storage->applyUncommittedState(*snapshot_deserialization_result.storage, snapshot_deserialization_result.snapshot_meta->get_last_log_idx());
|
||||
@ -587,16 +605,7 @@ void KeeperStateMachine<Storage>::rollbackRequest(const KeeperStorageBase::Reque
|
||||
if (request_for_session.request->getOpNum() == Coordination::OpNum::SessionID)
|
||||
return;
|
||||
|
||||
LockGuardWithStats lock(storage_and_responses_lock);
|
||||
storage->rollbackRequest(request_for_session.zxid, allow_missing);
|
||||
}
|
||||
|
||||
template<typename Storage>
|
||||
void KeeperStateMachine<Storage>::rollbackRequestNoLock(const KeeperStorageBase::RequestForSession & request_for_session, bool allow_missing)
|
||||
{
|
||||
if (request_for_session.request->getOpNum() == Coordination::OpNum::SessionID)
|
||||
return;
|
||||
|
||||
LockGuardWithStats lock(storage_mutex);
|
||||
storage->rollbackRequest(request_for_session.zxid, allow_missing);
|
||||
}
|
||||
|
||||
@ -616,7 +625,7 @@ void KeeperStateMachine<Storage>::create_snapshot(nuraft::snapshot & s, nuraft::
|
||||
auto snapshot_meta_copy = nuraft::snapshot::deserialize(*snp_buf);
|
||||
CreateSnapshotTask snapshot_task;
|
||||
{ /// lock storage for a short period time to turn on "snapshot mode". After that we can read consistent storage state without locking.
|
||||
LockGuardWithStats lock(storage_and_responses_lock);
|
||||
LockGuardWithStats lock(storage_mutex);
|
||||
snapshot_task.snapshot = std::make_shared<KeeperStorageSnapshot<Storage>>(storage.get(), snapshot_meta_copy, getClusterConfig());
|
||||
}
|
||||
|
||||
@ -681,7 +690,7 @@ void KeeperStateMachine<Storage>::create_snapshot(nuraft::snapshot & s, nuraft::
|
||||
}
|
||||
{
|
||||
/// Destroy snapshot with lock
|
||||
LockGuardWithStats lock(storage_and_responses_lock);
|
||||
LockGuardWithStats lock(storage_mutex);
|
||||
LOG_TRACE(log, "Clearing garbage after snapshot");
|
||||
/// Turn off "snapshot mode" and clear outdate part of storage state
|
||||
storage->clearGarbageAfterSnapshot();
|
||||
@ -824,10 +833,10 @@ template<typename Storage>
|
||||
void KeeperStateMachine<Storage>::processReadRequest(const KeeperStorageBase::RequestForSession & request_for_session)
|
||||
{
|
||||
/// Pure local request, just process it with storage
|
||||
LockGuardWithStats lock(storage_and_responses_lock);
|
||||
LockGuardWithStats<true> storage_lock(storage_mutex);
|
||||
std::lock_guard response_lock(process_and_responses_lock);
|
||||
auto responses = storage->processRequest(
|
||||
request_for_session.request, request_for_session.session_id, std::nullopt, true /*check_acl*/, true /*is_local*/);
|
||||
|
||||
for (auto & response_for_session : responses)
|
||||
{
|
||||
if (response_for_session.response->xid != Coordination::WATCH_XID)
|
||||
@ -840,112 +849,116 @@ void KeeperStateMachine<Storage>::processReadRequest(const KeeperStorageBase::Re
|
||||
template<typename Storage>
|
||||
void KeeperStateMachine<Storage>::shutdownStorage()
|
||||
{
|
||||
LockGuardWithStats lock(storage_and_responses_lock);
|
||||
LockGuardWithStats lock(storage_mutex);
|
||||
storage->finalize();
|
||||
}
|
||||
|
||||
template<typename Storage>
|
||||
std::vector<int64_t> KeeperStateMachine<Storage>::getDeadSessions()
|
||||
{
|
||||
LockGuardWithStats lock(storage_and_responses_lock);
|
||||
LockGuardWithStats lock(storage_mutex);
|
||||
return storage->getDeadSessions();
|
||||
}
|
||||
|
||||
template<typename Storage>
|
||||
int64_t KeeperStateMachine<Storage>::getNextZxid() const
|
||||
{
|
||||
LockGuardWithStats lock(storage_and_responses_lock);
|
||||
return storage->getNextZXID();
|
||||
}
|
||||
|
||||
template<typename Storage>
|
||||
KeeperStorageBase::Digest KeeperStateMachine<Storage>::getNodesDigest() const
|
||||
{
|
||||
LockGuardWithStats lock(storage_and_responses_lock);
|
||||
return storage->getNodesDigest(false);
|
||||
LockGuardWithStats lock(storage_mutex);
|
||||
return storage->getNodesDigest(false, /*lock_transaction_mutex=*/true);
|
||||
}
|
||||
|
||||
template<typename Storage>
|
||||
uint64_t KeeperStateMachine<Storage>::getLastProcessedZxid() const
|
||||
{
|
||||
LockGuardWithStats lock(storage_and_responses_lock);
|
||||
return storage->getZXID();
|
||||
}
|
||||
|
||||
template<typename Storage>
|
||||
const KeeperStorageBase::Stats & KeeperStateMachine<Storage>::getStorageStats() const TSA_NO_THREAD_SAFETY_ANALYSIS
|
||||
{
|
||||
return storage->getStorageStats();
|
||||
}
|
||||
|
||||
template<typename Storage>
|
||||
uint64_t KeeperStateMachine<Storage>::getNodesCount() const
|
||||
{
|
||||
LockGuardWithStats lock(storage_and_responses_lock);
|
||||
LockGuardWithStats lock(storage_mutex);
|
||||
return storage->getNodesCount();
|
||||
}
|
||||
|
||||
template<typename Storage>
|
||||
uint64_t KeeperStateMachine<Storage>::getTotalWatchesCount() const
|
||||
{
|
||||
LockGuardWithStats lock(storage_and_responses_lock);
|
||||
LockGuardWithStats lock(storage_mutex);
|
||||
return storage->getTotalWatchesCount();
|
||||
}
|
||||
|
||||
template<typename Storage>
|
||||
uint64_t KeeperStateMachine<Storage>::getWatchedPathsCount() const
|
||||
{
|
||||
LockGuardWithStats lock(storage_and_responses_lock);
|
||||
LockGuardWithStats lock(storage_mutex);
|
||||
return storage->getWatchedPathsCount();
|
||||
}
|
||||
|
||||
template<typename Storage>
|
||||
uint64_t KeeperStateMachine<Storage>::getSessionsWithWatchesCount() const
|
||||
{
|
||||
LockGuardWithStats lock(storage_and_responses_lock);
|
||||
LockGuardWithStats lock(storage_mutex);
|
||||
return storage->getSessionsWithWatchesCount();
|
||||
}
|
||||
|
||||
template<typename Storage>
|
||||
uint64_t KeeperStateMachine<Storage>::getTotalEphemeralNodesCount() const
|
||||
{
|
||||
LockGuardWithStats lock(storage_and_responses_lock);
|
||||
LockGuardWithStats lock(storage_mutex);
|
||||
return storage->getTotalEphemeralNodesCount();
|
||||
}
|
||||
|
||||
template<typename Storage>
|
||||
uint64_t KeeperStateMachine<Storage>::getSessionWithEphemeralNodesCount() const
|
||||
{
|
||||
LockGuardWithStats lock(storage_and_responses_lock);
|
||||
LockGuardWithStats lock(storage_mutex);
|
||||
return storage->getSessionWithEphemeralNodesCount();
|
||||
}
|
||||
|
||||
template<typename Storage>
|
||||
void KeeperStateMachine<Storage>::dumpWatches(WriteBufferFromOwnString & buf) const
|
||||
{
|
||||
LockGuardWithStats lock(storage_and_responses_lock);
|
||||
LockGuardWithStats lock(storage_mutex);
|
||||
storage->dumpWatches(buf);
|
||||
}
|
||||
|
||||
template<typename Storage>
|
||||
void KeeperStateMachine<Storage>::dumpWatchesByPath(WriteBufferFromOwnString & buf) const
|
||||
{
|
||||
LockGuardWithStats lock(storage_and_responses_lock);
|
||||
LockGuardWithStats lock(storage_mutex);
|
||||
storage->dumpWatchesByPath(buf);
|
||||
}
|
||||
|
||||
template<typename Storage>
|
||||
void KeeperStateMachine<Storage>::dumpSessionsAndEphemerals(WriteBufferFromOwnString & buf) const
|
||||
{
|
||||
LockGuardWithStats lock(storage_and_responses_lock);
|
||||
LockGuardWithStats lock(storage_mutex);
|
||||
storage->dumpSessionsAndEphemerals(buf);
|
||||
}
|
||||
|
||||
template<typename Storage>
|
||||
uint64_t KeeperStateMachine<Storage>::getApproximateDataSize() const
|
||||
{
|
||||
LockGuardWithStats lock(storage_and_responses_lock);
|
||||
LockGuardWithStats lock(storage_mutex);
|
||||
return storage->getApproximateDataSize();
|
||||
}
|
||||
|
||||
template<typename Storage>
|
||||
uint64_t KeeperStateMachine<Storage>::getKeyArenaSize() const
|
||||
{
|
||||
LockGuardWithStats lock(storage_and_responses_lock);
|
||||
LockGuardWithStats lock(storage_mutex);
|
||||
return storage->getArenaDataSize();
|
||||
}
|
||||
|
||||
@ -988,7 +1001,7 @@ ClusterConfigPtr IKeeperStateMachine::getClusterConfig() const
|
||||
template<typename Storage>
|
||||
void KeeperStateMachine<Storage>::recalculateStorageStats()
|
||||
{
|
||||
LockGuardWithStats lock(storage_and_responses_lock);
|
||||
LockGuardWithStats lock(storage_mutex);
|
||||
LOG_INFO(log, "Recalculating storage stats");
|
||||
storage->recalculateStats();
|
||||
LOG_INFO(log, "Done recalculating storage stats");
|
||||
|
@ -85,6 +85,8 @@ public:
|
||||
/// Introspection functions for 4lw commands
|
||||
virtual uint64_t getLastProcessedZxid() const = 0;
|
||||
|
||||
virtual const KeeperStorageBase::Stats & getStorageStats() const = 0;
|
||||
|
||||
virtual uint64_t getNodesCount() const = 0;
|
||||
virtual uint64_t getTotalWatchesCount() const = 0;
|
||||
virtual uint64_t getWatchedPathsCount() const = 0;
|
||||
@ -124,12 +126,16 @@ protected:
|
||||
/// Mutex for snapshots
|
||||
mutable std::mutex snapshots_lock;
|
||||
|
||||
/// Lock for storage and responses_queue. It's important to process requests
|
||||
/// Lock for the storage
|
||||
/// Storage works in thread-safe way ONLY for preprocessing/processing
|
||||
/// In any other case, unique storage lock needs to be taken
|
||||
mutable SharedMutex storage_mutex;
|
||||
/// Lock for processing and responses_queue. It's important to process requests
|
||||
/// and push them to the responses queue while holding this lock. Otherwise
|
||||
/// we can get strange cases when, for example client send read request with
|
||||
/// watch and after that receive watch response and only receive response
|
||||
/// for request.
|
||||
mutable std::mutex storage_and_responses_lock;
|
||||
mutable std::mutex process_and_responses_lock;
|
||||
|
||||
std::unordered_map<int64_t, std::unordered_map<Coordination::XID, std::shared_ptr<KeeperStorageBase::RequestForSession>>> parsed_request_cache;
|
||||
uint64_t min_request_size_to_cache{0};
|
||||
@ -146,6 +152,7 @@ protected:
|
||||
mutable std::mutex cluster_config_lock;
|
||||
ClusterConfigPtr cluster_config;
|
||||
|
||||
ThreadPool read_pool;
|
||||
/// Special part of ACL system -- superdigest specified in server config.
|
||||
const std::string superdigest;
|
||||
|
||||
@ -153,10 +160,8 @@ protected:
|
||||
|
||||
KeeperSnapshotManagerS3 * snapshot_manager_s3;
|
||||
|
||||
virtual KeeperStorageBase::ResponseForSession processReconfiguration(
|
||||
const KeeperStorageBase::RequestForSession& request_for_session)
|
||||
TSA_REQUIRES(storage_and_responses_lock) = 0;
|
||||
|
||||
virtual KeeperStorageBase::ResponseForSession processReconfiguration(const KeeperStorageBase::RequestForSession & request_for_session)
|
||||
= 0;
|
||||
};
|
||||
|
||||
/// ClickHouse Keeper state machine. Wrapper for KeeperStorage.
|
||||
@ -189,10 +194,6 @@ public:
|
||||
// (can happen in case of exception during preprocessing)
|
||||
void rollbackRequest(const KeeperStorageBase::RequestForSession & request_for_session, bool allow_missing) override;
|
||||
|
||||
void rollbackRequestNoLock(
|
||||
const KeeperStorageBase::RequestForSession & request_for_session,
|
||||
bool allow_missing) TSA_NO_THREAD_SAFETY_ANALYSIS;
|
||||
|
||||
/// Apply preliminarily saved (save_logical_snp_obj) snapshot to our state.
|
||||
bool apply_snapshot(nuraft::snapshot & s) override;
|
||||
|
||||
@ -205,7 +206,7 @@ public:
|
||||
// This should be used only for tests or keeper-data-dumper because it violates
|
||||
// TSA -- we can't acquire the lock outside of this class or return a storage under lock
|
||||
// in a reasonable way.
|
||||
Storage & getStorageUnsafe() TSA_NO_THREAD_SAFETY_ANALYSIS
|
||||
Storage & getStorageUnsafe()
|
||||
{
|
||||
return *storage;
|
||||
}
|
||||
@ -224,6 +225,8 @@ public:
|
||||
/// Introspection functions for 4lw commands
|
||||
uint64_t getLastProcessedZxid() const override;
|
||||
|
||||
const KeeperStorageBase::Stats & getStorageStats() const override;
|
||||
|
||||
uint64_t getNodesCount() const override;
|
||||
uint64_t getTotalWatchesCount() const override;
|
||||
uint64_t getWatchedPathsCount() const override;
|
||||
@ -245,12 +248,12 @@ public:
|
||||
|
||||
private:
|
||||
/// Main state machine logic
|
||||
std::unique_ptr<Storage> storage; //TSA_PT_GUARDED_BY(storage_and_responses_lock);
|
||||
std::unique_ptr<Storage> storage;
|
||||
|
||||
/// Save/Load and Serialize/Deserialize logic for snapshots.
|
||||
KeeperSnapshotManager<Storage> snapshot_manager;
|
||||
|
||||
KeeperStorageBase::ResponseForSession processReconfiguration(const KeeperStorageBase::RequestForSession & request_for_session)
|
||||
TSA_REQUIRES(storage_and_responses_lock) override;
|
||||
KeeperStorageBase::ResponseForSession processReconfiguration(const KeeperStorageBase::RequestForSession & request_for_session) override;
|
||||
};
|
||||
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,10 +1,16 @@
|
||||
#pragma once
|
||||
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
#include <Coordination/ACLMap.h>
|
||||
#include <Coordination/SessionExpiryQueue.h>
|
||||
#include <Coordination/SnapshotableHashTable.h>
|
||||
#include "Common/StringHashForHeterogeneousLookup.h"
|
||||
#include <Common/SharedMutex.h>
|
||||
#include <Common/Concepts.h>
|
||||
|
||||
#include <base/defines.h>
|
||||
|
||||
#include <absl/container/flat_hash_set.h>
|
||||
|
||||
@ -23,14 +29,11 @@ using ResponseCallback = std::function<void(const Coordination::ZooKeeperRespons
|
||||
using ChildrenSet = absl::flat_hash_set<StringRef, StringRefHash>;
|
||||
using SessionAndTimeout = std::unordered_map<int64_t, int64_t>;
|
||||
|
||||
/// KeeperRocksNodeInfo is used in RocksDB keeper.
|
||||
/// It is serialized directly as POD to RocksDB.
|
||||
struct KeeperRocksNodeInfo
|
||||
struct NodeStats
|
||||
{
|
||||
int64_t czxid{0};
|
||||
int64_t mzxid{0};
|
||||
int64_t pzxid{0};
|
||||
uint64_t acl_id = 0; /// 0 -- no ACL by default
|
||||
|
||||
int64_t mtime{0};
|
||||
|
||||
@ -38,225 +41,9 @@ struct KeeperRocksNodeInfo
|
||||
int32_t cversion{0};
|
||||
int32_t aversion{0};
|
||||
|
||||
int32_t seq_num = 0;
|
||||
mutable UInt64 digest = 0; /// we cached digest for this node.
|
||||
|
||||
/// as ctime can't be negative because it stores the timestamp when the
|
||||
/// node was created, we can use the MSB for a bool
|
||||
struct
|
||||
{
|
||||
bool is_ephemeral : 1;
|
||||
int64_t ctime : 63;
|
||||
} is_ephemeral_and_ctime{false, 0};
|
||||
|
||||
/// ephemeral notes cannot have children so a node can set either
|
||||
/// ephemeral_owner OR seq_num + num_children
|
||||
union
|
||||
{
|
||||
int64_t ephemeral_owner;
|
||||
struct
|
||||
{
|
||||
int32_t seq_num;
|
||||
int32_t num_children;
|
||||
} children_info;
|
||||
} ephemeral_or_children_data{0};
|
||||
|
||||
bool isEphemeral() const
|
||||
{
|
||||
return is_ephemeral_and_ctime.is_ephemeral;
|
||||
}
|
||||
|
||||
int64_t ephemeralOwner() const
|
||||
{
|
||||
if (isEphemeral())
|
||||
return ephemeral_or_children_data.ephemeral_owner;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void setEphemeralOwner(int64_t ephemeral_owner)
|
||||
{
|
||||
is_ephemeral_and_ctime.is_ephemeral = ephemeral_owner != 0;
|
||||
ephemeral_or_children_data.ephemeral_owner = ephemeral_owner;
|
||||
}
|
||||
|
||||
int32_t numChildren() const
|
||||
{
|
||||
if (isEphemeral())
|
||||
return 0;
|
||||
|
||||
return ephemeral_or_children_data.children_info.num_children;
|
||||
}
|
||||
|
||||
void setNumChildren(int32_t num_children)
|
||||
{
|
||||
ephemeral_or_children_data.children_info.num_children = num_children;
|
||||
}
|
||||
|
||||
/// dummy interface for test
|
||||
void addChild(StringRef) {}
|
||||
auto getChildren() const
|
||||
{
|
||||
return std::vector<int>(numChildren());
|
||||
}
|
||||
|
||||
void increaseNumChildren()
|
||||
{
|
||||
chassert(!isEphemeral());
|
||||
++ephemeral_or_children_data.children_info.num_children;
|
||||
}
|
||||
|
||||
void decreaseNumChildren()
|
||||
{
|
||||
chassert(!isEphemeral());
|
||||
--ephemeral_or_children_data.children_info.num_children;
|
||||
}
|
||||
|
||||
int32_t seqNum() const
|
||||
{
|
||||
if (isEphemeral())
|
||||
return 0;
|
||||
|
||||
return ephemeral_or_children_data.children_info.seq_num;
|
||||
}
|
||||
|
||||
void setSeqNum(int32_t seq_num_)
|
||||
{
|
||||
ephemeral_or_children_data.children_info.seq_num = seq_num_;
|
||||
}
|
||||
|
||||
void increaseSeqNum()
|
||||
{
|
||||
chassert(!isEphemeral());
|
||||
++ephemeral_or_children_data.children_info.seq_num;
|
||||
}
|
||||
|
||||
int64_t ctime() const
|
||||
{
|
||||
return is_ephemeral_and_ctime.ctime;
|
||||
}
|
||||
|
||||
void setCtime(uint64_t ctime)
|
||||
{
|
||||
is_ephemeral_and_ctime.ctime = ctime;
|
||||
}
|
||||
uint32_t data_size{0};
|
||||
|
||||
void copyStats(const Coordination::Stat & stat);
|
||||
};
|
||||
|
||||
/// KeeperRocksNode is the memory structure used by RocksDB
|
||||
struct KeeperRocksNode : public KeeperRocksNodeInfo
|
||||
{
|
||||
#if USE_ROCKSDB
|
||||
friend struct RocksDBContainer<KeeperRocksNode>;
|
||||
#endif
|
||||
using Meta = KeeperRocksNodeInfo;
|
||||
|
||||
uint64_t size_bytes = 0; // only for compatible, should be deprecated
|
||||
|
||||
uint64_t sizeInBytes() const { return data_size + sizeof(KeeperRocksNodeInfo); }
|
||||
void setData(String new_data)
|
||||
{
|
||||
data_size = static_cast<uint32_t>(new_data.size());
|
||||
if (data_size != 0)
|
||||
{
|
||||
data = std::unique_ptr<char[]>(new char[new_data.size()]);
|
||||
memcpy(data.get(), new_data.data(), data_size);
|
||||
}
|
||||
}
|
||||
|
||||
void shallowCopy(const KeeperRocksNode & other)
|
||||
{
|
||||
czxid = other.czxid;
|
||||
mzxid = other.mzxid;
|
||||
pzxid = other.pzxid;
|
||||
acl_id = other.acl_id; /// 0 -- no ACL by default
|
||||
|
||||
mtime = other.mtime;
|
||||
|
||||
is_ephemeral_and_ctime = other.is_ephemeral_and_ctime;
|
||||
|
||||
ephemeral_or_children_data = other.ephemeral_or_children_data;
|
||||
|
||||
data_size = other.data_size;
|
||||
if (data_size != 0)
|
||||
{
|
||||
data = std::unique_ptr<char[]>(new char[data_size]);
|
||||
memcpy(data.get(), other.data.get(), data_size);
|
||||
}
|
||||
|
||||
version = other.version;
|
||||
cversion = other.cversion;
|
||||
aversion = other.aversion;
|
||||
|
||||
/// cached_digest = other.cached_digest;
|
||||
}
|
||||
void invalidateDigestCache() const;
|
||||
UInt64 getDigest(std::string_view path) const;
|
||||
String getEncodedString();
|
||||
void decodeFromString(const String & buffer_str);
|
||||
void recalculateSize() {}
|
||||
std::string_view getData() const noexcept { return {data.get(), data_size}; }
|
||||
|
||||
void setResponseStat(Coordination::Stat & response_stat) const
|
||||
{
|
||||
response_stat.czxid = czxid;
|
||||
response_stat.mzxid = mzxid;
|
||||
response_stat.ctime = ctime();
|
||||
response_stat.mtime = mtime;
|
||||
response_stat.version = version;
|
||||
response_stat.cversion = cversion;
|
||||
response_stat.aversion = aversion;
|
||||
response_stat.ephemeralOwner = ephemeralOwner();
|
||||
response_stat.dataLength = static_cast<int32_t>(data_size);
|
||||
response_stat.numChildren = numChildren();
|
||||
response_stat.pzxid = pzxid;
|
||||
}
|
||||
|
||||
void reset()
|
||||
{
|
||||
serialized = false;
|
||||
}
|
||||
bool empty() const
|
||||
{
|
||||
return data_size == 0 && mzxid == 0;
|
||||
}
|
||||
std::unique_ptr<char[]> data{nullptr};
|
||||
uint32_t data_size{0};
|
||||
private:
|
||||
bool serialized = false;
|
||||
};
|
||||
|
||||
/// KeeperMemNode should have as minimal size as possible to reduce memory footprint
|
||||
/// of stored nodes
|
||||
/// New fields should be added to the struct only if it's really necessary
|
||||
struct KeeperMemNode
|
||||
{
|
||||
int64_t czxid{0};
|
||||
int64_t mzxid{0};
|
||||
int64_t pzxid{0};
|
||||
uint64_t acl_id = 0; /// 0 -- no ACL by default
|
||||
|
||||
int64_t mtime{0};
|
||||
|
||||
std::unique_ptr<char[]> data{nullptr};
|
||||
uint32_t data_size{0};
|
||||
|
||||
int32_t version{0};
|
||||
int32_t cversion{0};
|
||||
int32_t aversion{0};
|
||||
|
||||
mutable uint64_t cached_digest = 0;
|
||||
|
||||
KeeperMemNode() = default;
|
||||
|
||||
KeeperMemNode & operator=(const KeeperMemNode & other);
|
||||
KeeperMemNode(const KeeperMemNode & other);
|
||||
|
||||
KeeperMemNode & operator=(KeeperMemNode && other) noexcept;
|
||||
KeeperMemNode(KeeperMemNode && other) noexcept;
|
||||
|
||||
bool empty() const;
|
||||
|
||||
bool isEphemeral() const
|
||||
{
|
||||
@ -287,6 +74,7 @@ struct KeeperMemNode
|
||||
|
||||
void setNumChildren(int32_t num_children)
|
||||
{
|
||||
is_ephemeral_and_ctime.is_ephemeral = false;
|
||||
ephemeral_or_children_data.children_info.num_children = num_children;
|
||||
}
|
||||
|
||||
@ -331,34 +119,6 @@ struct KeeperMemNode
|
||||
is_ephemeral_and_ctime.ctime = ctime;
|
||||
}
|
||||
|
||||
void copyStats(const Coordination::Stat & stat);
|
||||
|
||||
void setResponseStat(Coordination::Stat & response_stat) const;
|
||||
|
||||
/// Object memory size
|
||||
uint64_t sizeInBytes() const;
|
||||
|
||||
void setData(const String & new_data);
|
||||
|
||||
std::string_view getData() const noexcept { return {data.get(), data_size}; }
|
||||
|
||||
void addChild(StringRef child_path);
|
||||
|
||||
void removeChild(StringRef child_path);
|
||||
|
||||
const auto & getChildren() const noexcept { return children; }
|
||||
auto & getChildren() { return children; }
|
||||
|
||||
// Invalidate the calculated digest so it's recalculated again on the next
|
||||
// getDigest call
|
||||
void invalidateDigestCache() const;
|
||||
|
||||
// get the calculated digest of the node
|
||||
UInt64 getDigest(std::string_view path) const;
|
||||
|
||||
// copy only necessary information for preprocessing and digest calculation
|
||||
// (e.g. we don't need to copy list of children)
|
||||
void shallowCopy(const KeeperMemNode & other);
|
||||
private:
|
||||
/// as ctime can't be negative because it stores the timestamp when the
|
||||
/// node was created, we can use the MSB for a bool
|
||||
@ -379,7 +139,132 @@ private:
|
||||
int32_t num_children;
|
||||
} children_info;
|
||||
} ephemeral_or_children_data{0};
|
||||
};
|
||||
|
||||
/// KeeperRocksNodeInfo is used in RocksDB keeper.
|
||||
/// It is serialized directly as POD to RocksDB.
|
||||
struct KeeperRocksNodeInfo
|
||||
{
|
||||
NodeStats stats;
|
||||
uint64_t acl_id = 0; /// 0 -- no ACL by default
|
||||
|
||||
/// dummy interface for test
|
||||
void addChild(StringRef) {}
|
||||
auto getChildren() const
|
||||
{
|
||||
return std::vector<int>(stats.numChildren());
|
||||
}
|
||||
|
||||
void copyStats(const Coordination::Stat & stat);
|
||||
};
|
||||
|
||||
/// KeeperRocksNode is the memory structure used by RocksDB
|
||||
struct KeeperRocksNode : public KeeperRocksNodeInfo
|
||||
{
|
||||
#if USE_ROCKSDB
|
||||
friend struct RocksDBContainer<KeeperRocksNode>;
|
||||
#endif
|
||||
using Meta = KeeperRocksNodeInfo;
|
||||
|
||||
uint64_t size_bytes = 0; // only for compatible, should be deprecated
|
||||
|
||||
uint64_t sizeInBytes() const { return stats.data_size + sizeof(KeeperRocksNodeInfo); }
|
||||
|
||||
void setData(String new_data)
|
||||
{
|
||||
stats.data_size = static_cast<uint32_t>(new_data.size());
|
||||
if (stats.data_size != 0)
|
||||
{
|
||||
data = std::unique_ptr<char[]>(new char[new_data.size()]);
|
||||
memcpy(data.get(), new_data.data(), stats.data_size);
|
||||
}
|
||||
}
|
||||
|
||||
void shallowCopy(const KeeperRocksNode & other)
|
||||
{
|
||||
stats = other.stats;
|
||||
acl_id = other.acl_id;
|
||||
if (stats.data_size != 0)
|
||||
{
|
||||
data = std::unique_ptr<char[]>(new char[stats.data_size]);
|
||||
memcpy(data.get(), other.data.get(), stats.data_size);
|
||||
}
|
||||
|
||||
/// cached_digest = other.cached_digest;
|
||||
}
|
||||
void invalidateDigestCache() const;
|
||||
UInt64 getDigest(std::string_view path) const;
|
||||
String getEncodedString();
|
||||
void decodeFromString(const String & buffer_str);
|
||||
void recalculateSize() {}
|
||||
std::string_view getData() const noexcept { return {data.get(), stats.data_size}; }
|
||||
|
||||
void setResponseStat(Coordination::Stat & response_stat) const;
|
||||
|
||||
void reset()
|
||||
{
|
||||
serialized = false;
|
||||
}
|
||||
bool empty() const
|
||||
{
|
||||
return stats.data_size == 0 && stats.mzxid == 0;
|
||||
}
|
||||
std::unique_ptr<char[]> data{nullptr};
|
||||
mutable UInt64 cached_digest = 0; /// we cached digest for this node.
|
||||
private:
|
||||
bool serialized = false;
|
||||
};
|
||||
|
||||
/// KeeperMemNode should have as minimal size as possible to reduce memory footprint
|
||||
/// of stored nodes
|
||||
/// New fields should be added to the struct only if it's really necessary
|
||||
struct KeeperMemNode
|
||||
{
|
||||
NodeStats stats;
|
||||
std::unique_ptr<char[]> data{nullptr};
|
||||
mutable uint64_t cached_digest = 0;
|
||||
|
||||
uint64_t acl_id = 0; /// 0 -- no ACL by default
|
||||
|
||||
KeeperMemNode() = default;
|
||||
|
||||
KeeperMemNode & operator=(const KeeperMemNode & other);
|
||||
KeeperMemNode(const KeeperMemNode & other);
|
||||
|
||||
KeeperMemNode & operator=(KeeperMemNode && other) noexcept;
|
||||
KeeperMemNode(KeeperMemNode && other) noexcept;
|
||||
|
||||
bool empty() const;
|
||||
|
||||
void copyStats(const Coordination::Stat & stat);
|
||||
|
||||
void setResponseStat(Coordination::Stat & response_stat) const;
|
||||
|
||||
/// Object memory size
|
||||
uint64_t sizeInBytes() const;
|
||||
|
||||
void setData(const String & new_data);
|
||||
|
||||
std::string_view getData() const noexcept { return {data.get(), stats.data_size}; }
|
||||
|
||||
void addChild(StringRef child_path);
|
||||
|
||||
void removeChild(StringRef child_path);
|
||||
|
||||
const auto & getChildren() const noexcept { return children; }
|
||||
auto & getChildren() { return children; }
|
||||
|
||||
// Invalidate the calculated digest so it's recalculated again on the next
|
||||
// getDigest call
|
||||
void invalidateDigestCache() const;
|
||||
|
||||
// get the calculated digest of the node
|
||||
UInt64 getDigest(std::string_view path) const;
|
||||
|
||||
// copy only necessary information for preprocessing and digest calculation
|
||||
// (e.g. we don't need to copy list of children)
|
||||
void shallowCopy(const KeeperMemNode & other);
|
||||
private:
|
||||
ChildrenSet children{};
|
||||
};
|
||||
|
||||
@ -430,18 +315,187 @@ public:
|
||||
};
|
||||
|
||||
using Ephemerals = std::unordered_map<int64_t, std::unordered_set<std::string>>;
|
||||
using SessionAndWatcher = std::unordered_map<int64_t, std::unordered_set<std::string>>;
|
||||
struct WatchInfo
|
||||
{
|
||||
std::string_view path;
|
||||
bool is_list_watch;
|
||||
|
||||
bool operator==(const WatchInfo &) const = default;
|
||||
};
|
||||
|
||||
struct WatchInfoHash
|
||||
{
|
||||
auto operator()(WatchInfo info) const
|
||||
{
|
||||
SipHash hash;
|
||||
hash.update(info.path);
|
||||
hash.update(info.is_list_watch);
|
||||
return hash.get64();
|
||||
}
|
||||
};
|
||||
|
||||
using SessionAndWatcher = std::unordered_map<int64_t, std::unordered_set<WatchInfo, WatchInfoHash>>;
|
||||
using SessionIDs = std::unordered_set<int64_t>;
|
||||
|
||||
/// Just vector of SHA1 from user:password
|
||||
using AuthIDs = std::vector<AuthID>;
|
||||
using SessionAndAuth = std::unordered_map<int64_t, AuthIDs>;
|
||||
using Watches = std::unordered_map<String /* path, relative of root_path */, SessionIDs>;
|
||||
using Watches = std::unordered_map<
|
||||
String /* path, relative of root_path */,
|
||||
SessionIDs,
|
||||
StringHashForHeterogeneousLookup,
|
||||
StringHashForHeterogeneousLookup::transparent_key_equal>;
|
||||
|
||||
// Applying ZooKeeper request to storage consists of two steps:
|
||||
// - preprocessing which, instead of applying the changes directly to storage,
|
||||
// generates deltas with those changes, denoted with the request ZXID
|
||||
// - processing which applies deltas with the correct ZXID to the storage
|
||||
//
|
||||
// Delta objects allow us two things:
|
||||
// - fetch the latest, uncommitted state of an object by getting the committed
|
||||
// state of that same object from the storage and applying the deltas
|
||||
// in the same order as they are defined
|
||||
// - quickly commit the changes to the storage
|
||||
struct CreateNodeDelta
|
||||
{
|
||||
Coordination::Stat stat;
|
||||
Coordination::ACLs acls;
|
||||
String data;
|
||||
};
|
||||
|
||||
struct RemoveNodeDelta
|
||||
{
|
||||
int32_t version{-1};
|
||||
NodeStats stat;
|
||||
Coordination::ACLs acls;
|
||||
String data;
|
||||
};
|
||||
|
||||
struct UpdateNodeStatDelta
|
||||
{
|
||||
template <is_any_of<KeeperMemNode, KeeperRocksNode> Node>
|
||||
explicit UpdateNodeStatDelta(const Node & node)
|
||||
: old_stats(node.stats)
|
||||
, new_stats(node.stats)
|
||||
{}
|
||||
|
||||
NodeStats old_stats;
|
||||
NodeStats new_stats;
|
||||
int32_t version{-1};
|
||||
};
|
||||
|
||||
struct UpdateNodeDataDelta
|
||||
{
|
||||
|
||||
std::string old_data;
|
||||
std::string new_data;
|
||||
int32_t version{-1};
|
||||
};
|
||||
|
||||
struct SetACLDelta
|
||||
{
|
||||
Coordination::ACLs old_acls;
|
||||
Coordination::ACLs new_acls;
|
||||
int32_t version{-1};
|
||||
};
|
||||
|
||||
struct ErrorDelta
|
||||
{
|
||||
Coordination::Error error;
|
||||
};
|
||||
|
||||
struct FailedMultiDelta
|
||||
{
|
||||
std::vector<Coordination::Error> error_codes;
|
||||
Coordination::Error global_error{Coordination::Error::ZOK};
|
||||
};
|
||||
|
||||
// Denotes end of a subrequest in multi request
|
||||
struct SubDeltaEnd
|
||||
{
|
||||
};
|
||||
|
||||
struct AddAuthDelta
|
||||
{
|
||||
int64_t session_id;
|
||||
std::shared_ptr<AuthID> auth_id;
|
||||
};
|
||||
|
||||
struct CloseSessionDelta
|
||||
{
|
||||
int64_t session_id;
|
||||
};
|
||||
|
||||
using Operation = std::variant<
|
||||
CreateNodeDelta,
|
||||
RemoveNodeDelta,
|
||||
UpdateNodeStatDelta,
|
||||
UpdateNodeDataDelta,
|
||||
SetACLDelta,
|
||||
AddAuthDelta,
|
||||
ErrorDelta,
|
||||
SubDeltaEnd,
|
||||
FailedMultiDelta,
|
||||
CloseSessionDelta>;
|
||||
|
||||
struct Delta
|
||||
{
|
||||
Delta(String path_, int64_t zxid_, Operation operation_) : path(std::move(path_)), zxid(zxid_), operation(std::move(operation_)) { }
|
||||
|
||||
Delta(int64_t zxid_, Coordination::Error error) : Delta("", zxid_, ErrorDelta{error}) { }
|
||||
|
||||
Delta(int64_t zxid_, Operation subdelta) : Delta("", zxid_, subdelta) { }
|
||||
|
||||
String path;
|
||||
int64_t zxid;
|
||||
Operation operation;
|
||||
};
|
||||
|
||||
using DeltaIterator = std::list<KeeperStorageBase::Delta>::const_iterator;
|
||||
struct DeltaRange
|
||||
{
|
||||
DeltaIterator begin_it;
|
||||
DeltaIterator end_it;
|
||||
|
||||
auto begin() const
|
||||
{
|
||||
return begin_it;
|
||||
}
|
||||
|
||||
auto end() const
|
||||
{
|
||||
return end_it;
|
||||
}
|
||||
|
||||
bool empty() const
|
||||
{
|
||||
return begin_it == end_it;
|
||||
}
|
||||
|
||||
const auto & front() const
|
||||
{
|
||||
return *begin_it;
|
||||
}
|
||||
};
|
||||
|
||||
struct Stats
|
||||
{
|
||||
std::atomic<uint64_t> nodes_count = 0;
|
||||
std::atomic<uint64_t> approximate_data_size = 0;
|
||||
std::atomic<uint64_t> total_watches_count = 0;
|
||||
std::atomic<uint64_t> watched_paths_count = 0;
|
||||
std::atomic<uint64_t> sessions_with_watches_count = 0;
|
||||
std::atomic<uint64_t> session_with_ephemeral_nodes_count = 0;
|
||||
std::atomic<uint64_t> total_emphemeral_nodes_count = 0;
|
||||
std::atomic<int64_t> last_zxid = 0;
|
||||
};
|
||||
|
||||
Stats stats;
|
||||
|
||||
static bool checkDigest(const Digest & first, const Digest & second);
|
||||
|
||||
};
|
||||
|
||||
|
||||
/// Keeper state machine almost equal to the ZooKeeper's state machine.
|
||||
/// Implements all logic of operations, data changes, sessions allocation.
|
||||
/// In-memory and not thread safe.
|
||||
@ -472,160 +526,73 @@ public:
|
||||
|
||||
int64_t session_id_counter{1};
|
||||
|
||||
SessionAndAuth session_and_auth;
|
||||
mutable SharedMutex auth_mutex;
|
||||
SessionAndAuth committed_session_and_auth;
|
||||
|
||||
mutable SharedMutex storage_mutex;
|
||||
/// Main hashtable with nodes. Contain all information about data.
|
||||
/// All other structures expect session_and_timeout can be restored from
|
||||
/// container.
|
||||
Container container;
|
||||
|
||||
// Applying ZooKeeper request to storage consists of two steps:
|
||||
// - preprocessing which, instead of applying the changes directly to storage,
|
||||
// generates deltas with those changes, denoted with the request ZXID
|
||||
// - processing which applies deltas with the correct ZXID to the storage
|
||||
//
|
||||
// Delta objects allow us two things:
|
||||
// - fetch the latest, uncommitted state of an object by getting the committed
|
||||
// state of that same object from the storage and applying the deltas
|
||||
// in the same order as they are defined
|
||||
// - quickly commit the changes to the storage
|
||||
struct CreateNodeDelta
|
||||
{
|
||||
Coordination::Stat stat;
|
||||
Coordination::ACLs acls;
|
||||
String data;
|
||||
};
|
||||
|
||||
struct RemoveNodeDelta
|
||||
{
|
||||
int32_t version{-1};
|
||||
int64_t ephemeral_owner{0};
|
||||
};
|
||||
|
||||
struct UpdateNodeDelta
|
||||
{
|
||||
std::function<void(Node &)> update_fn;
|
||||
int32_t version{-1};
|
||||
};
|
||||
|
||||
struct SetACLDelta
|
||||
{
|
||||
Coordination::ACLs acls;
|
||||
int32_t version{-1};
|
||||
};
|
||||
|
||||
struct ErrorDelta
|
||||
{
|
||||
Coordination::Error error;
|
||||
};
|
||||
|
||||
struct FailedMultiDelta
|
||||
{
|
||||
std::vector<Coordination::Error> error_codes;
|
||||
Coordination::Error global_error{Coordination::Error::ZOK};
|
||||
};
|
||||
|
||||
// Denotes end of a subrequest in multi request
|
||||
struct SubDeltaEnd
|
||||
{
|
||||
};
|
||||
|
||||
struct AddAuthDelta
|
||||
{
|
||||
int64_t session_id;
|
||||
AuthID auth_id;
|
||||
};
|
||||
|
||||
struct CloseSessionDelta
|
||||
{
|
||||
int64_t session_id;
|
||||
};
|
||||
|
||||
using Operation = std::
|
||||
variant<CreateNodeDelta, RemoveNodeDelta, UpdateNodeDelta, SetACLDelta, AddAuthDelta, ErrorDelta, SubDeltaEnd, FailedMultiDelta, CloseSessionDelta>;
|
||||
|
||||
struct Delta
|
||||
{
|
||||
Delta(String path_, int64_t zxid_, Operation operation_) : path(std::move(path_)), zxid(zxid_), operation(std::move(operation_)) { }
|
||||
|
||||
Delta(int64_t zxid_, Coordination::Error error) : Delta("", zxid_, ErrorDelta{error}) { }
|
||||
|
||||
Delta(int64_t zxid_, Operation subdelta) : Delta("", zxid_, subdelta) { }
|
||||
|
||||
String path;
|
||||
int64_t zxid;
|
||||
Operation operation;
|
||||
};
|
||||
|
||||
struct UncommittedState
|
||||
{
|
||||
explicit UncommittedState(KeeperStorage & storage_) : storage(storage_) { }
|
||||
|
||||
void addDelta(Delta new_delta);
|
||||
void addDeltas(std::vector<Delta> new_deltas);
|
||||
void commit(int64_t commit_zxid);
|
||||
void addDeltas(std::list<Delta> new_deltas);
|
||||
void cleanup(int64_t commit_zxid);
|
||||
void rollback(int64_t rollback_zxid);
|
||||
void rollback(std::list<Delta> rollback_deltas);
|
||||
|
||||
std::shared_ptr<Node> getNode(StringRef path) const;
|
||||
std::shared_ptr<Node> getNode(StringRef path, bool should_lock_storage = true) const;
|
||||
const Node * getActualNodeView(StringRef path, const Node & storage_node) const;
|
||||
|
||||
Coordination::ACLs getACLs(StringRef path) const;
|
||||
|
||||
void applyDeltas(const std::list<Delta> & new_deltas);
|
||||
void applyDelta(const Delta & delta);
|
||||
void rollbackDelta(const Delta & delta);
|
||||
|
||||
bool hasACL(int64_t session_id, bool is_local, std::function<bool(const AuthID &)> predicate) const;
|
||||
|
||||
void forEachAuthInSession(int64_t session_id, std::function<void(const AuthID &)> func) const;
|
||||
|
||||
std::shared_ptr<Node> tryGetNodeFromStorage(StringRef path) const;
|
||||
std::shared_ptr<Node> tryGetNodeFromStorage(StringRef path, bool should_lock_storage = true) const;
|
||||
|
||||
std::unordered_map<int64_t, std::list<const AuthID *>> session_and_auth;
|
||||
std::unordered_set<int64_t> closed_sessions;
|
||||
|
||||
using ZxidToNodes = std::map<int64_t, std::unordered_set<std::string_view>>;
|
||||
struct UncommittedNode
|
||||
{
|
||||
std::shared_ptr<Node> node{nullptr};
|
||||
Coordination::ACLs acls{};
|
||||
int64_t zxid{0};
|
||||
};
|
||||
std::optional<Coordination::ACLs> acls{};
|
||||
std::unordered_set<uint64_t> applied_zxids{};
|
||||
|
||||
struct Hash
|
||||
{
|
||||
auto operator()(const std::string_view view) const
|
||||
{
|
||||
SipHash hash;
|
||||
hash.update(view);
|
||||
return hash.get64();
|
||||
}
|
||||
|
||||
using is_transparent = void; // required to make find() work with different type than key_type
|
||||
};
|
||||
|
||||
struct Equal
|
||||
{
|
||||
auto operator()(const std::string_view a,
|
||||
const std::string_view b) const
|
||||
{
|
||||
return a == b;
|
||||
}
|
||||
|
||||
using is_transparent = void; // required to make find() work with different type than key_type
|
||||
void materializeACL(const ACLMap & current_acl_map);
|
||||
};
|
||||
|
||||
struct PathCmp
|
||||
{
|
||||
using is_transparent = std::true_type;
|
||||
|
||||
auto operator()(const std::string_view a,
|
||||
const std::string_view b) const
|
||||
{
|
||||
return a.size() < b.size() || (a.size() == b.size() && a < b);
|
||||
size_t level_a = std::count(a.begin(), a.end(), '/');
|
||||
size_t level_b = std::count(b.begin(), b.end(), '/');
|
||||
return level_a < level_b || (level_a == level_b && a < b);
|
||||
}
|
||||
|
||||
using is_transparent = void; // required to make find() work with different type than key_type
|
||||
};
|
||||
|
||||
mutable std::map<std::string, UncommittedNode, PathCmp> nodes;
|
||||
std::unordered_map<std::string, std::list<const Delta *>, Hash, Equal> deltas_for_path;
|
||||
Ephemerals ephemerals;
|
||||
|
||||
std::list<Delta> deltas;
|
||||
std::unordered_map<int64_t, std::list<std::pair<int64_t, std::shared_ptr<AuthID>>>> session_and_auth;
|
||||
|
||||
mutable std::map<std::string, UncommittedNode, PathCmp> nodes;
|
||||
mutable ZxidToNodes zxid_to_nodes;
|
||||
|
||||
mutable std::mutex deltas_mutex;
|
||||
std::list<Delta> deltas TSA_GUARDED_BY(deltas_mutex);
|
||||
KeeperStorage<Container> & storage;
|
||||
};
|
||||
|
||||
@ -635,7 +602,7 @@ public:
|
||||
// with zxid > last_zxid
|
||||
void applyUncommittedState(KeeperStorage & other, int64_t last_log_idx);
|
||||
|
||||
Coordination::Error commit(int64_t zxid);
|
||||
Coordination::Error commit(DeltaRange deltas);
|
||||
|
||||
// Create node in the storage
|
||||
// Returns false if it failed to create the node, true otherwise
|
||||
@ -653,12 +620,11 @@ public:
|
||||
|
||||
bool checkACL(StringRef path, int32_t permissions, int64_t session_id, bool is_local);
|
||||
|
||||
void unregisterEphemeralPath(int64_t session_id, const std::string & path);
|
||||
|
||||
std::mutex ephemeral_mutex;
|
||||
/// Mapping session_id -> set of ephemeral nodes paths
|
||||
Ephemerals ephemerals;
|
||||
/// Mapping session_id -> set of watched nodes paths
|
||||
SessionAndWatcher sessions_and_watchers;
|
||||
Ephemerals committed_ephemerals;
|
||||
size_t committed_ephemeral_nodes{0};
|
||||
|
||||
/// Expiration queue for session, allows to get dead sessions at some point of time
|
||||
SessionExpiryQueue session_expiry_queue;
|
||||
/// All active sessions with timeout
|
||||
@ -667,8 +633,10 @@ public:
|
||||
/// ACLMap for more compact ACLs storage inside nodes.
|
||||
ACLMap acl_map;
|
||||
|
||||
mutable std::mutex transaction_mutex;
|
||||
|
||||
/// Global id of all requests applied to storage
|
||||
int64_t zxid{0};
|
||||
int64_t zxid TSA_GUARDED_BY(transaction_mutex) = 0;
|
||||
|
||||
// older Keeper node (pre V5 snapshots) can create snapshots and receive logs from newer Keeper nodes
|
||||
// this can lead to some inconsistencies, e.g. from snapshot it will use log_idx as zxid
|
||||
@ -685,11 +653,16 @@ public:
|
||||
int64_t log_idx = 0;
|
||||
};
|
||||
|
||||
std::deque<TransactionInfo> uncommitted_transactions;
|
||||
std::list<TransactionInfo> uncommitted_transactions TSA_GUARDED_BY(transaction_mutex);
|
||||
|
||||
uint64_t nodes_digest{0};
|
||||
uint64_t nodes_digest = 0;
|
||||
|
||||
bool finalized{false};
|
||||
std::atomic<bool> finalized{false};
|
||||
|
||||
|
||||
/// Mapping session_id -> set of watched nodes paths
|
||||
SessionAndWatcher sessions_and_watchers;
|
||||
size_t total_watches_count = 0;
|
||||
|
||||
/// Currently active watches (node_path -> subscribed sessions)
|
||||
Watches watches;
|
||||
@ -698,45 +671,30 @@ public:
|
||||
void clearDeadWatches(int64_t session_id);
|
||||
|
||||
/// Get current committed zxid
|
||||
int64_t getZXID() const { return zxid; }
|
||||
int64_t getZXID() const;
|
||||
|
||||
int64_t getNextZXID() const
|
||||
{
|
||||
if (uncommitted_transactions.empty())
|
||||
return zxid + 1;
|
||||
int64_t getNextZXID() const;
|
||||
int64_t getNextZXIDLocked() const TSA_REQUIRES(transaction_mutex);
|
||||
|
||||
return uncommitted_transactions.back().zxid + 1;
|
||||
}
|
||||
|
||||
Digest getNodesDigest(bool committed) const;
|
||||
Digest getNodesDigest(bool committed, bool lock_transaction_mutex) const;
|
||||
|
||||
KeeperContextPtr keeper_context;
|
||||
|
||||
const String superdigest;
|
||||
|
||||
bool initialized{false};
|
||||
std::atomic<bool> initialized{false};
|
||||
|
||||
KeeperStorage(int64_t tick_time_ms, const String & superdigest_, const KeeperContextPtr & keeper_context_, bool initialize_system_nodes = true);
|
||||
|
||||
void initializeSystemNodes();
|
||||
void initializeSystemNodes() TSA_NO_THREAD_SAFETY_ANALYSIS;
|
||||
|
||||
/// Allocate new session id with the specified timeouts
|
||||
int64_t getSessionID(int64_t session_timeout_ms)
|
||||
{
|
||||
auto result = session_id_counter++;
|
||||
session_and_timeout.emplace(result, session_timeout_ms);
|
||||
session_expiry_queue.addNewSessionOrUpdate(result, session_timeout_ms);
|
||||
return result;
|
||||
}
|
||||
int64_t getSessionID(int64_t session_timeout_ms);
|
||||
|
||||
/// Add session id. Used when restoring KeeperStorage from snapshot.
|
||||
void addSessionID(int64_t session_id, int64_t session_timeout_ms)
|
||||
{
|
||||
session_and_timeout.emplace(session_id, session_timeout_ms);
|
||||
session_expiry_queue.addNewSessionOrUpdate(session_id, session_timeout_ms);
|
||||
}
|
||||
void addSessionID(int64_t session_id, int64_t session_timeout_ms) TSA_NO_THREAD_SAFETY_ANALYSIS;
|
||||
|
||||
UInt64 calculateNodesDigest(UInt64 current_digest, const std::vector<Delta> & new_deltas) const;
|
||||
UInt64 calculateNodesDigest(UInt64 current_digest, const std::list<Delta> & new_deltas) const;
|
||||
|
||||
/// Process user request and return response.
|
||||
/// check_acl = false only when converting data from ZooKeeper.
|
||||
@ -763,42 +721,39 @@ public:
|
||||
/// Set of methods for creating snapshots
|
||||
|
||||
/// Turn on snapshot mode, so data inside Container is not deleted, but replaced with new version.
|
||||
void enableSnapshotMode(size_t up_to_version)
|
||||
{
|
||||
container.enableSnapshotMode(up_to_version);
|
||||
}
|
||||
void enableSnapshotMode(size_t up_to_version);
|
||||
|
||||
/// Turn off snapshot mode.
|
||||
void disableSnapshotMode()
|
||||
{
|
||||
container.disableSnapshotMode();
|
||||
}
|
||||
void disableSnapshotMode();
|
||||
|
||||
Container::const_iterator getSnapshotIteratorBegin() const { return container.begin(); }
|
||||
Container::const_iterator getSnapshotIteratorBegin() const;
|
||||
|
||||
/// Clear outdated data from internal container.
|
||||
void clearGarbageAfterSnapshot() { container.clearOutdatedNodes(); }
|
||||
void clearGarbageAfterSnapshot();
|
||||
|
||||
/// Get all active sessions
|
||||
const SessionAndTimeout & getActiveSessions() const { return session_and_timeout; }
|
||||
SessionAndTimeout getActiveSessions() const;
|
||||
|
||||
/// Get all dead sessions
|
||||
std::vector<int64_t> getDeadSessions() const { return session_expiry_queue.getExpiredSessions(); }
|
||||
std::vector<int64_t> getDeadSessions() const;
|
||||
|
||||
void updateStats();
|
||||
const Stats & getStorageStats() const;
|
||||
|
||||
/// Introspection functions mostly used in 4-letter commands
|
||||
uint64_t getNodesCount() const { return container.size(); }
|
||||
uint64_t getNodesCount() const;
|
||||
|
||||
uint64_t getApproximateDataSize() const { return container.getApproximateDataSize(); }
|
||||
uint64_t getApproximateDataSize() const;
|
||||
|
||||
uint64_t getArenaDataSize() const { return container.keyArenaSize(); }
|
||||
uint64_t getArenaDataSize() const;
|
||||
|
||||
uint64_t getTotalWatchesCount() const;
|
||||
|
||||
uint64_t getWatchedPathsCount() const { return watches.size() + list_watches.size(); }
|
||||
uint64_t getWatchedPathsCount() const;
|
||||
|
||||
uint64_t getSessionsWithWatchesCount() const;
|
||||
|
||||
uint64_t getSessionWithEphemeralNodesCount() const { return ephemerals.size(); }
|
||||
uint64_t getSessionWithEphemeralNodesCount() const;
|
||||
uint64_t getTotalEphemeralNodesCount() const;
|
||||
|
||||
void dumpWatches(WriteBufferFromOwnString & buf) const;
|
||||
|
@ -155,11 +155,11 @@ public:
|
||||
ReadBufferFromOwnString buffer(iter->value().ToStringView());
|
||||
typename Node::Meta & meta = new_pair->value;
|
||||
readPODBinary(meta, buffer);
|
||||
readVarUInt(new_pair->value.data_size, buffer);
|
||||
if (new_pair->value.data_size)
|
||||
readVarUInt(new_pair->value.stats.data_size, buffer);
|
||||
if (new_pair->value.stats.data_size)
|
||||
{
|
||||
new_pair->value.data = std::unique_ptr<char[]>(new char[new_pair->value.data_size]);
|
||||
buffer.readStrict(new_pair->value.data.get(), new_pair->value.data_size);
|
||||
new_pair->value.data = std::unique_ptr<char[]>(new char[new_pair->value.stats.data_size]);
|
||||
buffer.readStrict(new_pair->value.data.get(), new_pair->value.stats.data_size);
|
||||
}
|
||||
pair = new_pair;
|
||||
}
|
||||
@ -211,7 +211,7 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<std::pair<std::string, Node>> getChildren(const std::string & key_)
|
||||
std::vector<std::pair<std::string, Node>> getChildren(const std::string & key_, bool read_data = false)
|
||||
{
|
||||
rocksdb::ReadOptions read_options;
|
||||
read_options.total_order_seek = true;
|
||||
@ -232,6 +232,15 @@ public:
|
||||
typename Node::Meta & meta = node;
|
||||
/// We do not read data here
|
||||
readPODBinary(meta, buffer);
|
||||
if (read_data)
|
||||
{
|
||||
readVarUInt(meta.stats.data_size, buffer);
|
||||
if (meta.stats.data_size)
|
||||
{
|
||||
node.data = std::unique_ptr<char[]>(new char[meta.stats.data_size]);
|
||||
buffer.readStrict(node.data.get(), meta.stats.data_size);
|
||||
}
|
||||
}
|
||||
std::string real_key(iter->key().data() + len, iter->key().size() - len);
|
||||
// std::cout << "real key: " << real_key << std::endl;
|
||||
result.emplace_back(std::move(real_key), std::move(node));
|
||||
@ -268,11 +277,11 @@ public:
|
||||
typename Node::Meta & meta = kv->value;
|
||||
readPODBinary(meta, buffer);
|
||||
/// TODO: Sometimes we don't need to load data.
|
||||
readVarUInt(kv->value.data_size, buffer);
|
||||
if (kv->value.data_size)
|
||||
readVarUInt(kv->value.stats.data_size, buffer);
|
||||
if (kv->value.stats.data_size)
|
||||
{
|
||||
kv->value.data = std::unique_ptr<char[]>(new char[kv->value.data_size]);
|
||||
buffer.readStrict(kv->value.data.get(), kv->value.data_size);
|
||||
kv->value.data = std::unique_ptr<char[]>(new char[kv->value.stats.data_size]);
|
||||
buffer.readStrict(kv->value.data.get(), kv->value.stats.data_size);
|
||||
}
|
||||
return const_iterator(kv);
|
||||
}
|
||||
@ -281,7 +290,7 @@ public:
|
||||
{
|
||||
auto it = find(key);
|
||||
chassert(it != end());
|
||||
return MockNode(it->value.numChildren(), it->value.getData());
|
||||
return MockNode(it->value.stats.numChildren(), it->value.getData());
|
||||
}
|
||||
|
||||
const_iterator updateValue(StringRef key_, ValueUpdater updater)
|
||||
|
@ -93,7 +93,7 @@ void deserializeACLMap(Storage & storage, ReadBuffer & in)
|
||||
}
|
||||
|
||||
template<typename Storage>
|
||||
int64_t deserializeStorageData(Storage & storage, ReadBuffer & in, LoggerPtr log)
|
||||
int64_t deserializeStorageData(Storage & storage, ReadBuffer & in, LoggerPtr log) TSA_NO_THREAD_SAFETY_ANALYSIS
|
||||
{
|
||||
int64_t max_zxid = 0;
|
||||
std::string path;
|
||||
@ -108,33 +108,33 @@ int64_t deserializeStorageData(Storage & storage, ReadBuffer & in, LoggerPtr log
|
||||
Coordination::read(node.acl_id, in);
|
||||
|
||||
/// Deserialize stat
|
||||
Coordination::read(node.czxid, in);
|
||||
Coordination::read(node.mzxid, in);
|
||||
Coordination::read(node.stats.czxid, in);
|
||||
Coordination::read(node.stats.mzxid, in);
|
||||
/// For some reason ZXID specified in filename can be smaller
|
||||
/// then actual zxid from nodes. In this case we will use zxid from nodes.
|
||||
max_zxid = std::max(max_zxid, node.mzxid);
|
||||
max_zxid = std::max(max_zxid, node.stats.mzxid);
|
||||
|
||||
int64_t ctime;
|
||||
Coordination::read(ctime, in);
|
||||
node.setCtime(ctime);
|
||||
Coordination::read(node.mtime, in);
|
||||
Coordination::read(node.version, in);
|
||||
Coordination::read(node.cversion, in);
|
||||
Coordination::read(node.aversion, in);
|
||||
node.stats.setCtime(ctime);
|
||||
Coordination::read(node.stats.mtime, in);
|
||||
Coordination::read(node.stats.version, in);
|
||||
Coordination::read(node.stats.cversion, in);
|
||||
Coordination::read(node.stats.aversion, in);
|
||||
int64_t ephemeral_owner;
|
||||
Coordination::read(ephemeral_owner, in);
|
||||
if (ephemeral_owner != 0)
|
||||
node.setEphemeralOwner(ephemeral_owner);
|
||||
Coordination::read(node.pzxid, in);
|
||||
node.stats.setEphemeralOwner(ephemeral_owner);
|
||||
Coordination::read(node.stats.pzxid, in);
|
||||
if (!path.empty())
|
||||
{
|
||||
if (ephemeral_owner == 0)
|
||||
node.setSeqNum(node.cversion);
|
||||
node.stats.setSeqNum(node.stats.cversion);
|
||||
|
||||
storage.container.insertOrReplace(path, node);
|
||||
|
||||
if (ephemeral_owner != 0)
|
||||
storage.ephemerals[ephemeral_owner].insert(path);
|
||||
storage.committed_ephemerals[ephemeral_owner].insert(path);
|
||||
|
||||
storage.acl_map.addUsage(node.acl_id);
|
||||
}
|
||||
@ -149,7 +149,13 @@ int64_t deserializeStorageData(Storage & storage, ReadBuffer & in, LoggerPtr log
|
||||
if (itr.key != "/")
|
||||
{
|
||||
auto parent_path = parentNodePath(itr.key);
|
||||
storage.container.updateValue(parent_path, [my_path = itr.key] (typename Storage::Node & value) { value.addChild(getBaseNodeName(my_path)); value.increaseNumChildren(); });
|
||||
storage.container.updateValue(
|
||||
parent_path,
|
||||
[my_path = itr.key](typename Storage::Node & value)
|
||||
{
|
||||
value.addChild(getBaseNodeName(my_path));
|
||||
value.stats.increaseNumChildren();
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@ -157,7 +163,7 @@ int64_t deserializeStorageData(Storage & storage, ReadBuffer & in, LoggerPtr log
|
||||
}
|
||||
|
||||
template<typename Storage>
|
||||
void deserializeKeeperStorageFromSnapshot(Storage & storage, const std::string & snapshot_path, LoggerPtr log)
|
||||
void deserializeKeeperStorageFromSnapshot(Storage & storage, const std::string & snapshot_path, LoggerPtr log) TSA_NO_THREAD_SAFETY_ANALYSIS
|
||||
{
|
||||
LOG_INFO(log, "Deserializing storage snapshot {}", snapshot_path);
|
||||
int64_t zxid = getZxidFromName(snapshot_path);
|
||||
@ -487,7 +493,7 @@ bool hasErrorsInMultiRequest(Coordination::ZooKeeperRequestPtr request)
|
||||
}
|
||||
|
||||
template<typename Storage>
|
||||
bool deserializeTxn(Storage & storage, ReadBuffer & in, LoggerPtr /*log*/)
|
||||
bool deserializeTxn(Storage & storage, ReadBuffer & in, LoggerPtr /*log*/) TSA_NO_THREAD_SAFETY_ANALYSIS
|
||||
{
|
||||
int64_t checksum;
|
||||
Coordination::read(checksum, in);
|
||||
@ -568,7 +574,7 @@ void deserializeLogAndApplyToStorage(Storage & storage, const std::string & log_
|
||||
}
|
||||
|
||||
template<typename Storage>
|
||||
void deserializeLogsAndApplyToStorage(Storage & storage, const std::string & path, LoggerPtr log)
|
||||
void deserializeLogsAndApplyToStorage(Storage & storage, const std::string & path, LoggerPtr log) TSA_NO_THREAD_SAFETY_ANALYSIS
|
||||
{
|
||||
std::map<int64_t, std::string> existing_logs;
|
||||
for (const auto & p : fs::directory_iterator(path))
|
||||
|
@ -1,6 +1,7 @@
|
||||
#include <chrono>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "base/defines.h"
|
||||
#include "config.h"
|
||||
|
||||
#if USE_NURAFT
|
||||
@ -1540,7 +1541,7 @@ void addNode(Storage & storage, const std::string & path, const std::string & da
|
||||
using Node = typename Storage::Node;
|
||||
Node node{};
|
||||
node.setData(data);
|
||||
node.setEphemeralOwner(ephemeral_owner);
|
||||
node.stats.setEphemeralOwner(ephemeral_owner);
|
||||
storage.container.insertOrReplace(path, node);
|
||||
auto child_it = storage.container.find(path);
|
||||
auto child_path = DB::getBaseNodeName(child_it->key);
|
||||
@ -1549,7 +1550,7 @@ void addNode(Storage & storage, const std::string & path, const std::string & da
|
||||
[&](auto & parent)
|
||||
{
|
||||
parent.addChild(child_path);
|
||||
parent.increaseNumChildren();
|
||||
parent.stats.increaseNumChildren();
|
||||
});
|
||||
}
|
||||
|
||||
@ -1570,9 +1571,9 @@ TYPED_TEST(CoordinationTest, TestStorageSnapshotSimple)
|
||||
addNode(storage, "/hello1", "world", 1);
|
||||
addNode(storage, "/hello2", "somedata", 3);
|
||||
storage.session_id_counter = 5;
|
||||
storage.zxid = 2;
|
||||
storage.ephemerals[3] = {"/hello2"};
|
||||
storage.ephemerals[1] = {"/hello1"};
|
||||
TSA_SUPPRESS_WARNING_FOR_WRITE(storage.zxid) = 2;
|
||||
storage.committed_ephemerals[3] = {"/hello2"};
|
||||
storage.committed_ephemerals[1] = {"/hello1"};
|
||||
storage.getSessionID(130);
|
||||
storage.getSessionID(130);
|
||||
|
||||
@ -1601,10 +1602,10 @@ TYPED_TEST(CoordinationTest, TestStorageSnapshotSimple)
|
||||
EXPECT_EQ(restored_storage->container.getValue("/hello1").getData(), "world");
|
||||
EXPECT_EQ(restored_storage->container.getValue("/hello2").getData(), "somedata");
|
||||
EXPECT_EQ(restored_storage->session_id_counter, 7);
|
||||
EXPECT_EQ(restored_storage->zxid, 2);
|
||||
EXPECT_EQ(restored_storage->ephemerals.size(), 2);
|
||||
EXPECT_EQ(restored_storage->ephemerals[3].size(), 1);
|
||||
EXPECT_EQ(restored_storage->ephemerals[1].size(), 1);
|
||||
EXPECT_EQ(restored_storage->getZXID(), 2);
|
||||
EXPECT_EQ(restored_storage->committed_ephemerals.size(), 2);
|
||||
EXPECT_EQ(restored_storage->committed_ephemerals[3].size(), 1);
|
||||
EXPECT_EQ(restored_storage->committed_ephemerals[1].size(), 1);
|
||||
EXPECT_EQ(restored_storage->session_and_timeout.size(), 2);
|
||||
}
|
||||
|
||||
@ -2027,7 +2028,7 @@ TYPED_TEST(CoordinationTest, TestEphemeralNodeRemove)
|
||||
state_machine->commit(1, entry_c->get_buf());
|
||||
const auto & storage = state_machine->getStorageUnsafe();
|
||||
|
||||
EXPECT_EQ(storage.ephemerals.size(), 1);
|
||||
EXPECT_EQ(storage.committed_ephemerals.size(), 1);
|
||||
std::shared_ptr<ZooKeeperRemoveRequest> request_d = std::make_shared<ZooKeeperRemoveRequest>();
|
||||
request_d->path = "/hello";
|
||||
/// Delete from other session
|
||||
@ -2035,7 +2036,7 @@ TYPED_TEST(CoordinationTest, TestEphemeralNodeRemove)
|
||||
state_machine->pre_commit(2, entry_d->get_buf());
|
||||
state_machine->commit(2, entry_d->get_buf());
|
||||
|
||||
EXPECT_EQ(storage.ephemerals.size(), 0);
|
||||
EXPECT_EQ(storage.committed_ephemerals.size(), 0);
|
||||
}
|
||||
|
||||
|
||||
@ -2590,9 +2591,9 @@ TYPED_TEST(CoordinationTest, TestStorageSnapshotDifferentCompressions)
|
||||
addNode(storage, "/hello1", "world", 1);
|
||||
addNode(storage, "/hello2", "somedata", 3);
|
||||
storage.session_id_counter = 5;
|
||||
storage.zxid = 2;
|
||||
storage.ephemerals[3] = {"/hello2"};
|
||||
storage.ephemerals[1] = {"/hello1"};
|
||||
TSA_SUPPRESS_WARNING_FOR_WRITE(storage.zxid) = 2;
|
||||
storage.committed_ephemerals[3] = {"/hello2"};
|
||||
storage.committed_ephemerals[1] = {"/hello1"};
|
||||
storage.getSessionID(130);
|
||||
storage.getSessionID(130);
|
||||
|
||||
@ -2617,10 +2618,10 @@ TYPED_TEST(CoordinationTest, TestStorageSnapshotDifferentCompressions)
|
||||
EXPECT_EQ(restored_storage->container.getValue("/hello1").getData(), "world");
|
||||
EXPECT_EQ(restored_storage->container.getValue("/hello2").getData(), "somedata");
|
||||
EXPECT_EQ(restored_storage->session_id_counter, 7);
|
||||
EXPECT_EQ(restored_storage->zxid, 2);
|
||||
EXPECT_EQ(restored_storage->ephemerals.size(), 2);
|
||||
EXPECT_EQ(restored_storage->ephemerals[3].size(), 1);
|
||||
EXPECT_EQ(restored_storage->ephemerals[1].size(), 1);
|
||||
EXPECT_EQ(restored_storage->getZXID(), 2);
|
||||
EXPECT_EQ(restored_storage->committed_ephemerals.size(), 2);
|
||||
EXPECT_EQ(restored_storage->committed_ephemerals[3].size(), 1);
|
||||
EXPECT_EQ(restored_storage->committed_ephemerals[1].size(), 1);
|
||||
EXPECT_EQ(restored_storage->session_and_timeout.size(), 2);
|
||||
}
|
||||
|
||||
@ -2805,13 +2806,13 @@ TYPED_TEST(CoordinationTest, TestStorageSnapshotEqual)
|
||||
|
||||
storage.session_id_counter = 5;
|
||||
|
||||
storage.ephemerals[3] = {"/hello"};
|
||||
storage.ephemerals[1] = {"/hello/somepath"};
|
||||
storage.committed_ephemerals[3] = {"/hello"};
|
||||
storage.committed_ephemerals[1] = {"/hello/somepath"};
|
||||
|
||||
for (size_t j = 0; j < 3333; ++j)
|
||||
storage.getSessionID(130 * j);
|
||||
|
||||
DB::KeeperStorageSnapshot<Storage> snapshot(&storage, storage.zxid);
|
||||
DB::KeeperStorageSnapshot<Storage> snapshot(&storage, storage.getZXID());
|
||||
|
||||
auto buf = manager.serializeSnapshotToBuffer(snapshot);
|
||||
|
||||
@ -3315,7 +3316,7 @@ TYPED_TEST(CoordinationTest, TestCheckNotExistsRequest)
|
||||
create_path("/test_node");
|
||||
auto node_it = storage.container.find("/test_node");
|
||||
ASSERT_NE(node_it, storage.container.end());
|
||||
auto node_version = node_it->value.version;
|
||||
auto node_version = node_it->value.stats.version;
|
||||
|
||||
{
|
||||
SCOPED_TRACE("CheckNotExists returns ZNODEEXISTS");
|
||||
@ -3566,12 +3567,12 @@ TYPED_TEST(CoordinationTest, TestRemoveRecursiveRequest)
|
||||
{
|
||||
SCOPED_TRACE("Recursive Remove Ephemeral");
|
||||
create("/T7", zkutil::CreateMode::Ephemeral);
|
||||
ASSERT_EQ(storage.ephemerals.size(), 1);
|
||||
ASSERT_EQ(storage.committed_ephemerals.size(), 1);
|
||||
|
||||
auto responses = remove_recursive("/T7", 100);
|
||||
ASSERT_EQ(responses.size(), 1);
|
||||
ASSERT_EQ(responses[0].response->error, Coordination::Error::ZOK);
|
||||
ASSERT_EQ(storage.ephemerals.size(), 0);
|
||||
ASSERT_EQ(storage.committed_ephemerals.size(), 0);
|
||||
ASSERT_FALSE(exists("/T7"));
|
||||
}
|
||||
|
||||
@ -3581,12 +3582,12 @@ TYPED_TEST(CoordinationTest, TestRemoveRecursiveRequest)
|
||||
create("/T8/A", zkutil::CreateMode::Persistent);
|
||||
create("/T8/B", zkutil::CreateMode::Ephemeral);
|
||||
create("/T8/A/C", zkutil::CreateMode::Ephemeral);
|
||||
ASSERT_EQ(storage.ephemerals.size(), 1);
|
||||
ASSERT_EQ(storage.committed_ephemerals.size(), 1);
|
||||
|
||||
auto responses = remove_recursive("/T8", 4);
|
||||
ASSERT_EQ(responses.size(), 1);
|
||||
ASSERT_EQ(responses[0].response->error, Coordination::Error::ZOK);
|
||||
ASSERT_EQ(storage.ephemerals.size(), 0);
|
||||
ASSERT_EQ(storage.committed_ephemerals.size(), 0);
|
||||
ASSERT_FALSE(exists("/T8"));
|
||||
ASSERT_FALSE(exists("/T8/A"));
|
||||
ASSERT_FALSE(exists("/T8/B"));
|
||||
@ -3738,6 +3739,72 @@ TYPED_TEST(CoordinationTest, TestRemoveRecursiveInMultiRequest)
|
||||
ASSERT_FALSE(exists("/A/B"));
|
||||
ASSERT_FALSE(exists("/A/B/D"));
|
||||
}
|
||||
|
||||
{
|
||||
SCOPED_TRACE("Recursive Remove For Subtree With Updated Node");
|
||||
int create_zxid = ++zxid;
|
||||
auto ops = prepare_create_tree();
|
||||
|
||||
/// First create nodes
|
||||
const auto create_request = std::make_shared<ZooKeeperMultiRequest>(ops, ACLs{});
|
||||
storage.preprocessRequest(create_request, 1, 0, create_zxid);
|
||||
auto create_responses = storage.processRequest(create_request, 1, create_zxid);
|
||||
ASSERT_EQ(create_responses.size(), 1);
|
||||
ASSERT_TRUE(is_multi_ok(create_responses[0].response));
|
||||
|
||||
/// Small limit
|
||||
int remove_zxid = ++zxid;
|
||||
ops = {
|
||||
zkutil::makeSetRequest("/A/B", "", -1),
|
||||
zkutil::makeRemoveRecursiveRequest("/A", 3),
|
||||
};
|
||||
auto remove_request = std::make_shared<ZooKeeperMultiRequest>(ops, ACLs{});
|
||||
storage.preprocessRequest(remove_request, 1, 0, remove_zxid);
|
||||
auto remove_responses = storage.processRequest(remove_request, 1, remove_zxid);
|
||||
|
||||
ASSERT_EQ(remove_responses.size(), 1);
|
||||
ASSERT_FALSE(is_multi_ok(remove_responses[0].response));
|
||||
|
||||
/// Big limit
|
||||
remove_zxid = ++zxid;
|
||||
ops[1] = zkutil::makeRemoveRecursiveRequest("/A", 4);
|
||||
remove_request = std::make_shared<ZooKeeperMultiRequest>(ops, ACLs{});
|
||||
storage.preprocessRequest(remove_request, 1, 0, remove_zxid);
|
||||
remove_responses = storage.processRequest(remove_request, 1, remove_zxid);
|
||||
|
||||
ASSERT_EQ(remove_responses.size(), 1);
|
||||
ASSERT_TRUE(is_multi_ok(remove_responses[0].response));
|
||||
ASSERT_FALSE(exists("/A"));
|
||||
ASSERT_FALSE(exists("/A/C"));
|
||||
ASSERT_FALSE(exists("/A/B"));
|
||||
ASSERT_FALSE(exists("/A/B/D"));
|
||||
}
|
||||
|
||||
{
|
||||
SCOPED_TRACE("[BUG] Recursive Remove Level Sorting");
|
||||
int new_zxid = ++zxid;
|
||||
|
||||
Coordination::Requests ops = {
|
||||
zkutil::makeCreateRequest("/a", "", zkutil::CreateMode::Persistent),
|
||||
zkutil::makeCreateRequest("/a/bbbbbb", "", zkutil::CreateMode::Persistent),
|
||||
zkutil::makeCreateRequest("/A", "", zkutil::CreateMode::Persistent),
|
||||
zkutil::makeCreateRequest("/A/B", "", zkutil::CreateMode::Persistent),
|
||||
zkutil::makeCreateRequest("/A/CCCCCCCCCCCC", "", zkutil::CreateMode::Persistent),
|
||||
zkutil::makeRemoveRecursiveRequest("/A", 3),
|
||||
};
|
||||
auto remove_request = std::make_shared<ZooKeeperMultiRequest>(ops, ACLs{});
|
||||
storage.preprocessRequest(remove_request, 1, 0, new_zxid);
|
||||
auto remove_responses = storage.processRequest(remove_request, 1, new_zxid);
|
||||
|
||||
ASSERT_EQ(remove_responses.size(), 1);
|
||||
ASSERT_TRUE(is_multi_ok(remove_responses[0].response));
|
||||
ASSERT_TRUE(exists("/a"));
|
||||
ASSERT_TRUE(exists("/a/bbbbbb"));
|
||||
ASSERT_FALSE(exists("/A"));
|
||||
ASSERT_FALSE(exists("/A/B"));
|
||||
ASSERT_FALSE(exists("/A/CCCCCCCCCCCC"));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
TYPED_TEST(CoordinationTest, TestRemoveRecursiveWatches)
|
||||
@ -3823,14 +3890,26 @@ TYPED_TEST(CoordinationTest, TestRemoveRecursiveWatches)
|
||||
auto responses = storage.processRequest(remove_request, 1, new_zxid);
|
||||
|
||||
ASSERT_EQ(responses.size(), 7);
|
||||
/// request response is last
|
||||
ASSERT_EQ(dynamic_cast<Coordination::ZooKeeperWatchResponse *>(responses.back().response.get()), nullptr);
|
||||
|
||||
for (size_t i = 0; i < 7; ++i)
|
||||
std::unordered_map<std::string, std::vector<Coordination::Event>> expected_watch_responses
|
||||
{
|
||||
{"/A/B/D", {Coordination::Event::DELETED}},
|
||||
{"/A/B", {Coordination::Event::CHILD, Coordination::Event::DELETED}},
|
||||
{"/A/C", {Coordination::Event::DELETED}},
|
||||
{"/A", {Coordination::Event::CHILD, Coordination::Event::DELETED}},
|
||||
};
|
||||
|
||||
std::unordered_map<std::string, std::vector<Coordination::Event>> actual_watch_responses;
|
||||
for (size_t i = 0; i < 6; ++i)
|
||||
{
|
||||
ASSERT_EQ(responses[i].response->error, Coordination::Error::ZOK);
|
||||
|
||||
if (const auto * watch_response = dynamic_cast<Coordination::ZooKeeperWatchResponse *>(responses[i].response.get()))
|
||||
ASSERT_EQ(watch_response->type, Coordination::Event::DELETED);
|
||||
const auto & watch_response = dynamic_cast<Coordination::ZooKeeperWatchResponse &>(*responses[i].response);
|
||||
actual_watch_responses[watch_response.path].push_back(static_cast<Coordination::Event>(watch_response.type));
|
||||
}
|
||||
ASSERT_EQ(expected_watch_responses, actual_watch_responses);
|
||||
|
||||
ASSERT_EQ(storage.watches.size(), 0);
|
||||
ASSERT_EQ(storage.list_watches.size(), 0);
|
||||
|
@ -66,7 +66,6 @@
|
||||
#include <Access/SettingsConstraintsAndProfileIDs.h>
|
||||
#include <Access/ExternalAuthenticators.h>
|
||||
#include <Access/GSSAcceptor.h>
|
||||
#include <Common/Scheduler/ResourceManagerFactory.h>
|
||||
#include <Backups/BackupsWorker.h>
|
||||
#include <Dictionaries/Embedded/GeoDictionariesLoader.h>
|
||||
#include <Interpreters/EmbeddedDictionaries.h>
|
||||
@ -91,6 +90,8 @@
|
||||
#include <Parsers/ASTCreateQuery.h>
|
||||
#include <Parsers/ASTAsterisk.h>
|
||||
#include <Parsers/ASTIdentifier.h>
|
||||
#include <Common/Scheduler/createResourceManager.h>
|
||||
#include <Common/Scheduler/Workload/createWorkloadEntityStorage.h>
|
||||
#include <Common/StackTrace.h>
|
||||
#include <Common/Config/ConfigHelper.h>
|
||||
#include <Common/Config/ConfigProcessor.h>
|
||||
@ -274,6 +275,9 @@ struct ContextSharedPart : boost::noncopyable
|
||||
mutable OnceFlag user_defined_sql_objects_storage_initialized;
|
||||
mutable std::unique_ptr<IUserDefinedSQLObjectsStorage> user_defined_sql_objects_storage;
|
||||
|
||||
mutable OnceFlag workload_entity_storage_initialized;
|
||||
mutable std::unique_ptr<IWorkloadEntityStorage> workload_entity_storage;
|
||||
|
||||
#if USE_NLP
|
||||
mutable OnceFlag synonyms_extensions_initialized;
|
||||
mutable std::optional<SynonymsExtensions> synonyms_extensions;
|
||||
@ -615,6 +619,7 @@ struct ContextSharedPart : boost::noncopyable
|
||||
SHUTDOWN(log, "dictionaries loader", external_dictionaries_loader, enablePeriodicUpdates(false));
|
||||
SHUTDOWN(log, "UDFs loader", external_user_defined_executable_functions_loader, enablePeriodicUpdates(false));
|
||||
SHUTDOWN(log, "another UDFs storage", user_defined_sql_objects_storage, stopWatching());
|
||||
SHUTDOWN(log, "workload entity storage", workload_entity_storage, stopWatching());
|
||||
|
||||
LOG_TRACE(log, "Shutting down named sessions");
|
||||
Session::shutdownNamedSessions();
|
||||
@ -646,6 +651,7 @@ struct ContextSharedPart : boost::noncopyable
|
||||
std::unique_ptr<ExternalDictionariesLoader> delete_external_dictionaries_loader;
|
||||
std::unique_ptr<ExternalUserDefinedExecutableFunctionsLoader> delete_external_user_defined_executable_functions_loader;
|
||||
std::unique_ptr<IUserDefinedSQLObjectsStorage> delete_user_defined_sql_objects_storage;
|
||||
std::unique_ptr<IWorkloadEntityStorage> delete_workload_entity_storage;
|
||||
std::unique_ptr<BackgroundSchedulePool> delete_buffer_flush_schedule_pool;
|
||||
std::unique_ptr<BackgroundSchedulePool> delete_schedule_pool;
|
||||
std::unique_ptr<BackgroundSchedulePool> delete_distributed_schedule_pool;
|
||||
@ -730,6 +736,7 @@ struct ContextSharedPart : boost::noncopyable
|
||||
delete_external_dictionaries_loader = std::move(external_dictionaries_loader);
|
||||
delete_external_user_defined_executable_functions_loader = std::move(external_user_defined_executable_functions_loader);
|
||||
delete_user_defined_sql_objects_storage = std::move(user_defined_sql_objects_storage);
|
||||
delete_workload_entity_storage = std::move(workload_entity_storage);
|
||||
delete_buffer_flush_schedule_pool = std::move(buffer_flush_schedule_pool);
|
||||
delete_schedule_pool = std::move(schedule_pool);
|
||||
delete_distributed_schedule_pool = std::move(distributed_schedule_pool);
|
||||
@ -748,6 +755,7 @@ struct ContextSharedPart : boost::noncopyable
|
||||
delete_external_dictionaries_loader.reset();
|
||||
delete_external_user_defined_executable_functions_loader.reset();
|
||||
delete_user_defined_sql_objects_storage.reset();
|
||||
delete_workload_entity_storage.reset();
|
||||
delete_ddl_worker.reset();
|
||||
delete_buffer_flush_schedule_pool.reset();
|
||||
delete_schedule_pool.reset();
|
||||
@ -1674,7 +1682,7 @@ std::vector<UUID> Context::getEnabledProfiles() const
|
||||
ResourceManagerPtr Context::getResourceManager() const
|
||||
{
|
||||
callOnce(shared->resource_manager_initialized, [&] {
|
||||
shared->resource_manager = ResourceManagerFactory::instance().get(getConfigRef().getString("resource_manager", "dynamic"));
|
||||
shared->resource_manager = createResourceManager(getGlobalContext());
|
||||
});
|
||||
|
||||
return shared->resource_manager;
|
||||
@ -2909,6 +2917,32 @@ void Context::setUserDefinedSQLObjectsStorage(std::unique_ptr<IUserDefinedSQLObj
|
||||
shared->user_defined_sql_objects_storage = std::move(storage);
|
||||
}
|
||||
|
||||
const IWorkloadEntityStorage & Context::getWorkloadEntityStorage() const
|
||||
{
|
||||
callOnce(shared->workload_entity_storage_initialized, [&] {
|
||||
shared->workload_entity_storage = createWorkloadEntityStorage(getGlobalContext());
|
||||
});
|
||||
|
||||
SharedLockGuard lock(shared->mutex);
|
||||
return *shared->workload_entity_storage;
|
||||
}
|
||||
|
||||
IWorkloadEntityStorage & Context::getWorkloadEntityStorage()
|
||||
{
|
||||
callOnce(shared->workload_entity_storage_initialized, [&] {
|
||||
shared->workload_entity_storage = createWorkloadEntityStorage(getGlobalContext());
|
||||
});
|
||||
|
||||
std::lock_guard lock(shared->mutex);
|
||||
return *shared->workload_entity_storage;
|
||||
}
|
||||
|
||||
void Context::setWorkloadEntityStorage(std::unique_ptr<IWorkloadEntityStorage> storage)
|
||||
{
|
||||
std::lock_guard lock(shared->mutex);
|
||||
shared->workload_entity_storage = std::move(storage);
|
||||
}
|
||||
|
||||
#if USE_NLP
|
||||
|
||||
SynonymsExtensions & Context::getSynonymsExtensions() const
|
||||
|
@ -70,6 +70,7 @@ class EmbeddedDictionaries;
|
||||
class ExternalDictionariesLoader;
|
||||
class ExternalUserDefinedExecutableFunctionsLoader;
|
||||
class IUserDefinedSQLObjectsStorage;
|
||||
class IWorkloadEntityStorage;
|
||||
class InterserverCredentials;
|
||||
using InterserverCredentialsPtr = std::shared_ptr<const InterserverCredentials>;
|
||||
class InterserverIOHandler;
|
||||
@ -880,6 +881,10 @@ public:
|
||||
void setUserDefinedSQLObjectsStorage(std::unique_ptr<IUserDefinedSQLObjectsStorage> storage);
|
||||
void loadOrReloadUserDefinedExecutableFunctions(const Poco::Util::AbstractConfiguration & config);
|
||||
|
||||
const IWorkloadEntityStorage & getWorkloadEntityStorage() const;
|
||||
IWorkloadEntityStorage & getWorkloadEntityStorage();
|
||||
void setWorkloadEntityStorage(std::unique_ptr<IWorkloadEntityStorage> storage);
|
||||
|
||||
#if USE_NLP
|
||||
SynonymsExtensions & getSynonymsExtensions() const;
|
||||
Lemmatizers & getLemmatizers() const;
|
||||
|
@ -338,11 +338,8 @@ size_t HashJoin::getTotalRowCount() const
|
||||
return res;
|
||||
}
|
||||
|
||||
size_t HashJoin::getTotalByteCount() const
|
||||
void HashJoin::doDebugAsserts() const
|
||||
{
|
||||
if (!data)
|
||||
return 0;
|
||||
|
||||
#ifndef NDEBUG
|
||||
size_t debug_blocks_allocated_size = 0;
|
||||
for (const auto & block : data->blocks)
|
||||
@ -360,6 +357,14 @@ size_t HashJoin::getTotalByteCount() const
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "data->blocks_nullmaps_allocated_size != debug_blocks_nullmaps_allocated_size ({} != {})",
|
||||
data->blocks_nullmaps_allocated_size, debug_blocks_nullmaps_allocated_size);
|
||||
#endif
|
||||
}
|
||||
|
||||
size_t HashJoin::getTotalByteCount() const
|
||||
{
|
||||
if (!data)
|
||||
return 0;
|
||||
|
||||
doDebugAsserts();
|
||||
|
||||
size_t res = 0;
|
||||
|
||||
@ -544,9 +549,11 @@ bool HashJoin::addBlockToJoin(const Block & source_block_, bool check_limits)
|
||||
have_compressed = true;
|
||||
}
|
||||
|
||||
doDebugAsserts();
|
||||
data->blocks_allocated_size += block_to_save.allocatedBytes();
|
||||
data->blocks.emplace_back(std::move(block_to_save));
|
||||
Block * stored_block = &data->blocks.back();
|
||||
doDebugAsserts();
|
||||
|
||||
if (rows)
|
||||
data->empty = false;
|
||||
@ -634,9 +641,11 @@ bool HashJoin::addBlockToJoin(const Block & source_block_, bool check_limits)
|
||||
|
||||
if (!flag_per_row && !is_inserted)
|
||||
{
|
||||
doDebugAsserts();
|
||||
LOG_TRACE(log, "Skipping inserting block with {} rows", rows);
|
||||
data->blocks_allocated_size -= stored_block->allocatedBytes();
|
||||
data->blocks.pop_back();
|
||||
doDebugAsserts();
|
||||
}
|
||||
|
||||
if (!check_limits)
|
||||
@ -683,6 +692,8 @@ void HashJoin::shrinkStoredBlocksToFit(size_t & total_bytes_in_join, bool force_
|
||||
|
||||
for (auto & stored_block : data->blocks)
|
||||
{
|
||||
doDebugAsserts();
|
||||
|
||||
size_t old_size = stored_block.allocatedBytes();
|
||||
stored_block = stored_block.shrinkToFit();
|
||||
size_t new_size = stored_block.allocatedBytes();
|
||||
@ -700,6 +711,8 @@ void HashJoin::shrinkStoredBlocksToFit(size_t & total_bytes_in_join, bool force_
|
||||
else
|
||||
/// Sometimes after clone resized block can be bigger than original
|
||||
data->blocks_allocated_size += new_size - old_size;
|
||||
|
||||
doDebugAsserts();
|
||||
}
|
||||
|
||||
auto new_total_bytes_in_join = getTotalByteCount();
|
||||
@ -1416,7 +1429,13 @@ void HashJoin::tryRerangeRightTableDataImpl(Map & map [[maybe_unused]])
|
||||
};
|
||||
BlocksList sorted_blocks;
|
||||
visit_rows_map(sorted_blocks, map);
|
||||
doDebugAsserts();
|
||||
data->blocks.swap(sorted_blocks);
|
||||
size_t new_blocks_allocated_size = 0;
|
||||
for (const auto & block : data->blocks)
|
||||
new_blocks_allocated_size += block.allocatedBytes();
|
||||
data->blocks_allocated_size = new_blocks_allocated_size;
|
||||
doDebugAsserts();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -470,6 +470,7 @@ private:
|
||||
void tryRerangeRightTableData() override;
|
||||
template <JoinKind KIND, typename Map, JoinStrictness STRICTNESS>
|
||||
void tryRerangeRightTableDataImpl(Map & map);
|
||||
void doDebugAsserts() const;
|
||||
};
|
||||
|
||||
}
|
||||
|
68
src/Interpreters/InterpreterCreateResourceQuery.cpp
Normal file
68
src/Interpreters/InterpreterCreateResourceQuery.cpp
Normal file
@ -0,0 +1,68 @@
|
||||
#include <Interpreters/InterpreterFactory.h>
|
||||
#include <Interpreters/InterpreterCreateResourceQuery.h>
|
||||
|
||||
#include <Access/ContextAccess.h>
|
||||
#include <Common/Scheduler/Workload/IWorkloadEntityStorage.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/executeDDLQueryOnCluster.h>
|
||||
#include <Parsers/ASTCreateResourceQuery.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int INCORRECT_QUERY;
|
||||
}
|
||||
|
||||
BlockIO InterpreterCreateResourceQuery::execute()
|
||||
{
|
||||
ASTCreateResourceQuery & create_resource_query = query_ptr->as<ASTCreateResourceQuery &>();
|
||||
|
||||
AccessRightsElements access_rights_elements;
|
||||
access_rights_elements.emplace_back(AccessType::CREATE_RESOURCE);
|
||||
|
||||
if (create_resource_query.or_replace)
|
||||
access_rights_elements.emplace_back(AccessType::DROP_RESOURCE);
|
||||
|
||||
auto current_context = getContext();
|
||||
|
||||
if (!create_resource_query.cluster.empty())
|
||||
{
|
||||
if (current_context->getWorkloadEntityStorage().isReplicated())
|
||||
throw Exception(ErrorCodes::INCORRECT_QUERY, "ON CLUSTER is not allowed because workload entities are replicated automatically");
|
||||
|
||||
DDLQueryOnClusterParams params;
|
||||
params.access_to_check = std::move(access_rights_elements);
|
||||
return executeDDLQueryOnCluster(query_ptr, current_context, params);
|
||||
}
|
||||
|
||||
current_context->checkAccess(access_rights_elements);
|
||||
|
||||
auto resource_name = create_resource_query.getResourceName();
|
||||
bool throw_if_exists = !create_resource_query.if_not_exists && !create_resource_query.or_replace;
|
||||
bool replace_if_exists = create_resource_query.or_replace;
|
||||
|
||||
current_context->getWorkloadEntityStorage().storeEntity(
|
||||
current_context,
|
||||
WorkloadEntityType::Resource,
|
||||
resource_name,
|
||||
query_ptr,
|
||||
throw_if_exists,
|
||||
replace_if_exists,
|
||||
current_context->getSettingsRef());
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
void registerInterpreterCreateResourceQuery(InterpreterFactory & factory)
|
||||
{
|
||||
auto create_fn = [] (const InterpreterFactory::Arguments & args)
|
||||
{
|
||||
return std::make_unique<InterpreterCreateResourceQuery>(args.query, args.context);
|
||||
};
|
||||
factory.registerInterpreter("InterpreterCreateResourceQuery", create_fn);
|
||||
}
|
||||
|
||||
}
|
25
src/Interpreters/InterpreterCreateResourceQuery.h
Normal file
25
src/Interpreters/InterpreterCreateResourceQuery.h
Normal file
@ -0,0 +1,25 @@
|
||||
#pragma once
|
||||
|
||||
#include <Interpreters/IInterpreter.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class Context;
|
||||
|
||||
class InterpreterCreateResourceQuery : public IInterpreter, WithMutableContext
|
||||
{
|
||||
public:
|
||||
InterpreterCreateResourceQuery(const ASTPtr & query_ptr_, ContextMutablePtr context_)
|
||||
: WithMutableContext(context_), query_ptr(query_ptr_)
|
||||
{
|
||||
}
|
||||
|
||||
BlockIO execute() override;
|
||||
|
||||
private:
|
||||
ASTPtr query_ptr;
|
||||
};
|
||||
|
||||
}
|
68
src/Interpreters/InterpreterCreateWorkloadQuery.cpp
Normal file
68
src/Interpreters/InterpreterCreateWorkloadQuery.cpp
Normal file
@ -0,0 +1,68 @@
|
||||
#include <Interpreters/InterpreterFactory.h>
|
||||
#include <Interpreters/InterpreterCreateWorkloadQuery.h>
|
||||
|
||||
#include <Access/ContextAccess.h>
|
||||
#include <Common/Scheduler/Workload/IWorkloadEntityStorage.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/executeDDLQueryOnCluster.h>
|
||||
#include <Parsers/ASTCreateWorkloadQuery.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int INCORRECT_QUERY;
|
||||
}
|
||||
|
||||
BlockIO InterpreterCreateWorkloadQuery::execute()
|
||||
{
|
||||
ASTCreateWorkloadQuery & create_workload_query = query_ptr->as<ASTCreateWorkloadQuery &>();
|
||||
|
||||
AccessRightsElements access_rights_elements;
|
||||
access_rights_elements.emplace_back(AccessType::CREATE_WORKLOAD);
|
||||
|
||||
if (create_workload_query.or_replace)
|
||||
access_rights_elements.emplace_back(AccessType::DROP_WORKLOAD);
|
||||
|
||||
auto current_context = getContext();
|
||||
|
||||
if (!create_workload_query.cluster.empty())
|
||||
{
|
||||
if (current_context->getWorkloadEntityStorage().isReplicated())
|
||||
throw Exception(ErrorCodes::INCORRECT_QUERY, "ON CLUSTER is not allowed because workload entities are replicated automatically");
|
||||
|
||||
DDLQueryOnClusterParams params;
|
||||
params.access_to_check = std::move(access_rights_elements);
|
||||
return executeDDLQueryOnCluster(query_ptr, current_context, params);
|
||||
}
|
||||
|
||||
current_context->checkAccess(access_rights_elements);
|
||||
|
||||
auto workload_name = create_workload_query.getWorkloadName();
|
||||
bool throw_if_exists = !create_workload_query.if_not_exists && !create_workload_query.or_replace;
|
||||
bool replace_if_exists = create_workload_query.or_replace;
|
||||
|
||||
current_context->getWorkloadEntityStorage().storeEntity(
|
||||
current_context,
|
||||
WorkloadEntityType::Workload,
|
||||
workload_name,
|
||||
query_ptr,
|
||||
throw_if_exists,
|
||||
replace_if_exists,
|
||||
current_context->getSettingsRef());
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
void registerInterpreterCreateWorkloadQuery(InterpreterFactory & factory)
|
||||
{
|
||||
auto create_fn = [] (const InterpreterFactory::Arguments & args)
|
||||
{
|
||||
return std::make_unique<InterpreterCreateWorkloadQuery>(args.query, args.context);
|
||||
};
|
||||
factory.registerInterpreter("InterpreterCreateWorkloadQuery", create_fn);
|
||||
}
|
||||
|
||||
}
|
25
src/Interpreters/InterpreterCreateWorkloadQuery.h
Normal file
25
src/Interpreters/InterpreterCreateWorkloadQuery.h
Normal file
@ -0,0 +1,25 @@
|
||||
#pragma once
|
||||
|
||||
#include <Interpreters/IInterpreter.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class Context;
|
||||
|
||||
class InterpreterCreateWorkloadQuery : public IInterpreter, WithMutableContext
|
||||
{
|
||||
public:
|
||||
InterpreterCreateWorkloadQuery(const ASTPtr & query_ptr_, ContextMutablePtr context_)
|
||||
: WithMutableContext(context_), query_ptr(query_ptr_)
|
||||
{
|
||||
}
|
||||
|
||||
BlockIO execute() override;
|
||||
|
||||
private:
|
||||
ASTPtr query_ptr;
|
||||
};
|
||||
|
||||
}
|
60
src/Interpreters/InterpreterDropResourceQuery.cpp
Normal file
60
src/Interpreters/InterpreterDropResourceQuery.cpp
Normal file
@ -0,0 +1,60 @@
|
||||
#include <Interpreters/InterpreterFactory.h>
|
||||
#include <Interpreters/InterpreterDropResourceQuery.h>
|
||||
|
||||
#include <Access/ContextAccess.h>
|
||||
#include <Common/Scheduler/Workload/IWorkloadEntityStorage.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/executeDDLQueryOnCluster.h>
|
||||
#include <Parsers/ASTDropResourceQuery.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int INCORRECT_QUERY;
|
||||
}
|
||||
|
||||
BlockIO InterpreterDropResourceQuery::execute()
|
||||
{
|
||||
ASTDropResourceQuery & drop_resource_query = query_ptr->as<ASTDropResourceQuery &>();
|
||||
|
||||
AccessRightsElements access_rights_elements;
|
||||
access_rights_elements.emplace_back(AccessType::DROP_RESOURCE);
|
||||
|
||||
auto current_context = getContext();
|
||||
|
||||
if (!drop_resource_query.cluster.empty())
|
||||
{
|
||||
if (current_context->getWorkloadEntityStorage().isReplicated())
|
||||
throw Exception(ErrorCodes::INCORRECT_QUERY, "ON CLUSTER is not allowed because workload entities are replicated automatically");
|
||||
|
||||
DDLQueryOnClusterParams params;
|
||||
params.access_to_check = std::move(access_rights_elements);
|
||||
return executeDDLQueryOnCluster(query_ptr, current_context, params);
|
||||
}
|
||||
|
||||
current_context->checkAccess(access_rights_elements);
|
||||
|
||||
bool throw_if_not_exists = !drop_resource_query.if_exists;
|
||||
|
||||
current_context->getWorkloadEntityStorage().removeEntity(
|
||||
current_context,
|
||||
WorkloadEntityType::Resource,
|
||||
drop_resource_query.resource_name,
|
||||
throw_if_not_exists);
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
void registerInterpreterDropResourceQuery(InterpreterFactory & factory)
|
||||
{
|
||||
auto create_fn = [] (const InterpreterFactory::Arguments & args)
|
||||
{
|
||||
return std::make_unique<InterpreterDropResourceQuery>(args.query, args.context);
|
||||
};
|
||||
factory.registerInterpreter("InterpreterDropResourceQuery", create_fn);
|
||||
}
|
||||
|
||||
}
|
21
src/Interpreters/InterpreterDropResourceQuery.h
Normal file
21
src/Interpreters/InterpreterDropResourceQuery.h
Normal file
@ -0,0 +1,21 @@
|
||||
#pragma once
|
||||
|
||||
#include <Interpreters/IInterpreter.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class Context;
|
||||
|
||||
class InterpreterDropResourceQuery : public IInterpreter, WithMutableContext
|
||||
{
|
||||
public:
|
||||
InterpreterDropResourceQuery(const ASTPtr & query_ptr_, ContextMutablePtr context_) : WithMutableContext(context_), query_ptr(query_ptr_) {}
|
||||
|
||||
BlockIO execute() override;
|
||||
|
||||
private:
|
||||
ASTPtr query_ptr;
|
||||
};
|
||||
|
||||
}
|
60
src/Interpreters/InterpreterDropWorkloadQuery.cpp
Normal file
60
src/Interpreters/InterpreterDropWorkloadQuery.cpp
Normal file
@ -0,0 +1,60 @@
|
||||
#include <Interpreters/InterpreterFactory.h>
|
||||
#include <Interpreters/InterpreterDropWorkloadQuery.h>
|
||||
|
||||
#include <Access/ContextAccess.h>
|
||||
#include <Common/Scheduler/Workload/IWorkloadEntityStorage.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/executeDDLQueryOnCluster.h>
|
||||
#include <Parsers/ASTDropWorkloadQuery.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int INCORRECT_QUERY;
|
||||
}
|
||||
|
||||
BlockIO InterpreterDropWorkloadQuery::execute()
|
||||
{
|
||||
ASTDropWorkloadQuery & drop_workload_query = query_ptr->as<ASTDropWorkloadQuery &>();
|
||||
|
||||
AccessRightsElements access_rights_elements;
|
||||
access_rights_elements.emplace_back(AccessType::DROP_WORKLOAD);
|
||||
|
||||
auto current_context = getContext();
|
||||
|
||||
if (!drop_workload_query.cluster.empty())
|
||||
{
|
||||
if (current_context->getWorkloadEntityStorage().isReplicated())
|
||||
throw Exception(ErrorCodes::INCORRECT_QUERY, "ON CLUSTER is not allowed because workload entities are replicated automatically");
|
||||
|
||||
DDLQueryOnClusterParams params;
|
||||
params.access_to_check = std::move(access_rights_elements);
|
||||
return executeDDLQueryOnCluster(query_ptr, current_context, params);
|
||||
}
|
||||
|
||||
current_context->checkAccess(access_rights_elements);
|
||||
|
||||
bool throw_if_not_exists = !drop_workload_query.if_exists;
|
||||
|
||||
current_context->getWorkloadEntityStorage().removeEntity(
|
||||
current_context,
|
||||
WorkloadEntityType::Workload,
|
||||
drop_workload_query.workload_name,
|
||||
throw_if_not_exists);
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
void registerInterpreterDropWorkloadQuery(InterpreterFactory & factory)
|
||||
{
|
||||
auto create_fn = [] (const InterpreterFactory::Arguments & args)
|
||||
{
|
||||
return std::make_unique<InterpreterDropWorkloadQuery>(args.query, args.context);
|
||||
};
|
||||
factory.registerInterpreter("InterpreterDropWorkloadQuery", create_fn);
|
||||
}
|
||||
|
||||
}
|
21
src/Interpreters/InterpreterDropWorkloadQuery.h
Normal file
21
src/Interpreters/InterpreterDropWorkloadQuery.h
Normal file
@ -0,0 +1,21 @@
|
||||
#pragma once
|
||||
|
||||
#include <Interpreters/IInterpreter.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class Context;
|
||||
|
||||
class InterpreterDropWorkloadQuery : public IInterpreter, WithMutableContext
|
||||
{
|
||||
public:
|
||||
InterpreterDropWorkloadQuery(const ASTPtr & query_ptr_, ContextMutablePtr context_) : WithMutableContext(context_), query_ptr(query_ptr_) {}
|
||||
|
||||
BlockIO execute() override;
|
||||
|
||||
private:
|
||||
ASTPtr query_ptr;
|
||||
};
|
||||
|
||||
}
|
@ -3,9 +3,13 @@
|
||||
#include <Parsers/ASTCheckQuery.h>
|
||||
#include <Parsers/ASTCreateQuery.h>
|
||||
#include <Parsers/ASTCreateFunctionQuery.h>
|
||||
#include <Parsers/ASTCreateWorkloadQuery.h>
|
||||
#include <Parsers/ASTCreateResourceQuery.h>
|
||||
#include <Parsers/ASTCreateIndexQuery.h>
|
||||
#include <Parsers/ASTDeleteQuery.h>
|
||||
#include <Parsers/ASTDropFunctionQuery.h>
|
||||
#include <Parsers/ASTDropWorkloadQuery.h>
|
||||
#include <Parsers/ASTDropResourceQuery.h>
|
||||
#include <Parsers/ASTDropIndexQuery.h>
|
||||
#include <Parsers/ASTDropQuery.h>
|
||||
#include <Parsers/ASTUndropQuery.h>
|
||||
@ -326,6 +330,22 @@ InterpreterFactory::InterpreterPtr InterpreterFactory::get(ASTPtr & query, Conte
|
||||
{
|
||||
interpreter_name = "InterpreterDropFunctionQuery";
|
||||
}
|
||||
else if (query->as<ASTCreateWorkloadQuery>())
|
||||
{
|
||||
interpreter_name = "InterpreterCreateWorkloadQuery";
|
||||
}
|
||||
else if (query->as<ASTDropWorkloadQuery>())
|
||||
{
|
||||
interpreter_name = "InterpreterDropWorkloadQuery";
|
||||
}
|
||||
else if (query->as<ASTCreateResourceQuery>())
|
||||
{
|
||||
interpreter_name = "InterpreterCreateResourceQuery";
|
||||
}
|
||||
else if (query->as<ASTDropResourceQuery>())
|
||||
{
|
||||
interpreter_name = "InterpreterDropResourceQuery";
|
||||
}
|
||||
else if (query->as<ASTCreateIndexQuery>())
|
||||
{
|
||||
interpreter_name = "InterpreterCreateIndexQuery";
|
||||
|
@ -52,6 +52,10 @@ void registerInterpreterExternalDDLQuery(InterpreterFactory & factory);
|
||||
void registerInterpreterTransactionControlQuery(InterpreterFactory & factory);
|
||||
void registerInterpreterCreateFunctionQuery(InterpreterFactory & factory);
|
||||
void registerInterpreterDropFunctionQuery(InterpreterFactory & factory);
|
||||
void registerInterpreterCreateWorkloadQuery(InterpreterFactory & factory);
|
||||
void registerInterpreterDropWorkloadQuery(InterpreterFactory & factory);
|
||||
void registerInterpreterCreateResourceQuery(InterpreterFactory & factory);
|
||||
void registerInterpreterDropResourceQuery(InterpreterFactory & factory);
|
||||
void registerInterpreterCreateIndexQuery(InterpreterFactory & factory);
|
||||
void registerInterpreterCreateNamedCollectionQuery(InterpreterFactory & factory);
|
||||
void registerInterpreterDropIndexQuery(InterpreterFactory & factory);
|
||||
@ -111,6 +115,10 @@ void registerInterpreters()
|
||||
registerInterpreterTransactionControlQuery(factory);
|
||||
registerInterpreterCreateFunctionQuery(factory);
|
||||
registerInterpreterDropFunctionQuery(factory);
|
||||
registerInterpreterCreateWorkloadQuery(factory);
|
||||
registerInterpreterDropWorkloadQuery(factory);
|
||||
registerInterpreterCreateResourceQuery(factory);
|
||||
registerInterpreterDropResourceQuery(factory);
|
||||
registerInterpreterCreateIndexQuery(factory);
|
||||
registerInterpreterCreateNamedCollectionQuery(factory);
|
||||
registerInterpreterDropIndexQuery(factory);
|
||||
|
47
src/Parsers/ASTCreateResourceQuery.cpp
Normal file
47
src/Parsers/ASTCreateResourceQuery.cpp
Normal file
@ -0,0 +1,47 @@
|
||||
#include <Common/quoteString.h>
|
||||
#include <IO/Operators.h>
|
||||
#include <Parsers/ASTCreateResourceQuery.h>
|
||||
#include <Parsers/ASTExpressionList.h>
|
||||
#include <Parsers/ASTIdentifier.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
ASTPtr ASTCreateResourceQuery::clone() const
|
||||
{
|
||||
auto res = std::make_shared<ASTCreateResourceQuery>(*this);
|
||||
res->children.clear();
|
||||
|
||||
res->resource_name = resource_name->clone();
|
||||
res->children.push_back(res->resource_name);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
void ASTCreateResourceQuery::formatImpl(const IAST::FormatSettings & settings, IAST::FormatState &, IAST::FormatStateStacked) const
|
||||
{
|
||||
settings.ostr << (settings.hilite ? hilite_keyword : "") << "CREATE ";
|
||||
|
||||
if (or_replace)
|
||||
settings.ostr << "OR REPLACE ";
|
||||
|
||||
settings.ostr << "RESOURCE ";
|
||||
|
||||
if (if_not_exists)
|
||||
settings.ostr << "IF NOT EXISTS ";
|
||||
|
||||
settings.ostr << (settings.hilite ? hilite_none : "");
|
||||
|
||||
settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(getResourceName()) << (settings.hilite ? hilite_none : "");
|
||||
|
||||
formatOnCluster(settings);
|
||||
}
|
||||
|
||||
String ASTCreateResourceQuery::getResourceName() const
|
||||
{
|
||||
String name;
|
||||
tryGetIdentifierNameInto(resource_name, name);
|
||||
return name;
|
||||
}
|
||||
|
||||
}
|
32
src/Parsers/ASTCreateResourceQuery.h
Normal file
32
src/Parsers/ASTCreateResourceQuery.h
Normal file
@ -0,0 +1,32 @@
|
||||
#pragma once
|
||||
|
||||
#include <Parsers/IAST.h>
|
||||
#include <Parsers/ASTQueryWithOnCluster.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class ASTCreateResourceQuery : public IAST, public ASTQueryWithOnCluster
|
||||
{
|
||||
public:
|
||||
ASTPtr resource_name;
|
||||
// TODO(serxa): add resource definition
|
||||
|
||||
bool or_replace = false;
|
||||
bool if_not_exists = false;
|
||||
|
||||
String getID(char delim) const override { return "CreateResourceQuery" + (delim + getResourceName()); }
|
||||
|
||||
ASTPtr clone() const override;
|
||||
|
||||
void formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override;
|
||||
|
||||
ASTPtr getRewrittenASTWithoutOnCluster(const WithoutOnClusterASTRewriteParams &) const override { return removeOnCluster<ASTCreateResourceQuery>(clone()); }
|
||||
|
||||
String getResourceName() const;
|
||||
|
||||
QueryKind getQueryKind() const override { return QueryKind::Create; }
|
||||
};
|
||||
|
||||
}
|
67
src/Parsers/ASTCreateWorkloadQuery.cpp
Normal file
67
src/Parsers/ASTCreateWorkloadQuery.cpp
Normal file
@ -0,0 +1,67 @@
|
||||
#include <Common/quoteString.h>
|
||||
#include <IO/Operators.h>
|
||||
#include <Parsers/ASTCreateWorkloadQuery.h>
|
||||
#include <Parsers/ASTExpressionList.h>
|
||||
#include <Parsers/ASTIdentifier.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
ASTPtr ASTCreateWorkloadQuery::clone() const
|
||||
{
|
||||
auto res = std::make_shared<ASTCreateWorkloadQuery>(*this);
|
||||
res->children.clear();
|
||||
|
||||
res->workload_name = workload_name->clone();
|
||||
res->children.push_back(res->workload_name);
|
||||
|
||||
// TODO(serxa): clone settings
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
void ASTCreateWorkloadQuery::formatImpl(const IAST::FormatSettings & settings, IAST::FormatState &, IAST::FormatStateStacked) const
|
||||
{
|
||||
settings.ostr << (settings.hilite ? hilite_keyword : "") << "CREATE ";
|
||||
|
||||
if (or_replace)
|
||||
settings.ostr << "OR REPLACE ";
|
||||
|
||||
settings.ostr << "WORKLOAD ";
|
||||
|
||||
if (if_not_exists)
|
||||
settings.ostr << "IF NOT EXISTS ";
|
||||
|
||||
settings.ostr << (settings.hilite ? hilite_none : "");
|
||||
|
||||
settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(getWorkloadName()) << (settings.hilite ? hilite_none : "");
|
||||
|
||||
formatOnCluster(settings);
|
||||
|
||||
if (hasParent())
|
||||
{
|
||||
settings.ostr << (settings.hilite ? hilite_keyword : "") << " IN " << (settings.hilite ? hilite_none : "");
|
||||
settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(getWorkloadParent()) << (settings.hilite ? hilite_none : "");
|
||||
}
|
||||
}
|
||||
|
||||
String ASTCreateWorkloadQuery::getWorkloadName() const
|
||||
{
|
||||
String name;
|
||||
tryGetIdentifierNameInto(workload_name, name);
|
||||
return name;
|
||||
}
|
||||
|
||||
bool ASTCreateWorkloadQuery::hasParent() const
|
||||
{
|
||||
return workload_parent != nullptr;
|
||||
}
|
||||
|
||||
String ASTCreateWorkloadQuery::getWorkloadParent() const
|
||||
{
|
||||
String name;
|
||||
tryGetIdentifierNameInto(workload_parent, name);
|
||||
return name;
|
||||
}
|
||||
|
||||
}
|
35
src/Parsers/ASTCreateWorkloadQuery.h
Normal file
35
src/Parsers/ASTCreateWorkloadQuery.h
Normal file
@ -0,0 +1,35 @@
|
||||
#pragma once
|
||||
|
||||
#include <Parsers/IAST.h>
|
||||
#include <Parsers/ASTQueryWithOnCluster.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class ASTCreateWorkloadQuery : public IAST, public ASTQueryWithOnCluster
|
||||
{
|
||||
public:
|
||||
ASTPtr workload_name;
|
||||
ASTPtr workload_parent;
|
||||
// TODO(serxa): add workload settings (weight and priority should also go inside settings, because they can differ for different resources)
|
||||
|
||||
bool or_replace = false;
|
||||
bool if_not_exists = false;
|
||||
|
||||
String getID(char delim) const override { return "CreateWorkloadQuery" + (delim + getWorkloadName()); }
|
||||
|
||||
ASTPtr clone() const override;
|
||||
|
||||
void formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override;
|
||||
|
||||
ASTPtr getRewrittenASTWithoutOnCluster(const WithoutOnClusterASTRewriteParams &) const override { return removeOnCluster<ASTCreateWorkloadQuery>(clone()); }
|
||||
|
||||
String getWorkloadName() const;
|
||||
bool hasParent() const;
|
||||
String getWorkloadParent() const;
|
||||
|
||||
QueryKind getQueryKind() const override { return QueryKind::Create; }
|
||||
};
|
||||
|
||||
}
|
25
src/Parsers/ASTDropResourceQuery.cpp
Normal file
25
src/Parsers/ASTDropResourceQuery.cpp
Normal file
@ -0,0 +1,25 @@
|
||||
#include <Parsers/ASTDropResourceQuery.h>
|
||||
#include <Common/quoteString.h>
|
||||
#include <IO/Operators.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
ASTPtr ASTDropResourceQuery::clone() const
|
||||
{
|
||||
return std::make_shared<ASTDropResourceQuery>(*this);
|
||||
}
|
||||
|
||||
void ASTDropResourceQuery::formatImpl(const IAST::FormatSettings & settings, IAST::FormatState &, IAST::FormatStateStacked) const
|
||||
{
|
||||
settings.ostr << (settings.hilite ? hilite_keyword : "") << "DROP RESOURCE ";
|
||||
|
||||
if (if_exists)
|
||||
settings.ostr << "IF EXISTS ";
|
||||
|
||||
settings.ostr << (settings.hilite ? hilite_none : "");
|
||||
settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(resource_name) << (settings.hilite ? hilite_none : "");
|
||||
formatOnCluster(settings);
|
||||
}
|
||||
|
||||
}
|
28
src/Parsers/ASTDropResourceQuery.h
Normal file
28
src/Parsers/ASTDropResourceQuery.h
Normal file
@ -0,0 +1,28 @@
|
||||
#pragma once
|
||||
|
||||
#include <Parsers/IAST.h>
|
||||
#include <Parsers/ASTQueryWithOnCluster.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class ASTDropResourceQuery : public IAST, public ASTQueryWithOnCluster
|
||||
{
|
||||
public:
|
||||
String resource_name;
|
||||
|
||||
bool if_exists = false;
|
||||
|
||||
String getID(char) const override { return "DropResourceQuery"; }
|
||||
|
||||
ASTPtr clone() const override;
|
||||
|
||||
void formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override;
|
||||
|
||||
ASTPtr getRewrittenASTWithoutOnCluster(const WithoutOnClusterASTRewriteParams &) const override { return removeOnCluster<ASTDropResourceQuery>(clone()); }
|
||||
|
||||
QueryKind getQueryKind() const override { return QueryKind::Drop; }
|
||||
};
|
||||
|
||||
}
|
25
src/Parsers/ASTDropWorkloadQuery.cpp
Normal file
25
src/Parsers/ASTDropWorkloadQuery.cpp
Normal file
@ -0,0 +1,25 @@
|
||||
#include <Parsers/ASTDropWorkloadQuery.h>
|
||||
#include <Common/quoteString.h>
|
||||
#include <IO/Operators.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
ASTPtr ASTDropWorkloadQuery::clone() const
|
||||
{
|
||||
return std::make_shared<ASTDropWorkloadQuery>(*this);
|
||||
}
|
||||
|
||||
void ASTDropWorkloadQuery::formatImpl(const IAST::FormatSettings & settings, IAST::FormatState &, IAST::FormatStateStacked) const
|
||||
{
|
||||
settings.ostr << (settings.hilite ? hilite_keyword : "") << "DROP WORKLOAD ";
|
||||
|
||||
if (if_exists)
|
||||
settings.ostr << "IF EXISTS ";
|
||||
|
||||
settings.ostr << (settings.hilite ? hilite_none : "");
|
||||
settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(workload_name) << (settings.hilite ? hilite_none : "");
|
||||
formatOnCluster(settings);
|
||||
}
|
||||
|
||||
}
|
28
src/Parsers/ASTDropWorkloadQuery.h
Normal file
28
src/Parsers/ASTDropWorkloadQuery.h
Normal file
@ -0,0 +1,28 @@
|
||||
#pragma once
|
||||
|
||||
#include <Parsers/IAST.h>
|
||||
#include <Parsers/ASTQueryWithOnCluster.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class ASTDropWorkloadQuery : public IAST, public ASTQueryWithOnCluster
|
||||
{
|
||||
public:
|
||||
String workload_name;
|
||||
|
||||
bool if_exists = false;
|
||||
|
||||
String getID(char) const override { return "DropWorkloadQuery"; }
|
||||
|
||||
ASTPtr clone() const override;
|
||||
|
||||
void formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override;
|
||||
|
||||
ASTPtr getRewrittenASTWithoutOnCluster(const WithoutOnClusterASTRewriteParams &) const override { return removeOnCluster<ASTDropWorkloadQuery>(clone()); }
|
||||
|
||||
QueryKind getQueryKind() const override { return QueryKind::Drop; }
|
||||
};
|
||||
|
||||
}
|
@ -408,6 +408,7 @@ namespace DB
|
||||
MR_MACROS(REPLACE, "REPLACE") \
|
||||
MR_MACROS(RESET_SETTING, "RESET SETTING") \
|
||||
MR_MACROS(RESET_AUTHENTICATION_METHODS_TO_NEW, "RESET AUTHENTICATION METHODS TO NEW") \
|
||||
MR_MACROS(RESOURCE, "RESOURCE") \
|
||||
MR_MACROS(RESPECT_NULLS, "RESPECT NULLS") \
|
||||
MR_MACROS(RESTORE, "RESTORE") \
|
||||
MR_MACROS(RESTRICT, "RESTRICT") \
|
||||
@ -520,6 +521,7 @@ namespace DB
|
||||
MR_MACROS(WHEN, "WHEN") \
|
||||
MR_MACROS(WHERE, "WHERE") \
|
||||
MR_MACROS(WINDOW, "WINDOW") \
|
||||
MR_MACROS(WORKLOAD, "WORKLOAD") \
|
||||
MR_MACROS(QUALIFY, "QUALIFY") \
|
||||
MR_MACROS(WITH_ADMIN_OPTION, "WITH ADMIN OPTION") \
|
||||
MR_MACROS(WITH_CHECK, "WITH CHECK") \
|
||||
|
@ -1,10 +1,42 @@
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <Common/KnownObjectNames.h>
|
||||
#include <Core/QualifiedTableName.h>
|
||||
#include <base/defines.h>
|
||||
#include <boost/algorithm/string/predicate.hpp>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class AbstractFunction
|
||||
{
|
||||
friend class FunctionSecretArgumentsFinder;
|
||||
public:
|
||||
class Argument
|
||||
{
|
||||
public:
|
||||
virtual ~Argument() = default;
|
||||
virtual std::unique_ptr<AbstractFunction> getFunction() const = 0;
|
||||
virtual bool isIdentifier() const = 0;
|
||||
virtual bool tryGetString(String * res, bool allow_identifier) const = 0;
|
||||
};
|
||||
class Arguments
|
||||
{
|
||||
public:
|
||||
virtual ~Arguments() = default;
|
||||
virtual size_t size() const = 0;
|
||||
virtual std::unique_ptr<Argument> at(size_t n) const = 0;
|
||||
};
|
||||
|
||||
virtual ~AbstractFunction() = default;
|
||||
virtual String name() const = 0;
|
||||
bool hasArguments() const { return !!arguments; }
|
||||
|
||||
protected:
|
||||
std::unique_ptr<Arguments> arguments;
|
||||
};
|
||||
|
||||
class FunctionSecretArgumentsFinder
|
||||
{
|
||||
public:
|
||||
@ -23,6 +55,485 @@ public:
|
||||
return count != 0 || !nested_maps.empty();
|
||||
}
|
||||
};
|
||||
|
||||
explicit FunctionSecretArgumentsFinder(std::unique_ptr<AbstractFunction> && function_) : function(std::move(function_)) {}
|
||||
|
||||
FunctionSecretArgumentsFinder::Result getResult() const { return result; }
|
||||
|
||||
protected:
|
||||
const std::unique_ptr<AbstractFunction> function;
|
||||
Result result;
|
||||
|
||||
void markSecretArgument(size_t index, bool argument_is_named = false)
|
||||
{
|
||||
if (index >= function->arguments->size())
|
||||
return;
|
||||
if (!result.count)
|
||||
{
|
||||
result.start = index;
|
||||
result.are_named = argument_is_named;
|
||||
}
|
||||
chassert(index >= result.start); /// We always check arguments consecutively
|
||||
result.count = index + 1 - result.start;
|
||||
if (!argument_is_named)
|
||||
result.are_named = false;
|
||||
}
|
||||
|
||||
void findOrdinaryFunctionSecretArguments()
|
||||
{
|
||||
if ((function->name() == "mysql") || (function->name() == "postgresql") || (function->name() == "mongodb"))
|
||||
{
|
||||
/// mysql('host:port', 'database', 'table', 'user', 'password', ...)
|
||||
/// postgresql('host:port', 'database', 'table', 'user', 'password', ...)
|
||||
/// mongodb('host:port', 'database', 'collection', 'user', 'password', ...)
|
||||
findMySQLFunctionSecretArguments();
|
||||
}
|
||||
else if ((function->name() == "s3") || (function->name() == "cosn") || (function->name() == "oss") ||
|
||||
(function->name() == "deltaLake") || (function->name() == "hudi") || (function->name() == "iceberg") ||
|
||||
(function->name() == "gcs"))
|
||||
{
|
||||
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
|
||||
findS3FunctionSecretArguments(/* is_cluster_function= */ false);
|
||||
}
|
||||
else if (function->name() == "s3Cluster")
|
||||
{
|
||||
/// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', ...)
|
||||
findS3FunctionSecretArguments(/* is_cluster_function= */ true);
|
||||
}
|
||||
else if (function->name() == "azureBlobStorage")
|
||||
{
|
||||
/// azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure)
|
||||
findAzureBlobStorageFunctionSecretArguments(/* is_cluster_function= */ false);
|
||||
}
|
||||
else if (function->name() == "azureBlobStorageCluster")
|
||||
{
|
||||
/// azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])
|
||||
findAzureBlobStorageFunctionSecretArguments(/* is_cluster_function= */ true);
|
||||
}
|
||||
else if ((function->name() == "remote") || (function->name() == "remoteSecure"))
|
||||
{
|
||||
/// remote('addresses_expr', 'db', 'table', 'user', 'password', ...)
|
||||
findRemoteFunctionSecretArguments();
|
||||
}
|
||||
else if ((function->name() == "encrypt") || (function->name() == "decrypt") ||
|
||||
(function->name() == "aes_encrypt_mysql") || (function->name() == "aes_decrypt_mysql") ||
|
||||
(function->name() == "tryDecrypt"))
|
||||
{
|
||||
/// encrypt('mode', 'plaintext', 'key' [, iv, aad])
|
||||
findEncryptionFunctionSecretArguments();
|
||||
}
|
||||
else if (function->name() == "url")
|
||||
{
|
||||
findURLSecretArguments();
|
||||
}
|
||||
}
|
||||
|
||||
void findMySQLFunctionSecretArguments()
|
||||
{
|
||||
if (isNamedCollectionName(0))
|
||||
{
|
||||
/// mysql(named_collection, ..., password = 'password', ...)
|
||||
findSecretNamedArgument("password", 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
/// mysql('host:port', 'database', 'table', 'user', 'password', ...)
|
||||
markSecretArgument(4);
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the number of arguments excluding "headers" and "extra_credentials" (which should
|
||||
/// always be at the end). Marks "headers" as secret, if found.
|
||||
size_t excludeS3OrURLNestedMaps()
|
||||
{
|
||||
size_t count = function->arguments->size();
|
||||
while (count > 0)
|
||||
{
|
||||
const auto f = function->arguments->at(count - 1)->getFunction();
|
||||
if (!f)
|
||||
break;
|
||||
if (f->name() == "headers")
|
||||
result.nested_maps.push_back(f->name());
|
||||
else if (f->name() != "extra_credentials")
|
||||
break;
|
||||
count -= 1;
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
void findS3FunctionSecretArguments(bool is_cluster_function)
|
||||
{
|
||||
/// s3Cluster('cluster_name', 'url', ...) has 'url' as its second argument.
|
||||
size_t url_arg_idx = is_cluster_function ? 1 : 0;
|
||||
|
||||
if (!is_cluster_function && isNamedCollectionName(0))
|
||||
{
|
||||
/// s3(named_collection, ..., secret_access_key = 'secret_access_key', ...)
|
||||
findSecretNamedArgument("secret_access_key", 1);
|
||||
return;
|
||||
}
|
||||
|
||||
/// We should check other arguments first because we don't need to do any replacement in case of
|
||||
/// s3('url', NOSIGN, 'format' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
|
||||
/// s3('url', 'format', 'structure' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
|
||||
size_t count = excludeS3OrURLNestedMaps();
|
||||
if ((url_arg_idx + 3 <= count) && (count <= url_arg_idx + 4))
|
||||
{
|
||||
String second_arg;
|
||||
if (tryGetStringFromArgument(url_arg_idx + 1, &second_arg))
|
||||
{
|
||||
if (boost::iequals(second_arg, "NOSIGN"))
|
||||
return; /// The argument after 'url' is "NOSIGN".
|
||||
|
||||
if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg))
|
||||
return; /// The argument after 'url' is a format: s3('url', 'format', ...)
|
||||
}
|
||||
}
|
||||
|
||||
/// We're going to replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures:
|
||||
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
|
||||
/// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression')
|
||||
if (url_arg_idx + 2 < count)
|
||||
markSecretArgument(url_arg_idx + 2);
|
||||
}
|
||||
|
||||
void findAzureBlobStorageFunctionSecretArguments(bool is_cluster_function)
|
||||
{
|
||||
/// azureBlobStorage('cluster_name', 'conn_string/storage_account_url', ...) has 'conn_string/storage_account_url' as its second argument.
|
||||
size_t url_arg_idx = is_cluster_function ? 1 : 0;
|
||||
|
||||
if (!is_cluster_function && isNamedCollectionName(0))
|
||||
{
|
||||
/// azureBlobStorage(named_collection, ..., account_key = 'account_key', ...)
|
||||
findSecretNamedArgument("account_key", 1);
|
||||
return;
|
||||
}
|
||||
else if (is_cluster_function && isNamedCollectionName(1))
|
||||
{
|
||||
/// azureBlobStorageCluster(cluster, named_collection, ..., account_key = 'account_key', ...)
|
||||
findSecretNamedArgument("account_key", 2);
|
||||
return;
|
||||
}
|
||||
|
||||
/// We should check other arguments first because we don't need to do any replacement in case storage_account_url is not used
|
||||
/// azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure)
|
||||
/// azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])
|
||||
size_t count = function->arguments->size();
|
||||
if ((url_arg_idx + 4 <= count) && (count <= url_arg_idx + 7))
|
||||
{
|
||||
String second_arg;
|
||||
if (tryGetStringFromArgument(url_arg_idx + 3, &second_arg))
|
||||
{
|
||||
if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg))
|
||||
return; /// The argument after 'url' is a format: s3('url', 'format', ...)
|
||||
}
|
||||
}
|
||||
|
||||
/// We're going to replace 'account_key' with '[HIDDEN]' if account_key is used in the signature
|
||||
if (url_arg_idx + 4 < count)
|
||||
markSecretArgument(url_arg_idx + 4);
|
||||
}
|
||||
|
||||
void findURLSecretArguments()
|
||||
{
|
||||
if (!isNamedCollectionName(0))
|
||||
excludeS3OrURLNestedMaps();
|
||||
}
|
||||
|
||||
bool tryGetStringFromArgument(size_t arg_idx, String * res, bool allow_identifier = true) const
|
||||
{
|
||||
if (arg_idx >= function->arguments->size())
|
||||
return false;
|
||||
|
||||
return tryGetStringFromArgument(*function->arguments->at(arg_idx), res, allow_identifier);
|
||||
}
|
||||
|
||||
static bool tryGetStringFromArgument(const AbstractFunction::Argument & argument, String * res, bool allow_identifier = true)
|
||||
{
|
||||
return argument.tryGetString(res, allow_identifier);
|
||||
}
|
||||
|
||||
void findRemoteFunctionSecretArguments()
|
||||
{
|
||||
if (isNamedCollectionName(0))
|
||||
{
|
||||
/// remote(named_collection, ..., password = 'password', ...)
|
||||
findSecretNamedArgument("password", 1);
|
||||
return;
|
||||
}
|
||||
|
||||
/// We're going to replace 'password' with '[HIDDEN'] for the following signatures:
|
||||
/// remote('addresses_expr', db.table, 'user' [, 'password'] [, sharding_key])
|
||||
/// remote('addresses_expr', 'db', 'table', 'user' [, 'password'] [, sharding_key])
|
||||
/// remote('addresses_expr', table_function(), 'user' [, 'password'] [, sharding_key])
|
||||
|
||||
/// But we should check the number of arguments first because we don't need to do any replacements in case of
|
||||
/// remote('addresses_expr', db.table)
|
||||
if (function->arguments->size() < 3)
|
||||
return;
|
||||
|
||||
size_t arg_num = 1;
|
||||
|
||||
/// Skip 1 or 2 arguments with table_function() or db.table or 'db', 'table'.
|
||||
auto table_function = function->arguments->at(arg_num)->getFunction();
|
||||
if (table_function && KnownTableFunctionNames::instance().exists(table_function->name()))
|
||||
{
|
||||
++arg_num;
|
||||
}
|
||||
else
|
||||
{
|
||||
std::optional<String> database;
|
||||
std::optional<QualifiedTableName> qualified_table_name;
|
||||
if (!tryGetDatabaseNameOrQualifiedTableName(arg_num, database, qualified_table_name))
|
||||
{
|
||||
/// We couldn't evaluate the argument so we don't know whether it is 'db.table' or just 'db'.
|
||||
/// Hence we can't figure out whether we should skip one argument 'user' or two arguments 'table', 'user'
|
||||
/// before the argument 'password'. So it's safer to wipe two arguments just in case.
|
||||
/// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string
|
||||
/// before wiping it (because the `password` argument is always a literal string).
|
||||
if (tryGetStringFromArgument(arg_num + 2, nullptr, /* allow_identifier= */ false))
|
||||
{
|
||||
/// Wipe either `password` or `user`.
|
||||
markSecretArgument(arg_num + 2);
|
||||
}
|
||||
if (tryGetStringFromArgument(arg_num + 3, nullptr, /* allow_identifier= */ false))
|
||||
{
|
||||
/// Wipe either `password` or `sharding_key`.
|
||||
markSecretArgument(arg_num + 3);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/// Skip the current argument (which is either a database name or a qualified table name).
|
||||
++arg_num;
|
||||
if (database)
|
||||
{
|
||||
/// Skip the 'table' argument if the previous argument was a database name.
|
||||
++arg_num;
|
||||
}
|
||||
}
|
||||
|
||||
/// Skip username.
|
||||
++arg_num;
|
||||
|
||||
/// Do our replacement:
|
||||
/// remote('addresses_expr', db.table, 'user', 'password', ...) -> remote('addresses_expr', db.table, 'user', '[HIDDEN]', ...)
|
||||
/// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string
|
||||
/// before wiping it (because the `password` argument is always a literal string).
|
||||
bool can_be_password = tryGetStringFromArgument(arg_num, nullptr, /* allow_identifier= */ false);
|
||||
if (can_be_password)
|
||||
markSecretArgument(arg_num);
|
||||
}
|
||||
|
||||
/// Tries to get either a database name or a qualified table name from an argument.
|
||||
/// Empty string is also allowed (it means the default database).
|
||||
/// The function is used by findRemoteFunctionSecretArguments() to determine how many arguments to skip before a password.
|
||||
bool tryGetDatabaseNameOrQualifiedTableName(
|
||||
size_t arg_idx,
|
||||
std::optional<String> & res_database,
|
||||
std::optional<QualifiedTableName> & res_qualified_table_name) const
|
||||
{
|
||||
res_database.reset();
|
||||
res_qualified_table_name.reset();
|
||||
|
||||
String str;
|
||||
if (!tryGetStringFromArgument(arg_idx, &str, /* allow_identifier= */ true))
|
||||
return false;
|
||||
|
||||
if (str.empty())
|
||||
{
|
||||
res_database = "";
|
||||
return true;
|
||||
}
|
||||
|
||||
auto qualified_table_name = QualifiedTableName::tryParseFromString(str);
|
||||
if (!qualified_table_name)
|
||||
return false;
|
||||
|
||||
if (qualified_table_name->database.empty())
|
||||
res_database = std::move(qualified_table_name->table);
|
||||
else
|
||||
res_qualified_table_name = std::move(qualified_table_name);
|
||||
return true;
|
||||
}
|
||||
|
||||
void findEncryptionFunctionSecretArguments()
|
||||
{
|
||||
if (function->arguments->size() == 0)
|
||||
return;
|
||||
|
||||
/// We replace all arguments after 'mode' with '[HIDDEN]':
|
||||
/// encrypt('mode', 'plaintext', 'key' [, iv, aad]) -> encrypt('mode', '[HIDDEN]')
|
||||
result.start = 1;
|
||||
result.count = function->arguments->size() - 1;
|
||||
}
|
||||
|
||||
void findTableEngineSecretArguments()
|
||||
{
|
||||
const String & engine_name = function->name();
|
||||
if (engine_name == "ExternalDistributed")
|
||||
{
|
||||
/// ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password')
|
||||
findExternalDistributedTableEngineSecretArguments();
|
||||
}
|
||||
else if ((engine_name == "MySQL") || (engine_name == "PostgreSQL") ||
|
||||
(engine_name == "MaterializedPostgreSQL") || (engine_name == "MongoDB"))
|
||||
{
|
||||
/// MySQL('host:port', 'database', 'table', 'user', 'password', ...)
|
||||
/// PostgreSQL('host:port', 'database', 'table', 'user', 'password', ...)
|
||||
/// MaterializedPostgreSQL('host:port', 'database', 'table', 'user', 'password', ...)
|
||||
/// MongoDB('host:port', 'database', 'collection', 'user', 'password', ...)
|
||||
findMySQLFunctionSecretArguments();
|
||||
}
|
||||
else if ((engine_name == "S3") || (engine_name == "COSN") || (engine_name == "OSS") ||
|
||||
(engine_name == "DeltaLake") || (engine_name == "Hudi") || (engine_name == "Iceberg") || (engine_name == "S3Queue"))
|
||||
{
|
||||
/// S3('url', ['aws_access_key_id', 'aws_secret_access_key',] ...)
|
||||
findS3TableEngineSecretArguments();
|
||||
}
|
||||
else if (engine_name == "URL")
|
||||
{
|
||||
findURLSecretArguments();
|
||||
}
|
||||
}
|
||||
|
||||
void findExternalDistributedTableEngineSecretArguments()
|
||||
{
|
||||
if (isNamedCollectionName(1))
|
||||
{
|
||||
/// ExternalDistributed('engine', named_collection, ..., password = 'password', ...)
|
||||
findSecretNamedArgument("password", 2);
|
||||
}
|
||||
else
|
||||
{
|
||||
/// ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password')
|
||||
markSecretArgument(5);
|
||||
}
|
||||
}
|
||||
|
||||
void findS3TableEngineSecretArguments()
|
||||
{
|
||||
if (isNamedCollectionName(0))
|
||||
{
|
||||
/// S3(named_collection, ..., secret_access_key = 'secret_access_key')
|
||||
findSecretNamedArgument("secret_access_key", 1);
|
||||
return;
|
||||
}
|
||||
|
||||
/// We should check other arguments first because we don't need to do any replacement in case of
|
||||
/// S3('url', NOSIGN, 'format' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
|
||||
/// S3('url', 'format', 'compression' [, extra_credentials(..)] [, headers(..)])
|
||||
size_t count = excludeS3OrURLNestedMaps();
|
||||
if ((3 <= count) && (count <= 4))
|
||||
{
|
||||
String second_arg;
|
||||
if (tryGetStringFromArgument(1, &second_arg))
|
||||
{
|
||||
if (boost::iequals(second_arg, "NOSIGN"))
|
||||
return; /// The argument after 'url' is "NOSIGN".
|
||||
|
||||
if (count == 3)
|
||||
{
|
||||
if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg))
|
||||
return; /// The argument after 'url' is a format: S3('url', 'format', ...)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// We replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures:
|
||||
/// S3('url', 'aws_access_key_id', 'aws_secret_access_key')
|
||||
/// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format')
|
||||
/// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression')
|
||||
if (2 < count)
|
||||
markSecretArgument(2);
|
||||
}
|
||||
|
||||
void findDatabaseEngineSecretArguments()
|
||||
{
|
||||
const String & engine_name = function->name();
|
||||
if ((engine_name == "MySQL") || (engine_name == "MaterializeMySQL") ||
|
||||
(engine_name == "MaterializedMySQL") || (engine_name == "PostgreSQL") ||
|
||||
(engine_name == "MaterializedPostgreSQL"))
|
||||
{
|
||||
/// MySQL('host:port', 'database', 'user', 'password')
|
||||
/// PostgreSQL('host:port', 'database', 'user', 'password')
|
||||
findMySQLDatabaseSecretArguments();
|
||||
}
|
||||
else if (engine_name == "S3")
|
||||
{
|
||||
/// S3('url', 'access_key_id', 'secret_access_key')
|
||||
findS3DatabaseSecretArguments();
|
||||
}
|
||||
}
|
||||
|
||||
void findMySQLDatabaseSecretArguments()
|
||||
{
|
||||
if (isNamedCollectionName(0))
|
||||
{
|
||||
/// MySQL(named_collection, ..., password = 'password', ...)
|
||||
findSecretNamedArgument("password", 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
/// MySQL('host:port', 'database', 'user', 'password')
|
||||
markSecretArgument(3);
|
||||
}
|
||||
}
|
||||
|
||||
void findS3DatabaseSecretArguments()
|
||||
{
|
||||
if (isNamedCollectionName(0))
|
||||
{
|
||||
/// S3(named_collection, ..., secret_access_key = 'password', ...)
|
||||
findSecretNamedArgument("secret_access_key", 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
/// S3('url', 'access_key_id', 'secret_access_key')
|
||||
markSecretArgument(2);
|
||||
}
|
||||
}
|
||||
|
||||
void findBackupNameSecretArguments()
|
||||
{
|
||||
const String & engine_name = function->name();
|
||||
if (engine_name == "S3")
|
||||
{
|
||||
/// BACKUP ... TO S3(url, [aws_access_key_id, aws_secret_access_key])
|
||||
markSecretArgument(2);
|
||||
}
|
||||
}
|
||||
|
||||
/// Whether a specified argument can be the name of a named collection?
|
||||
bool isNamedCollectionName(size_t arg_idx) const
|
||||
{
|
||||
if (function->arguments->size() <= arg_idx)
|
||||
return false;
|
||||
|
||||
return function->arguments->at(arg_idx)->isIdentifier();
|
||||
}
|
||||
|
||||
/// Looks for a secret argument with a specified name. This function looks for arguments in format `key=value` where the key is specified.
|
||||
void findSecretNamedArgument(const std::string_view & key, size_t start = 0)
|
||||
{
|
||||
for (size_t i = start; i < function->arguments->size(); ++i)
|
||||
{
|
||||
const auto & argument = function->arguments->at(i);
|
||||
const auto equals_func = argument->getFunction();
|
||||
if (!equals_func || (equals_func->name() != "equals"))
|
||||
continue;
|
||||
|
||||
if (!equals_func->arguments || equals_func->arguments->size() != 2)
|
||||
continue;
|
||||
|
||||
String found_key;
|
||||
if (!tryGetStringFromArgument(*equals_func->arguments->at(0), &found_key))
|
||||
continue;
|
||||
|
||||
if (found_key == key)
|
||||
markSecretArgument(i, /* argument_is_named= */ true);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -1,35 +1,97 @@
|
||||
#pragma once
|
||||
|
||||
#include <Parsers/FunctionSecretArgumentsFinder.h>
|
||||
#include <Core/QualifiedTableName.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
#include <Parsers/ASTIdentifier.h>
|
||||
#include <Common/KnownObjectNames.h>
|
||||
|
||||
#include <boost/algorithm/string/predicate.hpp>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
|
||||
/// Finds arguments of a specified function which should not be displayed for most users for security reasons.
|
||||
/// That involves passwords and secret keys.
|
||||
class FunctionSecretArgumentsFinderAST
|
||||
class FunctionAST : public AbstractFunction
|
||||
{
|
||||
public:
|
||||
explicit FunctionSecretArgumentsFinderAST(const ASTFunction & function_) : function(function_)
|
||||
class ArgumentAST : public Argument
|
||||
{
|
||||
if (!function.arguments)
|
||||
public:
|
||||
explicit ArgumentAST(const IAST * argument_) : argument(argument_) {}
|
||||
std::unique_ptr<AbstractFunction> getFunction() const override
|
||||
{
|
||||
if (const auto * f = argument->as<ASTFunction>())
|
||||
return std::make_unique<FunctionAST>(*f);
|
||||
return nullptr;
|
||||
}
|
||||
bool isIdentifier() const override { return argument->as<ASTIdentifier>(); }
|
||||
bool tryGetString(String * res, bool allow_identifier) const override
|
||||
{
|
||||
if (const auto * literal = argument->as<ASTLiteral>())
|
||||
{
|
||||
if (literal->value.getType() != Field::Types::String)
|
||||
return false;
|
||||
if (res)
|
||||
*res = literal->value.safeGet<String>();
|
||||
return true;
|
||||
}
|
||||
|
||||
if (allow_identifier)
|
||||
{
|
||||
if (const auto * id = argument->as<ASTIdentifier>())
|
||||
{
|
||||
if (res)
|
||||
*res = id->name();
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
private:
|
||||
const IAST * argument = nullptr;
|
||||
};
|
||||
|
||||
class ArgumentsAST : public Arguments
|
||||
{
|
||||
public:
|
||||
explicit ArgumentsAST(const ASTs * arguments_) : arguments(arguments_) {}
|
||||
size_t size() const override { return arguments ? arguments->size() : 0; }
|
||||
std::unique_ptr<Argument> at(size_t n) const override
|
||||
{
|
||||
return std::make_unique<ArgumentAST>(arguments->at(n).get());
|
||||
}
|
||||
private:
|
||||
const ASTs * arguments = nullptr;
|
||||
};
|
||||
|
||||
explicit FunctionAST(const ASTFunction & function_) : function(&function_)
|
||||
{
|
||||
if (!function->arguments)
|
||||
return;
|
||||
|
||||
const auto * expr_list = function.arguments->as<ASTExpressionList>();
|
||||
const auto * expr_list = function->arguments->as<ASTExpressionList>();
|
||||
if (!expr_list)
|
||||
return;
|
||||
|
||||
arguments = &expr_list->children;
|
||||
switch (function.kind)
|
||||
arguments = std::make_unique<ArgumentsAST>(&expr_list->children);
|
||||
}
|
||||
|
||||
String name() const override { return function->name; }
|
||||
private:
|
||||
const ASTFunction * function = nullptr;
|
||||
};
|
||||
|
||||
/// Finds arguments of a specified function which should not be displayed for most users for security reasons.
|
||||
/// That involves passwords and secret keys.
|
||||
class FunctionSecretArgumentsFinderAST : public FunctionSecretArgumentsFinder
|
||||
{
|
||||
public:
|
||||
explicit FunctionSecretArgumentsFinderAST(const ASTFunction & function_)
|
||||
: FunctionSecretArgumentsFinder(std::make_unique<FunctionAST>(function_))
|
||||
{
|
||||
if (!function->hasArguments())
|
||||
return;
|
||||
|
||||
switch (function_.kind)
|
||||
{
|
||||
case ASTFunction::Kind::ORDINARY_FUNCTION: findOrdinaryFunctionSecretArguments(); break;
|
||||
case ASTFunction::Kind::WINDOW_FUNCTION: break;
|
||||
@ -43,507 +105,7 @@ public:
|
||||
}
|
||||
|
||||
FunctionSecretArgumentsFinder::Result getResult() const { return result; }
|
||||
|
||||
private:
|
||||
const ASTFunction & function;
|
||||
const ASTs * arguments = nullptr;
|
||||
FunctionSecretArgumentsFinder::Result result;
|
||||
|
||||
void markSecretArgument(size_t index, bool argument_is_named = false)
|
||||
{
|
||||
if (index >= arguments->size())
|
||||
return;
|
||||
if (!result.count)
|
||||
{
|
||||
result.start = index;
|
||||
result.are_named = argument_is_named;
|
||||
}
|
||||
chassert(index >= result.start); /// We always check arguments consecutively
|
||||
result.count = index + 1 - result.start;
|
||||
if (!argument_is_named)
|
||||
result.are_named = false;
|
||||
}
|
||||
|
||||
void findOrdinaryFunctionSecretArguments()
|
||||
{
|
||||
if ((function.name == "mysql") || (function.name == "postgresql") || (function.name == "mongodb"))
|
||||
{
|
||||
/// mysql('host:port', 'database', 'table', 'user', 'password', ...)
|
||||
/// postgresql('host:port', 'database', 'table', 'user', 'password', ...)
|
||||
/// mongodb('host:port', 'database', 'collection', 'user', 'password', ...)
|
||||
findMySQLFunctionSecretArguments();
|
||||
}
|
||||
else if ((function.name == "s3") || (function.name == "cosn") || (function.name == "oss") ||
|
||||
(function.name == "deltaLake") || (function.name == "hudi") || (function.name == "iceberg") ||
|
||||
(function.name == "gcs"))
|
||||
{
|
||||
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
|
||||
findS3FunctionSecretArguments(/* is_cluster_function= */ false);
|
||||
}
|
||||
else if (function.name == "s3Cluster")
|
||||
{
|
||||
/// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', ...)
|
||||
findS3FunctionSecretArguments(/* is_cluster_function= */ true);
|
||||
}
|
||||
else if (function.name == "azureBlobStorage")
|
||||
{
|
||||
/// azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure)
|
||||
findAzureBlobStorageFunctionSecretArguments(/* is_cluster_function= */ false);
|
||||
}
|
||||
else if (function.name == "azureBlobStorageCluster")
|
||||
{
|
||||
/// azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])
|
||||
findAzureBlobStorageFunctionSecretArguments(/* is_cluster_function= */ true);
|
||||
}
|
||||
else if ((function.name == "remote") || (function.name == "remoteSecure"))
|
||||
{
|
||||
/// remote('addresses_expr', 'db', 'table', 'user', 'password', ...)
|
||||
findRemoteFunctionSecretArguments();
|
||||
}
|
||||
else if ((function.name == "encrypt") || (function.name == "decrypt") ||
|
||||
(function.name == "aes_encrypt_mysql") || (function.name == "aes_decrypt_mysql") ||
|
||||
(function.name == "tryDecrypt"))
|
||||
{
|
||||
/// encrypt('mode', 'plaintext', 'key' [, iv, aad])
|
||||
findEncryptionFunctionSecretArguments();
|
||||
}
|
||||
else if (function.name == "url")
|
||||
{
|
||||
findURLSecretArguments();
|
||||
}
|
||||
}
|
||||
|
||||
void findMySQLFunctionSecretArguments()
|
||||
{
|
||||
if (isNamedCollectionName(0))
|
||||
{
|
||||
/// mysql(named_collection, ..., password = 'password', ...)
|
||||
findSecretNamedArgument("password", 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
/// mysql('host:port', 'database', 'table', 'user', 'password', ...)
|
||||
markSecretArgument(4);
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the number of arguments excluding "headers" and "extra_credentials" (which should
|
||||
/// always be at the end). Marks "headers" as secret, if found.
|
||||
size_t excludeS3OrURLNestedMaps()
|
||||
{
|
||||
size_t count = arguments->size();
|
||||
while (count > 0)
|
||||
{
|
||||
const ASTFunction * f = arguments->at(count - 1)->as<ASTFunction>();
|
||||
if (!f)
|
||||
break;
|
||||
if (f->name == "headers")
|
||||
result.nested_maps.push_back(f->name);
|
||||
else if (f->name != "extra_credentials")
|
||||
break;
|
||||
count -= 1;
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
void findS3FunctionSecretArguments(bool is_cluster_function)
|
||||
{
|
||||
/// s3Cluster('cluster_name', 'url', ...) has 'url' as its second argument.
|
||||
size_t url_arg_idx = is_cluster_function ? 1 : 0;
|
||||
|
||||
if (!is_cluster_function && isNamedCollectionName(0))
|
||||
{
|
||||
/// s3(named_collection, ..., secret_access_key = 'secret_access_key', ...)
|
||||
findSecretNamedArgument("secret_access_key", 1);
|
||||
return;
|
||||
}
|
||||
|
||||
/// We should check other arguments first because we don't need to do any replacement in case of
|
||||
/// s3('url', NOSIGN, 'format' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
|
||||
/// s3('url', 'format', 'structure' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
|
||||
size_t count = excludeS3OrURLNestedMaps();
|
||||
if ((url_arg_idx + 3 <= count) && (count <= url_arg_idx + 4))
|
||||
{
|
||||
String second_arg;
|
||||
if (tryGetStringFromArgument(url_arg_idx + 1, &second_arg))
|
||||
{
|
||||
if (boost::iequals(second_arg, "NOSIGN"))
|
||||
return; /// The argument after 'url' is "NOSIGN".
|
||||
|
||||
if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg))
|
||||
return; /// The argument after 'url' is a format: s3('url', 'format', ...)
|
||||
}
|
||||
}
|
||||
|
||||
/// We're going to replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures:
|
||||
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
|
||||
/// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression')
|
||||
if (url_arg_idx + 2 < count)
|
||||
markSecretArgument(url_arg_idx + 2);
|
||||
}
|
||||
|
||||
void findAzureBlobStorageFunctionSecretArguments(bool is_cluster_function)
|
||||
{
|
||||
/// azureBlobStorage('cluster_name', 'conn_string/storage_account_url', ...) has 'conn_string/storage_account_url' as its second argument.
|
||||
size_t url_arg_idx = is_cluster_function ? 1 : 0;
|
||||
|
||||
if (!is_cluster_function && isNamedCollectionName(0))
|
||||
{
|
||||
/// azureBlobStorage(named_collection, ..., account_key = 'account_key', ...)
|
||||
findSecretNamedArgument("account_key", 1);
|
||||
return;
|
||||
}
|
||||
else if (is_cluster_function && isNamedCollectionName(1))
|
||||
{
|
||||
/// azureBlobStorageCluster(cluster, named_collection, ..., account_key = 'account_key', ...)
|
||||
findSecretNamedArgument("account_key", 2);
|
||||
return;
|
||||
}
|
||||
|
||||
/// We should check other arguments first because we don't need to do any replacement in case storage_account_url is not used
|
||||
/// azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure)
|
||||
/// azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])
|
||||
size_t count = arguments->size();
|
||||
if ((url_arg_idx + 4 <= count) && (count <= url_arg_idx + 7))
|
||||
{
|
||||
String second_arg;
|
||||
if (tryGetStringFromArgument(url_arg_idx + 3, &second_arg))
|
||||
{
|
||||
if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg))
|
||||
return; /// The argument after 'url' is a format: s3('url', 'format', ...)
|
||||
}
|
||||
}
|
||||
|
||||
/// We're going to replace 'account_key' with '[HIDDEN]' if account_key is used in the signature
|
||||
if (url_arg_idx + 4 < count)
|
||||
markSecretArgument(url_arg_idx + 4);
|
||||
}
|
||||
|
||||
void findURLSecretArguments()
|
||||
{
|
||||
if (!isNamedCollectionName(0))
|
||||
excludeS3OrURLNestedMaps();
|
||||
}
|
||||
|
||||
bool tryGetStringFromArgument(size_t arg_idx, String * res, bool allow_identifier = true) const
|
||||
{
|
||||
if (arg_idx >= arguments->size())
|
||||
return false;
|
||||
|
||||
return tryGetStringFromArgument(*(*arguments)[arg_idx], res, allow_identifier);
|
||||
}
|
||||
|
||||
static bool tryGetStringFromArgument(const IAST & argument, String * res, bool allow_identifier = true)
|
||||
{
|
||||
if (const auto * literal = argument.as<ASTLiteral>())
|
||||
{
|
||||
if (literal->value.getType() != Field::Types::String)
|
||||
return false;
|
||||
if (res)
|
||||
*res = literal->value.safeGet<String>();
|
||||
return true;
|
||||
}
|
||||
|
||||
if (allow_identifier)
|
||||
{
|
||||
if (const auto * id = argument.as<ASTIdentifier>())
|
||||
{
|
||||
if (res)
|
||||
*res = id->name();
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void findRemoteFunctionSecretArguments()
|
||||
{
|
||||
if (isNamedCollectionName(0))
|
||||
{
|
||||
/// remote(named_collection, ..., password = 'password', ...)
|
||||
findSecretNamedArgument("password", 1);
|
||||
return;
|
||||
}
|
||||
|
||||
/// We're going to replace 'password' with '[HIDDEN'] for the following signatures:
|
||||
/// remote('addresses_expr', db.table, 'user' [, 'password'] [, sharding_key])
|
||||
/// remote('addresses_expr', 'db', 'table', 'user' [, 'password'] [, sharding_key])
|
||||
/// remote('addresses_expr', table_function(), 'user' [, 'password'] [, sharding_key])
|
||||
|
||||
/// But we should check the number of arguments first because we don't need to do any replacements in case of
|
||||
/// remote('addresses_expr', db.table)
|
||||
if (arguments->size() < 3)
|
||||
return;
|
||||
|
||||
size_t arg_num = 1;
|
||||
|
||||
/// Skip 1 or 2 arguments with table_function() or db.table or 'db', 'table'.
|
||||
const auto * table_function = (*arguments)[arg_num]->as<ASTFunction>();
|
||||
if (table_function && KnownTableFunctionNames::instance().exists(table_function->name))
|
||||
{
|
||||
++arg_num;
|
||||
}
|
||||
else
|
||||
{
|
||||
std::optional<String> database;
|
||||
std::optional<QualifiedTableName> qualified_table_name;
|
||||
if (!tryGetDatabaseNameOrQualifiedTableName(arg_num, database, qualified_table_name))
|
||||
{
|
||||
/// We couldn't evaluate the argument so we don't know whether it is 'db.table' or just 'db'.
|
||||
/// Hence we can't figure out whether we should skip one argument 'user' or two arguments 'table', 'user'
|
||||
/// before the argument 'password'. So it's safer to wipe two arguments just in case.
|
||||
/// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string
|
||||
/// before wiping it (because the `password` argument is always a literal string).
|
||||
if (tryGetStringFromArgument(arg_num + 2, nullptr, /* allow_identifier= */ false))
|
||||
{
|
||||
/// Wipe either `password` or `user`.
|
||||
markSecretArgument(arg_num + 2);
|
||||
}
|
||||
if (tryGetStringFromArgument(arg_num + 3, nullptr, /* allow_identifier= */ false))
|
||||
{
|
||||
/// Wipe either `password` or `sharding_key`.
|
||||
markSecretArgument(arg_num + 3);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/// Skip the current argument (which is either a database name or a qualified table name).
|
||||
++arg_num;
|
||||
if (database)
|
||||
{
|
||||
/// Skip the 'table' argument if the previous argument was a database name.
|
||||
++arg_num;
|
||||
}
|
||||
}
|
||||
|
||||
/// Skip username.
|
||||
++arg_num;
|
||||
|
||||
/// Do our replacement:
|
||||
/// remote('addresses_expr', db.table, 'user', 'password', ...) -> remote('addresses_expr', db.table, 'user', '[HIDDEN]', ...)
|
||||
/// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string
|
||||
/// before wiping it (because the `password` argument is always a literal string).
|
||||
bool can_be_password = tryGetStringFromArgument(arg_num, nullptr, /* allow_identifier= */ false);
|
||||
if (can_be_password)
|
||||
markSecretArgument(arg_num);
|
||||
}
|
||||
|
||||
/// Tries to get either a database name or a qualified table name from an argument.
|
||||
/// Empty string is also allowed (it means the default database).
|
||||
/// The function is used by findRemoteFunctionSecretArguments() to determine how many arguments to skip before a password.
|
||||
bool tryGetDatabaseNameOrQualifiedTableName(
|
||||
size_t arg_idx,
|
||||
std::optional<String> & res_database,
|
||||
std::optional<QualifiedTableName> & res_qualified_table_name) const
|
||||
{
|
||||
res_database.reset();
|
||||
res_qualified_table_name.reset();
|
||||
|
||||
String str;
|
||||
if (!tryGetStringFromArgument(arg_idx, &str, /* allow_identifier= */ true))
|
||||
return false;
|
||||
|
||||
if (str.empty())
|
||||
{
|
||||
res_database = "";
|
||||
return true;
|
||||
}
|
||||
|
||||
auto qualified_table_name = QualifiedTableName::tryParseFromString(str);
|
||||
if (!qualified_table_name)
|
||||
return false;
|
||||
|
||||
if (qualified_table_name->database.empty())
|
||||
res_database = std::move(qualified_table_name->table);
|
||||
else
|
||||
res_qualified_table_name = std::move(qualified_table_name);
|
||||
return true;
|
||||
}
|
||||
|
||||
void findEncryptionFunctionSecretArguments()
|
||||
{
|
||||
if (arguments->empty())
|
||||
return;
|
||||
|
||||
/// We replace all arguments after 'mode' with '[HIDDEN]':
|
||||
/// encrypt('mode', 'plaintext', 'key' [, iv, aad]) -> encrypt('mode', '[HIDDEN]')
|
||||
result.start = 1;
|
||||
result.count = arguments->size() - 1;
|
||||
}
|
||||
|
||||
void findTableEngineSecretArguments()
|
||||
{
|
||||
const String & engine_name = function.name;
|
||||
if (engine_name == "ExternalDistributed")
|
||||
{
|
||||
/// ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password')
|
||||
findExternalDistributedTableEngineSecretArguments();
|
||||
}
|
||||
else if ((engine_name == "MySQL") || (engine_name == "PostgreSQL") ||
|
||||
(engine_name == "MaterializedPostgreSQL") || (engine_name == "MongoDB"))
|
||||
{
|
||||
/// MySQL('host:port', 'database', 'table', 'user', 'password', ...)
|
||||
/// PostgreSQL('host:port', 'database', 'table', 'user', 'password', ...)
|
||||
/// MaterializedPostgreSQL('host:port', 'database', 'table', 'user', 'password', ...)
|
||||
/// MongoDB('host:port', 'database', 'collection', 'user', 'password', ...)
|
||||
findMySQLFunctionSecretArguments();
|
||||
}
|
||||
else if ((engine_name == "S3") || (engine_name == "COSN") || (engine_name == "OSS") ||
|
||||
(engine_name == "DeltaLake") || (engine_name == "Hudi") || (engine_name == "Iceberg") || (engine_name == "S3Queue"))
|
||||
{
|
||||
/// S3('url', ['aws_access_key_id', 'aws_secret_access_key',] ...)
|
||||
findS3TableEngineSecretArguments();
|
||||
}
|
||||
else if (engine_name == "URL")
|
||||
{
|
||||
findURLSecretArguments();
|
||||
}
|
||||
}
|
||||
|
||||
void findExternalDistributedTableEngineSecretArguments()
|
||||
{
|
||||
if (isNamedCollectionName(1))
|
||||
{
|
||||
/// ExternalDistributed('engine', named_collection, ..., password = 'password', ...)
|
||||
findSecretNamedArgument("password", 2);
|
||||
}
|
||||
else
|
||||
{
|
||||
/// ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password')
|
||||
markSecretArgument(5);
|
||||
}
|
||||
}
|
||||
|
||||
void findS3TableEngineSecretArguments()
|
||||
{
|
||||
if (isNamedCollectionName(0))
|
||||
{
|
||||
/// S3(named_collection, ..., secret_access_key = 'secret_access_key')
|
||||
findSecretNamedArgument("secret_access_key", 1);
|
||||
return;
|
||||
}
|
||||
|
||||
/// We should check other arguments first because we don't need to do any replacement in case of
|
||||
/// S3('url', NOSIGN, 'format' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
|
||||
/// S3('url', 'format', 'compression' [, extra_credentials(..)] [, headers(..)])
|
||||
size_t count = excludeS3OrURLNestedMaps();
|
||||
if ((3 <= count) && (count <= 4))
|
||||
{
|
||||
String second_arg;
|
||||
if (tryGetStringFromArgument(1, &second_arg))
|
||||
{
|
||||
if (boost::iequals(second_arg, "NOSIGN"))
|
||||
return; /// The argument after 'url' is "NOSIGN".
|
||||
|
||||
if (count == 3)
|
||||
{
|
||||
if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg))
|
||||
return; /// The argument after 'url' is a format: S3('url', 'format', ...)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// We replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures:
|
||||
/// S3('url', 'aws_access_key_id', 'aws_secret_access_key')
|
||||
/// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format')
|
||||
/// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression')
|
||||
if (2 < count)
|
||||
markSecretArgument(2);
|
||||
}
|
||||
|
||||
void findDatabaseEngineSecretArguments()
|
||||
{
|
||||
const String & engine_name = function.name;
|
||||
if ((engine_name == "MySQL") || (engine_name == "MaterializeMySQL") ||
|
||||
(engine_name == "MaterializedMySQL") || (engine_name == "PostgreSQL") ||
|
||||
(engine_name == "MaterializedPostgreSQL"))
|
||||
{
|
||||
/// MySQL('host:port', 'database', 'user', 'password')
|
||||
/// PostgreSQL('host:port', 'database', 'user', 'password')
|
||||
findMySQLDatabaseSecretArguments();
|
||||
}
|
||||
else if (engine_name == "S3")
|
||||
{
|
||||
/// S3('url', 'access_key_id', 'secret_access_key')
|
||||
findS3DatabaseSecretArguments();
|
||||
}
|
||||
}
|
||||
|
||||
void findMySQLDatabaseSecretArguments()
|
||||
{
|
||||
if (isNamedCollectionName(0))
|
||||
{
|
||||
/// MySQL(named_collection, ..., password = 'password', ...)
|
||||
findSecretNamedArgument("password", 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
/// MySQL('host:port', 'database', 'user', 'password')
|
||||
markSecretArgument(3);
|
||||
}
|
||||
}
|
||||
|
||||
void findS3DatabaseSecretArguments()
|
||||
{
|
||||
if (isNamedCollectionName(0))
|
||||
{
|
||||
/// S3(named_collection, ..., secret_access_key = 'password', ...)
|
||||
findSecretNamedArgument("secret_access_key", 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
/// S3('url', 'access_key_id', 'secret_access_key')
|
||||
markSecretArgument(2);
|
||||
}
|
||||
}
|
||||
|
||||
void findBackupNameSecretArguments()
|
||||
{
|
||||
const String & engine_name = function.name;
|
||||
if (engine_name == "S3")
|
||||
{
|
||||
/// BACKUP ... TO S3(url, [aws_access_key_id, aws_secret_access_key])
|
||||
markSecretArgument(2);
|
||||
}
|
||||
}
|
||||
|
||||
/// Whether a specified argument can be the name of a named collection?
|
||||
bool isNamedCollectionName(size_t arg_idx) const
|
||||
{
|
||||
if (arguments->size() <= arg_idx)
|
||||
return false;
|
||||
|
||||
const auto * identifier = (*arguments)[arg_idx]->as<ASTIdentifier>();
|
||||
return identifier != nullptr;
|
||||
}
|
||||
|
||||
/// Looks for a secret argument with a specified name. This function looks for arguments in format `key=value` where the key is specified.
|
||||
void findSecretNamedArgument(const std::string_view & key, size_t start = 0)
|
||||
{
|
||||
for (size_t i = start; i < arguments->size(); ++i)
|
||||
{
|
||||
const auto & argument = (*arguments)[i];
|
||||
const auto * equals_func = argument->as<ASTFunction>();
|
||||
if (!equals_func || (equals_func->name != "equals"))
|
||||
continue;
|
||||
|
||||
const auto * expr_list = equals_func->arguments->as<ASTExpressionList>();
|
||||
if (!expr_list)
|
||||
continue;
|
||||
|
||||
const auto & equal_args = expr_list->children;
|
||||
if (equal_args.size() != 2)
|
||||
continue;
|
||||
|
||||
String found_key;
|
||||
if (!tryGetStringFromArgument(*equal_args[0], &found_key))
|
||||
continue;
|
||||
|
||||
if (found_key == key)
|
||||
markSecretArgument(i, /* argument_is_named= */ true);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
|
62
src/Parsers/ParserCreateResourceQuery.cpp
Normal file
62
src/Parsers/ParserCreateResourceQuery.cpp
Normal file
@ -0,0 +1,62 @@
|
||||
#include <Parsers/ParserCreateResourceQuery.h>
|
||||
|
||||
#include <Parsers/ASTCreateResourceQuery.h>
|
||||
#include <Parsers/ASTIdentifier.h>
|
||||
#include <Parsers/CommonParsers.h>
|
||||
#include <Parsers/ExpressionElementParsers.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
bool ParserCreateResourceQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & expected)
|
||||
{
|
||||
ParserKeyword s_create(Keyword::CREATE);
|
||||
ParserKeyword s_resource(Keyword::RESOURCE);
|
||||
ParserKeyword s_or_replace(Keyword::OR_REPLACE);
|
||||
ParserKeyword s_if_not_exists(Keyword::IF_NOT_EXISTS);
|
||||
ParserKeyword s_on(Keyword::ON);
|
||||
ParserIdentifier resource_name_p;
|
||||
// TODO(serxa): parse resource definition
|
||||
|
||||
ASTPtr resource_name;
|
||||
|
||||
String cluster_str;
|
||||
bool or_replace = false;
|
||||
bool if_not_exists = false;
|
||||
|
||||
if (!s_create.ignore(pos, expected))
|
||||
return false;
|
||||
|
||||
if (s_or_replace.ignore(pos, expected))
|
||||
or_replace = true;
|
||||
|
||||
if (!s_resource.ignore(pos, expected))
|
||||
return false;
|
||||
|
||||
if (!or_replace && s_if_not_exists.ignore(pos, expected))
|
||||
if_not_exists = true;
|
||||
|
||||
if (!resource_name_p.parse(pos, resource_name, expected))
|
||||
return false;
|
||||
|
||||
if (s_on.ignore(pos, expected))
|
||||
{
|
||||
if (!ASTQueryWithOnCluster::parse(pos, cluster_str, expected))
|
||||
return false;
|
||||
}
|
||||
|
||||
auto create_resource_query = std::make_shared<ASTCreateResourceQuery>();
|
||||
node = create_resource_query;
|
||||
|
||||
create_resource_query->resource_name = resource_name;
|
||||
create_resource_query->children.push_back(resource_name);
|
||||
|
||||
create_resource_query->or_replace = or_replace;
|
||||
create_resource_query->if_not_exists = if_not_exists;
|
||||
create_resource_query->cluster = std::move(cluster_str);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
16
src/Parsers/ParserCreateResourceQuery.h
Normal file
16
src/Parsers/ParserCreateResourceQuery.h
Normal file
@ -0,0 +1,16 @@
|
||||
#pragma once
|
||||
|
||||
#include "IParserBase.h"
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// CREATE RESOURCE cache_io (WRITE DISK s3diskWithCache, READ DISK s3diskWithCache)
|
||||
class ParserCreateResourceQuery : public IParserBase
|
||||
{
|
||||
protected:
|
||||
const char * getName() const override { return "CREATE RESOURCE query"; }
|
||||
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
|
||||
};
|
||||
|
||||
}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user