mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-19 16:20:50 +00:00
Compare commits
137 Commits
6933b95a99
...
10002e1e16
Author | SHA1 | Date | |
---|---|---|---|
|
10002e1e16 | ||
|
7fd2207626 | ||
|
69f45acfd7 | ||
|
4c78206d0a | ||
|
429e8ada79 | ||
|
06b49d18d9 | ||
|
a17a8febf7 | ||
|
55529ec5a2 | ||
|
82dbb3bb32 | ||
|
1bcdde3e62 | ||
|
2cef99c311 | ||
|
cd7a1a9288 | ||
|
6597a8ed04 | ||
|
3b901f49e5 | ||
|
958c3effae | ||
|
474499d240 | ||
|
839f06035f | ||
|
4f88ccb6a8 | ||
|
a226567bc2 | ||
|
fcda762a27 | ||
|
9c185374e4 | ||
|
13e82d6439 | ||
|
fdee35cccc | ||
|
9eba103c5e | ||
|
e574c49e25 | ||
|
665f362601 | ||
|
574a26c63b | ||
|
3674c97ebb | ||
|
8508b1ba37 | ||
|
190d3f04c9 | ||
|
aba7de5091 | ||
|
ffaf97a390 | ||
|
452fde78c7 | ||
|
51fa9ebf8a | ||
|
e30ebfa23e | ||
|
b21be2bc54 | ||
|
14736d95c5 | ||
|
4b69d8e2ca | ||
|
5ce8604869 | ||
|
813bcd896f | ||
|
3a05282bce | ||
|
fd0c7a1c18 | ||
|
4704fb8a3b | ||
|
f768717be8 | ||
|
983b061b58 | ||
|
3a299f382d | ||
|
f8f72ccb00 | ||
|
1ccd461c97 | ||
|
5aaff37b36 | ||
|
9f932fb453 | ||
|
19e2197582 | ||
|
d223c4547f | ||
|
58993d3f3b | ||
|
8507d209c0 | ||
|
f5b9d5ad34 | ||
|
4af369fbc4 | ||
|
8cdcc431fe | ||
|
187a717872 | ||
|
4412946532 | ||
|
03737ddcab | ||
|
038f56cb5e | ||
|
63577507c9 | ||
|
9eb78773a6 | ||
|
6f63a7b213 | ||
|
56cfa74a14 | ||
|
dbb1d043fe | ||
|
56f3030b17 | ||
|
31ddfc6f5f | ||
|
ddf2e07fd0 | ||
|
5cc12ca9ee | ||
|
e13247b67e | ||
|
2650a20628 | ||
|
9a31fc385d | ||
|
ddc506a677 | ||
|
2812953a8a | ||
|
492461271b | ||
|
3c47f3df4b | ||
|
11c7cdabf8 | ||
|
71553022e0 | ||
|
53e1975833 | ||
|
8299b31d47 | ||
|
11d2963497 | ||
|
f9335a2fd5 | ||
|
8a89d7b2b9 | ||
|
9c1f4f4545 | ||
|
2e82e06330 | ||
|
7b2810bea2 | ||
|
401a3d0931 | ||
|
beffb92411 | ||
|
16f93ea1b3 | ||
|
1e3bc6d359 | ||
|
562c23eac6 | ||
|
8d5babf65f | ||
|
99ede620be | ||
|
f292767778 | ||
|
7d36f3b764 | ||
|
21bd47f09e | ||
|
fc83c1c7a2 | ||
|
0de3b1dacb | ||
|
8d5d7dd83a | ||
|
61ebcdc2ed | ||
|
1df897db27 | ||
|
8cdc10cf65 | ||
|
f5d49f8e10 | ||
|
83854cf293 | ||
|
e874c6e1de | ||
|
03ccf05d14 | ||
|
3d04f3d33a | ||
|
553c309477 | ||
|
ae582120ae | ||
|
aa4688a982 | ||
|
7aaa0289e1 | ||
|
d6df83d561 | ||
|
1011f8ef9c | ||
|
b0a0988c5b | ||
|
03ab625265 | ||
|
dbd4ee44ed | ||
|
d4a3a033b0 | ||
|
165d08f088 | ||
|
e0dbc53b58 | ||
|
f97551e2ad | ||
|
c252b3c8b0 | ||
|
30229a3bfd | ||
|
8a0f41da7a | ||
|
628a4300ba | ||
|
f88b5988c1 | ||
|
4bb2f7b3f6 | ||
|
95edca513c | ||
|
5004e4d2cc | ||
|
e7fc89ba26 | ||
|
49ce2c7619 | ||
|
80d985a690 | ||
|
891f9c5358 | ||
|
cb0335446e | ||
|
64e10b2dda | ||
|
ef40cc3bae | ||
|
f5c07b8938 |
20
README.md
20
README.md
@ -40,17 +40,8 @@ Every month we get together with the community (users, contributors, customers,
|
||||
|
||||
Keep an eye out for upcoming meetups and events around the world. Somewhere else you want us to be? Please feel free to reach out to tyler `<at>` clickhouse `<dot>` com. You can also peruse [ClickHouse Events](https://clickhouse.com/company/news-events) for a list of all upcoming trainings, meetups, speaking engagements, etc.
|
||||
|
||||
The following upcoming meetups are featuring creator of ClickHouse & CTO, Alexey Milovidov:
|
||||
Upcoming meetups
|
||||
|
||||
* [Raleigh Meetup (Deutsche Bank)](https://www.meetup.com/triangletechtalks/events/302723486/) - September 9
|
||||
* [New York Meetup (Rokt)](https://www.meetup.com/clickhouse-new-york-user-group/events/302575342) - September 10
|
||||
* [Chicago Meetup (Jump Capital)](https://lu.ma/43tvmrfw) - September 12
|
||||
|
||||
Other upcoming meetups
|
||||
|
||||
* [Toronto Meetup (Shopify)](https://www.meetup.com/clickhouse-toronto-user-group/events/301490855/) - September 10
|
||||
* [Austin Meetup](https://www.meetup.com/clickhouse-austin-user-group/events/302558689/) - September 17
|
||||
* [London Meetup](https://www.meetup.com/clickhouse-london-user-group/events/302977267) - September 17
|
||||
* [Bangalore Meetup](https://www.meetup.com/clickhouse-bangalore-user-group/events/303208274/) - September 18
|
||||
* [Tel Aviv Meetup](https://www.meetup.com/clickhouse-meetup-israel/events/303095121) - September 22
|
||||
* [Jakarta Meetup](https://www.meetup.com/clickhouse-indonesia-user-group/events/303191359/) - October 1
|
||||
@ -62,13 +53,20 @@ Other upcoming meetups
|
||||
* [Dubai Meetup](https://www.meetup.com/clickhouse-dubai-meetup-group/events/303096989/) - November 21
|
||||
* [Paris Meetup](https://www.meetup.com/clickhouse-france-user-group/events/303096434) - November 26
|
||||
|
||||
Recently completed events
|
||||
Recently completed meetups
|
||||
|
||||
* [ClickHouse Guangzhou User Group Meetup](https://mp.weixin.qq.com/s/GSvo-7xUoVzCsuUvlLTpCw) - August 25
|
||||
* [Seattle Meetup (Statsig)](https://www.meetup.com/clickhouse-seattle-user-group/events/302518075/) - August 27
|
||||
* [Melbourne Meetup](https://www.meetup.com/clickhouse-australia-user-group/events/302732666/) - August 27
|
||||
* [Sydney Meetup](https://www.meetup.com/clickhouse-australia-user-group/events/302862966/) - September 5
|
||||
* [Zurich Meetup](https://www.meetup.com/clickhouse-switzerland-meetup-group/events/302267429/) - September 5
|
||||
* [San Francisco Meetup (Cloudflare)](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/302540575) - September 5
|
||||
* [Raleigh Meetup (Deutsche Bank)](https://www.meetup.com/triangletechtalks/events/302723486/) - September 9
|
||||
* [New York Meetup (Rokt)](https://www.meetup.com/clickhouse-new-york-user-group/events/302575342) - September 10
|
||||
* [Toronto Meetup (Shopify)](https://www.meetup.com/clickhouse-toronto-user-group/events/301490855/) - September 10
|
||||
* [Chicago Meetup (Jump Capital)](https://lu.ma/43tvmrfw) - September 12
|
||||
* [London Meetup](https://www.meetup.com/clickhouse-london-user-group/events/302977267) - September 17
|
||||
* [Austin Meetup](https://www.meetup.com/clickhouse-austin-user-group/events/302558689/) - September 17
|
||||
|
||||
## Recent Recordings
|
||||
* **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible recordings of the ClickHouse Community Meetups are edited and presented as individual talks. Current featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments"
|
||||
|
@ -188,8 +188,9 @@ namespace Crypto
|
||||
pFile = fopen(keyFile.c_str(), "r");
|
||||
if (pFile)
|
||||
{
|
||||
pem_password_cb * pCB = pass.empty() ? (pem_password_cb *)0 : &passCB;
|
||||
void * pPassword = pass.empty() ? (void *)0 : (void *)pass.c_str();
|
||||
pem_password_cb * pCB = &passCB;
|
||||
static constexpr char * no_password = "";
|
||||
void * pPassword = pass.empty() ? (void *)no_password : (void *)pass.c_str();
|
||||
if (readFunc(pFile, &pKey, pCB, pPassword))
|
||||
{
|
||||
fclose(pFile);
|
||||
@ -225,6 +226,13 @@ namespace Crypto
|
||||
error:
|
||||
if (pFile)
|
||||
fclose(pFile);
|
||||
if (*ppKey)
|
||||
{
|
||||
if constexpr (std::is_same_v<K, EVP_PKEY>)
|
||||
EVP_PKEY_free(*ppKey);
|
||||
else
|
||||
EC_KEY_free(*ppKey);
|
||||
}
|
||||
throw OpenSSLException("EVPKey::loadKey(string)");
|
||||
}
|
||||
|
||||
@ -286,6 +294,13 @@ namespace Crypto
|
||||
error:
|
||||
if (pBIO)
|
||||
BIO_free(pBIO);
|
||||
if (*ppKey)
|
||||
{
|
||||
if constexpr (std::is_same_v<K, EVP_PKEY>)
|
||||
EVP_PKEY_free(*ppKey);
|
||||
else
|
||||
EC_KEY_free(*ppKey);
|
||||
}
|
||||
throw OpenSSLException("EVPKey::loadKey(stream)");
|
||||
}
|
||||
|
||||
|
@ -248,6 +248,9 @@ namespace Net
|
||||
SSL_CTX * sslContext() const;
|
||||
/// Returns the underlying OpenSSL SSL Context object.
|
||||
|
||||
SSL_CTX * takeSslContext();
|
||||
/// Takes ownership of the underlying OpenSSL SSL Context object.
|
||||
|
||||
Usage usage() const;
|
||||
/// Returns whether the context is for use by a client or by a server
|
||||
/// and whether TLSv1 is required.
|
||||
@ -401,6 +404,13 @@ namespace Net
|
||||
return _pSSLContext;
|
||||
}
|
||||
|
||||
inline SSL_CTX * Context::takeSslContext()
|
||||
{
|
||||
auto * result = _pSSLContext;
|
||||
_pSSLContext = nullptr;
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
inline bool Context::extendedCertificateVerificationEnabled() const
|
||||
{
|
||||
|
@ -106,6 +106,11 @@ Context::Context(
|
||||
|
||||
Context::~Context()
|
||||
{
|
||||
if (_pSSLContext == nullptr)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
SSL_CTX_free(_pSSLContext);
|
||||
|
2
contrib/postgres
vendored
2
contrib/postgres
vendored
@ -1 +1 @@
|
||||
Subproject commit 665ff8c164d56d012e359735efe4d400c0564b44
|
||||
Subproject commit cfd77000af28469fcb650485bad65a35e7649e41
|
@ -1,18 +1,6 @@
|
||||
/* src/include/pg_config.h. Generated from pg_config.h.in by configure. */
|
||||
/* src/include/pg_config.h.in. Generated from configure.in by autoheader. */
|
||||
|
||||
/* Define to the type of arg 1 of 'accept' */
|
||||
#define ACCEPT_TYPE_ARG1 int
|
||||
|
||||
/* Define to the type of arg 2 of 'accept' */
|
||||
#define ACCEPT_TYPE_ARG2 struct sockaddr *
|
||||
|
||||
/* Define to the type of arg 3 of 'accept' */
|
||||
#define ACCEPT_TYPE_ARG3 size_t
|
||||
|
||||
/* Define to the return type of 'accept' */
|
||||
#define ACCEPT_TYPE_RETURN int
|
||||
|
||||
/* Define if building universal (internal helper macro) */
|
||||
/* #undef AC_APPLE_UNIVERSAL_BUILD */
|
||||
|
||||
@ -49,6 +37,9 @@
|
||||
/* Define to the default TCP port number as a string constant. */
|
||||
#define DEF_PGPORT_STR "5432"
|
||||
|
||||
/* Define to the file name extension of dynamically-loadable modules. */
|
||||
#define DLSUFFIX ".so"
|
||||
|
||||
/* Define to build with GSSAPI support. (--with-gssapi) */
|
||||
//#define ENABLE_GSS 0
|
||||
|
||||
@ -122,6 +113,9 @@
|
||||
don't. */
|
||||
#define HAVE_DECL_SNPRINTF 1
|
||||
|
||||
/* Define to 1 if you have the declaration of `sigwait', and to 0 if you don't. */
|
||||
#define HAVE_DECL_SIGWAIT 1
|
||||
|
||||
/* Define to 1 if you have the declaration of `strlcat', and to 0 if you
|
||||
don't. */
|
||||
#if OS_DARWIN
|
||||
@ -257,6 +251,9 @@
|
||||
/* Define to 1 if you have the `inet_aton' function. */
|
||||
#define HAVE_INET_ATON 1
|
||||
|
||||
/* Define to 1 if you have the `inet_pton' function. */
|
||||
#define HAVE_INET_PTON 1
|
||||
|
||||
/* Define to 1 if the system has the type `int64'. */
|
||||
/* #undef HAVE_INT64 */
|
||||
|
||||
@ -323,6 +320,9 @@
|
||||
/* Define to 1 if you have the `z' library (-lz). */
|
||||
#define HAVE_LIBZ 1
|
||||
|
||||
/* Define to 1 if you have the `zstd' library (-lzstd). */
|
||||
/* #undef HAVE_LIBZSTD */
|
||||
|
||||
/* Define to 1 if constants of type 'long long int' should have the suffix LL.
|
||||
*/
|
||||
#define HAVE_LL_CONSTANTS 1
|
||||
@ -378,6 +378,9 @@
|
||||
/* Define to 1 if you have the <poll.h> header file. */
|
||||
#define HAVE_POLL_H 1
|
||||
|
||||
/* Define to 1 if you have a POSIX-conforming sigwait declaration. */
|
||||
/* #undef HAVE_POSIX_DECL_SIGWAIT */
|
||||
|
||||
/* Define to 1 if you have the `posix_fadvise' function. */
|
||||
#define HAVE_POSIX_FADVISE 1
|
||||
|
||||
@ -408,9 +411,6 @@
|
||||
/* Define to 1 if you have the <pwd.h> header file. */
|
||||
#define HAVE_PWD_H 1
|
||||
|
||||
/* Define to 1 if you have the `random' function. */
|
||||
#define HAVE_RANDOM 1
|
||||
|
||||
/* Define to 1 if you have the <readline.h> header file. */
|
||||
/* #undef HAVE_READLINE_H */
|
||||
|
||||
@ -426,10 +426,6 @@
|
||||
/* Define to 1 if you have the `rint' function. */
|
||||
#define HAVE_RINT 1
|
||||
|
||||
/* Define to 1 if you have the global variable
|
||||
'rl_completion_append_character'. */
|
||||
/* #undef HAVE_RL_COMPLETION_APPEND_CHARACTER */
|
||||
|
||||
/* Define to 1 if you have the `rl_completion_matches' function. */
|
||||
#define HAVE_RL_COMPLETION_MATCHES 1
|
||||
|
||||
@ -439,6 +435,9 @@
|
||||
/* Define to 1 if you have the `rl_reset_screen_size' function. */
|
||||
/* #undef HAVE_RL_RESET_SCREEN_SIZE */
|
||||
|
||||
/* Define to 1 if you have the `rl_variable_bind' function. */
|
||||
#define HAVE_RL_VARIABLE_BIND 1
|
||||
|
||||
/* Define to 1 if you have the <security/pam_appl.h> header file. */
|
||||
#define HAVE_SECURITY_PAM_APPL_H 1
|
||||
|
||||
@ -451,6 +450,9 @@
|
||||
/* Define to 1 if you have the `shm_open' function. */
|
||||
#define HAVE_SHM_OPEN 1
|
||||
|
||||
/* Define to 1 if the system has the type `socklen_t'. */
|
||||
#define HAVE_SOCKLEN_T 1
|
||||
|
||||
/* Define to 1 if you have the `sigprocmask' function. */
|
||||
#define HAVE_SIGPROCMASK 1
|
||||
|
||||
@ -466,9 +468,6 @@
|
||||
/* Define to 1 if you have spinlocks. */
|
||||
#define HAVE_SPINLOCKS 1
|
||||
|
||||
/* Define to 1 if you have the `srandom' function. */
|
||||
#define HAVE_SRANDOM 1
|
||||
|
||||
/* Define to 1 if you have the `SSL_CTX_set_num_tickets' function. */
|
||||
/* #define HAVE_SSL_CTX_SET_NUM_TICKETS */
|
||||
|
||||
@ -885,6 +884,9 @@
|
||||
/* Define to select Win32-style shared memory. */
|
||||
/* #undef USE_WIN32_SHARED_MEMORY */
|
||||
|
||||
/* Define to 1 to build with ZSTD support. (--with-zstd) */
|
||||
/* #undef USE_ZSTD */
|
||||
|
||||
/* Define to 1 if `wcstombs_l' requires <xlocale.h>. */
|
||||
/* #undef WCSTOMBS_L_IN_XLOCALE */
|
||||
|
||||
|
@ -9,7 +9,7 @@ Computes an approximate [quantile](https://en.wikipedia.org/wiki/Quantile) of a
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
quantileDDsketch[relative_accuracy, (level)](expr)
|
||||
quantileDD(relative_accuracy, [level])(expr)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
@ -29,6 +29,7 @@ namespace DB
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int CANNOT_RESTORE_TABLE;
|
||||
extern const int ACCESS_ENTITY_ALREADY_EXISTS;
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
@ -175,9 +176,46 @@ namespace
|
||||
return res;
|
||||
}
|
||||
|
||||
std::unordered_map<UUID, UUID> resolveDependencies(const std::unordered_map<UUID, std::pair<String, AccessEntityType>> & dependencies, const AccessControl & access_control, bool allow_unresolved_dependencies)
|
||||
/// Checks if new entities (which we're going to restore) already exist,
|
||||
/// and either skips them or throws an exception depending on the restore settings.
|
||||
void checkExistingEntities(std::vector<std::pair<UUID, AccessEntityPtr>> & entities,
|
||||
std::unordered_map<UUID, UUID> & old_to_new_id,
|
||||
const AccessControl & access_control,
|
||||
RestoreAccessCreationMode creation_mode)
|
||||
{
|
||||
if (creation_mode == RestoreAccessCreationMode::kReplace)
|
||||
return;
|
||||
|
||||
auto should_skip = [&](const std::pair<UUID, AccessEntityPtr> & id_and_entity)
|
||||
{
|
||||
const auto & id = id_and_entity.first;
|
||||
const auto & entity = *id_and_entity.second;
|
||||
auto existing_id = access_control.find(entity.getType(), entity.getName());
|
||||
if (!existing_id)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
else if (creation_mode == RestoreAccessCreationMode::kCreateIfNotExists)
|
||||
{
|
||||
old_to_new_id[id] = *existing_id;
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
throw Exception(ErrorCodes::ACCESS_ENTITY_ALREADY_EXISTS, "Cannot restore {} because it already exists", entity.formatTypeWithName());
|
||||
}
|
||||
};
|
||||
|
||||
std::erase_if(entities, should_skip);
|
||||
}
|
||||
|
||||
/// If new entities (which we're going to restore) depend on other entities which are not going to be restored or not present in the backup
|
||||
/// then we should try to replace those dependencies with already existing entities.
|
||||
void resolveDependencies(const std::unordered_map<UUID, std::pair<String, AccessEntityType>> & dependencies,
|
||||
std::unordered_map<UUID, UUID> & old_to_new_ids,
|
||||
const AccessControl & access_control,
|
||||
bool allow_unresolved_dependencies)
|
||||
{
|
||||
std::unordered_map<UUID, UUID> old_to_new_ids;
|
||||
for (const auto & [id, name_and_type] : dependencies)
|
||||
{
|
||||
std::optional<UUID> new_id;
|
||||
@ -188,9 +226,9 @@ namespace
|
||||
if (new_id)
|
||||
old_to_new_ids.emplace(id, *new_id);
|
||||
}
|
||||
return old_to_new_ids;
|
||||
}
|
||||
|
||||
/// Generates random IDs for the new entities.
|
||||
void generateRandomIDs(std::vector<std::pair<UUID, AccessEntityPtr>> & entities, std::unordered_map<UUID, UUID> & old_to_new_ids)
|
||||
{
|
||||
Poco::UUIDGenerator generator;
|
||||
@ -203,27 +241,12 @@ namespace
|
||||
}
|
||||
}
|
||||
|
||||
void replaceDependencies(std::vector<std::pair<UUID, AccessEntityPtr>> & entities, const std::unordered_map<UUID, UUID> & old_to_new_ids)
|
||||
/// Updates dependencies of the new entities using a specified map.
|
||||
void replaceDependencies(std::vector<std::pair<UUID, AccessEntityPtr>> & entities,
|
||||
const std::unordered_map<UUID, UUID> & old_to_new_ids)
|
||||
{
|
||||
for (auto & entity : entities | boost::adaptors::map_values)
|
||||
{
|
||||
bool need_replace = false;
|
||||
for (const auto & dependency : entity->findDependencies())
|
||||
{
|
||||
if (old_to_new_ids.contains(dependency))
|
||||
{
|
||||
need_replace = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!need_replace)
|
||||
continue;
|
||||
|
||||
auto new_entity = entity->clone();
|
||||
new_entity->replaceDependencies(old_to_new_ids);
|
||||
entity = new_entity;
|
||||
}
|
||||
IAccessEntity::replaceDependencies(entity, old_to_new_ids);
|
||||
}
|
||||
|
||||
AccessRightsElements getRequiredAccessToRestore(const std::vector<std::pair<UUID, AccessEntityPtr>> & entities)
|
||||
@ -314,7 +337,9 @@ std::pair<String, BackupEntryPtr> makeBackupEntryForAccess(
|
||||
|
||||
AccessRestorerFromBackup::AccessRestorerFromBackup(
|
||||
const BackupPtr & backup_, const RestoreSettings & restore_settings_)
|
||||
: backup(backup_), allow_unresolved_access_dependencies(restore_settings_.allow_unresolved_access_dependencies)
|
||||
: backup(backup_)
|
||||
, creation_mode(restore_settings_.create_access)
|
||||
, allow_unresolved_dependencies(restore_settings_.allow_unresolved_access_dependencies)
|
||||
{
|
||||
}
|
||||
|
||||
@ -362,7 +387,9 @@ std::vector<std::pair<UUID, AccessEntityPtr>> AccessRestorerFromBackup::getAcces
|
||||
{
|
||||
auto new_entities = entities;
|
||||
|
||||
auto old_to_new_ids = resolveDependencies(dependencies, access_control, allow_unresolved_access_dependencies);
|
||||
std::unordered_map<UUID, UUID> old_to_new_ids;
|
||||
checkExistingEntities(new_entities, old_to_new_ids, access_control, creation_mode);
|
||||
resolveDependencies(dependencies, old_to_new_ids, access_control, allow_unresolved_dependencies);
|
||||
generateRandomIDs(new_entities, old_to_new_ids);
|
||||
replaceDependencies(new_entities, old_to_new_ids);
|
||||
|
||||
|
@ -17,6 +17,7 @@ using BackupPtr = std::shared_ptr<const IBackup>;
|
||||
class IBackupEntry;
|
||||
using BackupEntryPtr = std::shared_ptr<const IBackupEntry>;
|
||||
struct RestoreSettings;
|
||||
enum class RestoreAccessCreationMode : uint8_t;
|
||||
|
||||
|
||||
/// Makes a backup of access entities of a specified type.
|
||||
@ -45,7 +46,8 @@ public:
|
||||
|
||||
private:
|
||||
BackupPtr backup;
|
||||
bool allow_unresolved_access_dependencies = false;
|
||||
RestoreAccessCreationMode creation_mode;
|
||||
bool allow_unresolved_dependencies = false;
|
||||
std::vector<std::pair<UUID, AccessEntityPtr>> entities;
|
||||
std::unordered_map<UUID, std::pair<String, AccessEntityType>> dependencies;
|
||||
std::unordered_set<String> data_paths;
|
||||
|
@ -544,9 +544,9 @@ scope_guard AccessControl::subscribeForChanges(const std::vector<UUID> & ids, co
|
||||
return changes_notifier->subscribeForChanges(ids, handler);
|
||||
}
|
||||
|
||||
bool AccessControl::insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists)
|
||||
bool AccessControl::insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id)
|
||||
{
|
||||
if (MultipleAccessStorage::insertImpl(id, entity, replace_if_exists, throw_if_exists))
|
||||
if (MultipleAccessStorage::insertImpl(id, entity, replace_if_exists, throw_if_exists, conflicting_id))
|
||||
{
|
||||
changes_notifier->sendNotifications();
|
||||
return true;
|
||||
|
@ -243,7 +243,7 @@ private:
|
||||
class CustomSettingsPrefixes;
|
||||
class PasswordComplexityRules;
|
||||
|
||||
bool insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists) override;
|
||||
bool insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id) override;
|
||||
bool removeImpl(const UUID & id, bool throw_if_not_exists) override;
|
||||
bool updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) override;
|
||||
|
||||
|
@ -1,8 +1,6 @@
|
||||
#include <Access/DiskAccessStorage.h>
|
||||
#include <Access/AccessEntityIO.h>
|
||||
#include <Access/AccessChangesNotifier.h>
|
||||
#include <Backups/RestorerFromBackup.h>
|
||||
#include <Backups/RestoreSettings.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/ReadBufferFromFile.h>
|
||||
@ -418,7 +416,7 @@ void DiskAccessStorage::setAllInMemory(const std::vector<std::pair<UUID, AccessE
|
||||
|
||||
/// Insert or update entities.
|
||||
for (const auto & [id, entity] : entities_without_conflicts)
|
||||
insertNoLock(id, entity, /* replace_if_exists = */ true, /* throw_if_exists = */ false, /* write_on_disk= */ false);
|
||||
insertNoLock(id, entity, /* replace_if_exists = */ true, /* throw_if_exists = */ false, /* conflicting_id = */ nullptr, /* write_on_disk= */ false);
|
||||
}
|
||||
|
||||
void DiskAccessStorage::removeAllExceptInMemory(const boost::container::flat_set<UUID> & ids_to_keep)
|
||||
@ -507,14 +505,14 @@ std::optional<std::pair<String, AccessEntityType>> DiskAccessStorage::readNameWi
|
||||
}
|
||||
|
||||
|
||||
bool DiskAccessStorage::insertImpl(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists)
|
||||
bool DiskAccessStorage::insertImpl(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id)
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
return insertNoLock(id, new_entity, replace_if_exists, throw_if_exists, /* write_on_disk = */ true);
|
||||
return insertNoLock(id, new_entity, replace_if_exists, throw_if_exists, conflicting_id, /* write_on_disk = */ true);
|
||||
}
|
||||
|
||||
|
||||
bool DiskAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists, bool write_on_disk)
|
||||
bool DiskAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id, bool write_on_disk)
|
||||
{
|
||||
const String & name = new_entity->getName();
|
||||
AccessEntityType type = new_entity->getType();
|
||||
@ -533,9 +531,15 @@ bool DiskAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr & ne
|
||||
if (name_collision && !replace_if_exists)
|
||||
{
|
||||
if (throw_if_exists)
|
||||
{
|
||||
throwNameCollisionCannotInsert(type, name);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (conflicting_id)
|
||||
*conflicting_id = id_by_name;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
auto it_by_id = entries_by_id.find(id);
|
||||
@ -548,7 +552,11 @@ bool DiskAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr & ne
|
||||
throwIDCollisionCannotInsert(id, type, name, existing_entry.type, existing_entry.name);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (conflicting_id)
|
||||
*conflicting_id = id;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (write_on_disk)
|
||||
@ -727,25 +735,4 @@ void DiskAccessStorage::deleteAccessEntityOnDisk(const UUID & id) const
|
||||
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Couldn't delete {}", file_path);
|
||||
}
|
||||
|
||||
|
||||
void DiskAccessStorage::restoreFromBackup(RestorerFromBackup & restorer)
|
||||
{
|
||||
if (!isRestoreAllowed())
|
||||
throwRestoreNotAllowed();
|
||||
|
||||
auto entities = restorer.getAccessEntitiesToRestore();
|
||||
if (entities.empty())
|
||||
return;
|
||||
|
||||
auto create_access = restorer.getRestoreSettings().create_access;
|
||||
bool replace_if_exists = (create_access == RestoreAccessCreationMode::kReplace);
|
||||
bool throw_if_exists = (create_access == RestoreAccessCreationMode::kCreate);
|
||||
|
||||
restorer.addDataRestoreTask([this, my_entities = std::move(entities), replace_if_exists, throw_if_exists]
|
||||
{
|
||||
for (const auto & [id, entity] : my_entities)
|
||||
insert(id, entity, replace_if_exists, throw_if_exists);
|
||||
});
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -34,14 +34,13 @@ public:
|
||||
bool exists(const UUID & id) const override;
|
||||
|
||||
bool isBackupAllowed() const override { return backup_allowed; }
|
||||
void restoreFromBackup(RestorerFromBackup & restorer) override;
|
||||
|
||||
private:
|
||||
std::optional<UUID> findImpl(AccessEntityType type, const String & name) const override;
|
||||
std::vector<UUID> findAllImpl(AccessEntityType type) const override;
|
||||
AccessEntityPtr readImpl(const UUID & id, bool throw_if_not_exists) const override;
|
||||
std::optional<std::pair<String, AccessEntityType>> readNameWithTypeImpl(const UUID & id, bool throw_if_not_exists) const override;
|
||||
bool insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists) override;
|
||||
bool insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id) override;
|
||||
bool removeImpl(const UUID & id, bool throw_if_not_exists) override;
|
||||
bool updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) override;
|
||||
|
||||
@ -55,7 +54,7 @@ private:
|
||||
void listsWritingThreadFunc() TSA_NO_THREAD_SAFETY_ANALYSIS;
|
||||
void stopListsWritingThread();
|
||||
|
||||
bool insertNoLock(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists, bool write_on_disk) TSA_REQUIRES(mutex);
|
||||
bool insertNoLock(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id, bool write_on_disk) TSA_REQUIRES(mutex);
|
||||
bool updateNoLock(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists, bool write_on_disk) TSA_REQUIRES(mutex);
|
||||
bool removeNoLock(const UUID & id, bool throw_if_not_exists, bool write_on_disk) TSA_REQUIRES(mutex);
|
||||
|
||||
|
@ -9,4 +9,28 @@ bool IAccessEntity::equal(const IAccessEntity & other) const
|
||||
return (name == other.name) && (getType() == other.getType());
|
||||
}
|
||||
|
||||
void IAccessEntity::replaceDependencies(std::shared_ptr<const IAccessEntity> & entity, const std::unordered_map<UUID, UUID> & old_to_new_ids)
|
||||
{
|
||||
if (old_to_new_ids.empty())
|
||||
return;
|
||||
|
||||
bool need_replace_dependencies = false;
|
||||
auto dependencies = entity->findDependencies();
|
||||
for (const auto & dependency : dependencies)
|
||||
{
|
||||
if (old_to_new_ids.contains(dependency))
|
||||
{
|
||||
need_replace_dependencies = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!need_replace_dependencies)
|
||||
return;
|
||||
|
||||
auto new_entity = entity->clone();
|
||||
new_entity->replaceDependencies(old_to_new_ids);
|
||||
entity = new_entity;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -50,7 +50,8 @@ struct IAccessEntity
|
||||
virtual std::vector<UUID> findDependencies() const { return {}; }
|
||||
|
||||
/// Replaces dependencies according to a specified map.
|
||||
virtual void replaceDependencies(const std::unordered_map<UUID, UUID> & /* old_to_new_ids */) {}
|
||||
void replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) { doReplaceDependencies(old_to_new_ids); }
|
||||
static void replaceDependencies(std::shared_ptr<const IAccessEntity> & entity, const std::unordered_map<UUID, UUID> & old_to_new_ids);
|
||||
|
||||
/// Whether this access entity should be written to a backup.
|
||||
virtual bool isBackupAllowed() const { return false; }
|
||||
@ -66,6 +67,8 @@ protected:
|
||||
{
|
||||
return std::make_shared<EntityClassT>(typeid_cast<const EntityClassT &>(*this));
|
||||
}
|
||||
|
||||
virtual void doReplaceDependencies(const std::unordered_map<UUID, UUID> & /* old_to_new_ids */) {}
|
||||
};
|
||||
|
||||
using AccessEntityPtr = std::shared_ptr<const IAccessEntity>;
|
||||
|
@ -4,6 +4,8 @@
|
||||
#include <Access/User.h>
|
||||
#include <Access/AccessBackup.h>
|
||||
#include <Backups/BackupEntriesCollector.h>
|
||||
#include <Backups/RestorerFromBackup.h>
|
||||
#include <Backups/RestoreSettings.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/quoteString.h>
|
||||
#include <Common/callOnce.h>
|
||||
@ -14,10 +16,11 @@
|
||||
#include <base/FnTraits.h>
|
||||
#include <boost/algorithm/string/join.hpp>
|
||||
#include <boost/algorithm/string/replace.hpp>
|
||||
#include <boost/range/adaptor/map.hpp>
|
||||
#include <boost/range/adaptor/reversed.hpp>
|
||||
#include <boost/range/algorithm/copy.hpp>
|
||||
#include <boost/range/algorithm_ext/erase.hpp>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
@ -178,20 +181,20 @@ UUID IAccessStorage::insert(const AccessEntityPtr & entity)
|
||||
return *insert(entity, /* replace_if_exists = */ false, /* throw_if_exists = */ true);
|
||||
}
|
||||
|
||||
std::optional<UUID> IAccessStorage::insert(const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists)
|
||||
std::optional<UUID> IAccessStorage::insert(const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id)
|
||||
{
|
||||
auto id = generateRandomID();
|
||||
|
||||
if (insert(id, entity, replace_if_exists, throw_if_exists))
|
||||
if (insert(id, entity, replace_if_exists, throw_if_exists, conflicting_id))
|
||||
return id;
|
||||
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
|
||||
bool IAccessStorage::insert(const DB::UUID & id, const DB::AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists)
|
||||
bool IAccessStorage::insert(const DB::UUID & id, const DB::AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id)
|
||||
{
|
||||
return insertImpl(id, entity, replace_if_exists, throw_if_exists);
|
||||
return insertImpl(id, entity, replace_if_exists, throw_if_exists, conflicting_id);
|
||||
}
|
||||
|
||||
|
||||
@ -285,7 +288,7 @@ std::vector<UUID> IAccessStorage::insertOrReplace(const std::vector<AccessEntity
|
||||
}
|
||||
|
||||
|
||||
bool IAccessStorage::insertImpl(const UUID &, const AccessEntityPtr & entity, bool, bool)
|
||||
bool IAccessStorage::insertImpl(const UUID &, const AccessEntityPtr & entity, bool, bool, UUID *)
|
||||
{
|
||||
if (isReadOnly())
|
||||
throwReadonlyCannotInsert(entity->getType(), entity->getName());
|
||||
@ -611,12 +614,51 @@ void IAccessStorage::backup(BackupEntriesCollector & backup_entries_collector, c
|
||||
}
|
||||
|
||||
|
||||
void IAccessStorage::restoreFromBackup(RestorerFromBackup &)
|
||||
void IAccessStorage::restoreFromBackup(RestorerFromBackup & restorer)
|
||||
{
|
||||
if (!isRestoreAllowed())
|
||||
throwRestoreNotAllowed();
|
||||
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "restoreFromBackup() is not implemented in {}", getStorageType());
|
||||
if (isReplicated() && !acquireReplicatedRestore(restorer))
|
||||
return;
|
||||
|
||||
auto entities = restorer.getAccessEntitiesToRestore();
|
||||
if (entities.empty())
|
||||
return;
|
||||
|
||||
auto create_access = restorer.getRestoreSettings().create_access;
|
||||
bool replace_if_exists = (create_access == RestoreAccessCreationMode::kReplace);
|
||||
bool throw_if_exists = (create_access == RestoreAccessCreationMode::kCreate);
|
||||
|
||||
restorer.addDataRestoreTask([this, entities_to_restore = std::move(entities), replace_if_exists, throw_if_exists] mutable
|
||||
{
|
||||
std::unordered_map<UUID, UUID> new_to_existing_ids;
|
||||
for (auto & [id, entity] : entities_to_restore)
|
||||
{
|
||||
UUID existing_entity_id;
|
||||
if (!insert(id, entity, replace_if_exists, throw_if_exists, &existing_entity_id))
|
||||
{
|
||||
/// Couldn't insert `entity` because there is an existing entity with the same name.
|
||||
new_to_existing_ids[id] = existing_entity_id;
|
||||
}
|
||||
}
|
||||
|
||||
if (!new_to_existing_ids.empty())
|
||||
{
|
||||
/// If new entities restored from backup have dependencies on other entities from backup which were not restored because they existed,
|
||||
/// then we should correct those dependencies.
|
||||
auto update_func = [&](const AccessEntityPtr & entity) -> AccessEntityPtr
|
||||
{
|
||||
auto res = entity;
|
||||
IAccessEntity::replaceDependencies(res, new_to_existing_ids);
|
||||
return res;
|
||||
};
|
||||
std::vector<UUID> ids;
|
||||
ids.reserve(entities_to_restore.size());
|
||||
boost::copy(entities_to_restore | boost::adaptors::map_keys, std::back_inserter(ids));
|
||||
tryUpdate(ids, update_func);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
|
@ -64,6 +64,9 @@ public:
|
||||
/// Returns true if this entity is readonly.
|
||||
virtual bool isReadOnly(const UUID &) const { return isReadOnly(); }
|
||||
|
||||
/// Returns true if this storage is replicated.
|
||||
virtual bool isReplicated() const { return false; }
|
||||
|
||||
/// Starts periodic reloading and updating of entities in this storage.
|
||||
virtual void startPeriodicReloading() {}
|
||||
|
||||
@ -153,8 +156,8 @@ public:
|
||||
/// Inserts an entity to the storage. Returns ID of a new entry in the storage.
|
||||
/// Throws an exception if the specified name already exists.
|
||||
UUID insert(const AccessEntityPtr & entity);
|
||||
std::optional<UUID> insert(const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists);
|
||||
bool insert(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists);
|
||||
std::optional<UUID> insert(const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id = nullptr);
|
||||
bool insert(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id = nullptr);
|
||||
std::vector<UUID> insert(const std::vector<AccessEntityPtr> & multiple_entities, bool replace_if_exists = false, bool throw_if_exists = true);
|
||||
std::vector<UUID> insert(const std::vector<AccessEntityPtr> & multiple_entities, const std::vector<UUID> & ids, bool replace_if_exists = false, bool throw_if_exists = true);
|
||||
|
||||
@ -218,7 +221,7 @@ protected:
|
||||
virtual std::vector<UUID> findAllImpl(AccessEntityType type) const = 0;
|
||||
virtual AccessEntityPtr readImpl(const UUID & id, bool throw_if_not_exists) const = 0;
|
||||
virtual std::optional<std::pair<String, AccessEntityType>> readNameWithTypeImpl(const UUID & id, bool throw_if_not_exists) const;
|
||||
virtual bool insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists);
|
||||
virtual bool insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id);
|
||||
virtual bool removeImpl(const UUID & id, bool throw_if_not_exists);
|
||||
virtual bool updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists);
|
||||
virtual std::optional<AuthResult> authenticateImpl(
|
||||
@ -240,6 +243,7 @@ protected:
|
||||
LoggerPtr getLogger() const;
|
||||
static String formatEntityTypeWithName(AccessEntityType type, const String & name) { return AccessEntityTypeInfo::get(type).formatEntityNameWithType(name); }
|
||||
static void clearConflictsInEntitiesList(std::vector<std::pair<UUID, AccessEntityPtr>> & entities, LoggerPtr log_);
|
||||
virtual bool acquireReplicatedRestore(RestorerFromBackup &) const { return false; }
|
||||
[[noreturn]] void throwNotFound(const UUID & id) const;
|
||||
[[noreturn]] void throwNotFound(AccessEntityType type, const String & name) const;
|
||||
[[noreturn]] static void throwBadCast(const UUID & id, AccessEntityType type, const String & name, AccessEntityType required_type);
|
||||
|
@ -1,7 +1,5 @@
|
||||
#include <Access/MemoryAccessStorage.h>
|
||||
#include <Access/AccessChangesNotifier.h>
|
||||
#include <Backups/RestorerFromBackup.h>
|
||||
#include <Backups/RestoreSettings.h>
|
||||
#include <base/scope_guard.h>
|
||||
#include <boost/container/flat_set.hpp>
|
||||
#include <boost/range/adaptor/map.hpp>
|
||||
@ -63,14 +61,14 @@ AccessEntityPtr MemoryAccessStorage::readImpl(const UUID & id, bool throw_if_not
|
||||
}
|
||||
|
||||
|
||||
bool MemoryAccessStorage::insertImpl(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists)
|
||||
bool MemoryAccessStorage::insertImpl(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id)
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
return insertNoLock(id, new_entity, replace_if_exists, throw_if_exists);
|
||||
return insertNoLock(id, new_entity, replace_if_exists, throw_if_exists, conflicting_id);
|
||||
}
|
||||
|
||||
|
||||
bool MemoryAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists)
|
||||
bool MemoryAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id)
|
||||
{
|
||||
const String & name = new_entity->getName();
|
||||
AccessEntityType type = new_entity->getType();
|
||||
@ -86,9 +84,15 @@ bool MemoryAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr &
|
||||
if (name_collision && !replace_if_exists)
|
||||
{
|
||||
if (throw_if_exists)
|
||||
{
|
||||
throwNameCollisionCannotInsert(type, name);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (conflicting_id)
|
||||
*conflicting_id = id_by_name;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
auto it_by_id = entries_by_id.find(id);
|
||||
@ -97,9 +101,15 @@ bool MemoryAccessStorage::insertNoLock(const UUID & id, const AccessEntityPtr &
|
||||
{
|
||||
const auto & existing_entry = it_by_id->second;
|
||||
if (throw_if_exists)
|
||||
{
|
||||
throwIDCollisionCannotInsert(id, type, name, existing_entry.entity->getType(), existing_entry.entity->getName());
|
||||
}
|
||||
else
|
||||
{
|
||||
if (conflicting_id)
|
||||
*conflicting_id = id;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/// Remove collisions if necessary.
|
||||
@ -270,28 +280,7 @@ void MemoryAccessStorage::setAll(const std::vector<std::pair<UUID, AccessEntityP
|
||||
|
||||
/// Insert or update entities.
|
||||
for (const auto & [id, entity] : entities_without_conflicts)
|
||||
insertNoLock(id, entity, /* replace_if_exists = */ true, /* throw_if_exists = */ false);
|
||||
}
|
||||
|
||||
|
||||
void MemoryAccessStorage::restoreFromBackup(RestorerFromBackup & restorer)
|
||||
{
|
||||
if (!isRestoreAllowed())
|
||||
throwRestoreNotAllowed();
|
||||
|
||||
auto entities = restorer.getAccessEntitiesToRestore();
|
||||
if (entities.empty())
|
||||
return;
|
||||
|
||||
auto create_access = restorer.getRestoreSettings().create_access;
|
||||
bool replace_if_exists = (create_access == RestoreAccessCreationMode::kReplace);
|
||||
bool throw_if_exists = (create_access == RestoreAccessCreationMode::kCreate);
|
||||
|
||||
restorer.addDataRestoreTask([this, my_entities = std::move(entities), replace_if_exists, throw_if_exists]
|
||||
{
|
||||
for (const auto & [id, entity] : my_entities)
|
||||
insert(id, entity, replace_if_exists, throw_if_exists);
|
||||
});
|
||||
insertNoLock(id, entity, /* replace_if_exists = */ true, /* throw_if_exists = */ false, /* conflicting_id = */ nullptr);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -34,17 +34,16 @@ public:
|
||||
bool exists(const UUID & id) const override;
|
||||
|
||||
bool isBackupAllowed() const override { return backup_allowed; }
|
||||
void restoreFromBackup(RestorerFromBackup & restorer) override;
|
||||
|
||||
private:
|
||||
std::optional<UUID> findImpl(AccessEntityType type, const String & name) const override;
|
||||
std::vector<UUID> findAllImpl(AccessEntityType type) const override;
|
||||
AccessEntityPtr readImpl(const UUID & id, bool throw_if_not_exists) const override;
|
||||
bool insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists) override;
|
||||
bool insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id) override;
|
||||
bool removeImpl(const UUID & id, bool throw_if_not_exists) override;
|
||||
bool updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) override;
|
||||
|
||||
bool insertNoLock(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists);
|
||||
bool insertNoLock(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id);
|
||||
bool removeNoLock(const UUID & id, bool throw_if_not_exists);
|
||||
bool updateNoLock(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists);
|
||||
|
||||
|
@ -353,7 +353,7 @@ void MultipleAccessStorage::reload(ReloadMode reload_mode)
|
||||
}
|
||||
|
||||
|
||||
bool MultipleAccessStorage::insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists)
|
||||
bool MultipleAccessStorage::insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id)
|
||||
{
|
||||
std::shared_ptr<IAccessStorage> storage_for_insertion;
|
||||
|
||||
@ -376,7 +376,7 @@ bool MultipleAccessStorage::insertImpl(const UUID & id, const AccessEntityPtr &
|
||||
getStorageName());
|
||||
}
|
||||
|
||||
if (storage_for_insertion->insert(id, entity, replace_if_exists, throw_if_exists))
|
||||
if (storage_for_insertion->insert(id, entity, replace_if_exists, throw_if_exists, conflicting_id))
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
ids_cache.set(id, storage_for_insertion);
|
||||
|
@ -67,7 +67,7 @@ protected:
|
||||
std::vector<UUID> findAllImpl(AccessEntityType type) const override;
|
||||
AccessEntityPtr readImpl(const UUID & id, bool throw_if_not_exists) const override;
|
||||
std::optional<std::pair<String, AccessEntityType>> readNameWithTypeImpl(const UUID & id, bool throw_if_not_exists) const override;
|
||||
bool insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists) override;
|
||||
bool insertImpl(const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id) override;
|
||||
bool removeImpl(const UUID & id, bool throw_if_not_exists) override;
|
||||
bool updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) override;
|
||||
std::optional<AuthResult> authenticateImpl(const Credentials & credentials, const Poco::Net::IPAddress & address, const ExternalAuthenticators & external_authenticators, bool throw_if_user_not_exists, bool allow_no_password, bool allow_plaintext_password) const override;
|
||||
|
@ -24,7 +24,7 @@ std::vector<UUID> Quota::findDependencies() const
|
||||
return to_roles.findDependencies();
|
||||
}
|
||||
|
||||
void Quota::replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids)
|
||||
void Quota::doReplaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids)
|
||||
{
|
||||
to_roles.replaceDependencies(old_to_new_ids);
|
||||
}
|
||||
|
@ -47,7 +47,7 @@ struct Quota : public IAccessEntity
|
||||
AccessEntityType getType() const override { return TYPE; }
|
||||
|
||||
std::vector<UUID> findDependencies() const override;
|
||||
void replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) override;
|
||||
void doReplaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) override;
|
||||
bool isBackupAllowed() const override { return true; }
|
||||
};
|
||||
|
||||
|
@ -5,10 +5,9 @@
|
||||
#include <Access/AccessChangesNotifier.h>
|
||||
#include <Access/AccessBackup.h>
|
||||
#include <Backups/BackupEntriesCollector.h>
|
||||
#include <Backups/RestorerFromBackup.h>
|
||||
#include <Backups/RestoreSettings.h>
|
||||
#include <Backups/IBackupCoordination.h>
|
||||
#include <Backups/IRestoreCoordination.h>
|
||||
#include <Backups/RestorerFromBackup.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Common/ZooKeeper/KeeperException.h>
|
||||
@ -120,7 +119,7 @@ static void retryOnZooKeeperUserError(size_t attempts, Func && function)
|
||||
}
|
||||
}
|
||||
|
||||
bool ReplicatedAccessStorage::insertImpl(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists)
|
||||
bool ReplicatedAccessStorage::insertImpl(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id)
|
||||
{
|
||||
const AccessEntityTypeInfo type_info = AccessEntityTypeInfo::get(new_entity->getType());
|
||||
const String & name = new_entity->getName();
|
||||
@ -128,7 +127,7 @@ bool ReplicatedAccessStorage::insertImpl(const UUID & id, const AccessEntityPtr
|
||||
|
||||
auto zookeeper = getZooKeeper();
|
||||
bool ok = false;
|
||||
retryOnZooKeeperUserError(10, [&]{ ok = insertZooKeeper(zookeeper, id, new_entity, replace_if_exists, throw_if_exists); });
|
||||
retryOnZooKeeperUserError(10, [&]{ ok = insertZooKeeper(zookeeper, id, new_entity, replace_if_exists, throw_if_exists, conflicting_id); });
|
||||
|
||||
if (!ok)
|
||||
return false;
|
||||
@ -143,7 +142,8 @@ bool ReplicatedAccessStorage::insertZooKeeper(
|
||||
const UUID & id,
|
||||
const AccessEntityPtr & new_entity,
|
||||
bool replace_if_exists,
|
||||
bool throw_if_exists)
|
||||
bool throw_if_exists,
|
||||
UUID * conflicting_id)
|
||||
{
|
||||
const String & name = new_entity->getName();
|
||||
const AccessEntityType type = new_entity->getType();
|
||||
@ -167,27 +167,52 @@ bool ReplicatedAccessStorage::insertZooKeeper(
|
||||
|
||||
if (res == Coordination::Error::ZNODEEXISTS)
|
||||
{
|
||||
if (!throw_if_exists && !replace_if_exists)
|
||||
return false; /// Couldn't insert a new entity.
|
||||
|
||||
if (throw_if_exists)
|
||||
if (!replace_if_exists)
|
||||
{
|
||||
if (responses[0]->error == Coordination::Error::ZNODEEXISTS)
|
||||
{
|
||||
/// To fail with a nice error message, we need info about what already exists.
|
||||
/// This itself could fail if the conflicting uuid disappears in the meantime.
|
||||
/// If that happens, then we'll just retry from the start.
|
||||
String existing_entity_definition = zookeeper->get(entity_path);
|
||||
/// Couldn't insert the new entity because there is an existing entity with such UUID.
|
||||
if (throw_if_exists)
|
||||
{
|
||||
/// To fail with a nice error message, we need info about what already exists.
|
||||
/// This itself can fail if the conflicting uuid disappears in the meantime.
|
||||
/// If that happens, then retryOnZooKeeperUserError() will just retry the operation from the start.
|
||||
String existing_entity_definition = zookeeper->get(entity_path);
|
||||
|
||||
AccessEntityPtr existing_entity = deserializeAccessEntity(existing_entity_definition, entity_path);
|
||||
AccessEntityType existing_type = existing_entity->getType();
|
||||
String existing_name = existing_entity->getName();
|
||||
throwIDCollisionCannotInsert(id, type, name, existing_type, existing_name);
|
||||
AccessEntityPtr existing_entity = deserializeAccessEntity(existing_entity_definition, entity_path);
|
||||
AccessEntityType existing_type = existing_entity->getType();
|
||||
String existing_name = existing_entity->getName();
|
||||
throwIDCollisionCannotInsert(id, type, name, existing_type, existing_name);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (conflicting_id)
|
||||
*conflicting_id = id;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else if (responses[1]->error == Coordination::Error::ZNODEEXISTS)
|
||||
{
|
||||
/// Couldn't insert the new entity because there is an existing entity with the same name.
|
||||
if (throw_if_exists)
|
||||
{
|
||||
throwNameCollisionCannotInsert(type, name);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (conflicting_id)
|
||||
{
|
||||
/// Get UUID of the existing entry with the same name.
|
||||
/// This itself can fail if the conflicting name disappears in the meantime.
|
||||
/// If that happens, then retryOnZooKeeperUserError() will just retry the operation from the start.
|
||||
*conflicting_id = parseUUID(zookeeper->get(name_path));
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/// Couldn't insert the new entity because there is an existing entity with such name.
|
||||
throwNameCollisionCannotInsert(type, name);
|
||||
zkutil::KeeperMultiException::check(res, ops, responses);
|
||||
}
|
||||
}
|
||||
|
||||
@ -693,28 +718,10 @@ void ReplicatedAccessStorage::backup(BackupEntriesCollector & backup_entries_col
|
||||
}
|
||||
|
||||
|
||||
void ReplicatedAccessStorage::restoreFromBackup(RestorerFromBackup & restorer)
|
||||
bool ReplicatedAccessStorage::acquireReplicatedRestore(RestorerFromBackup & restorer) const
|
||||
{
|
||||
if (!isRestoreAllowed())
|
||||
throwRestoreNotAllowed();
|
||||
|
||||
auto restore_coordination = restorer.getRestoreCoordination();
|
||||
if (!restore_coordination->acquireReplicatedAccessStorage(zookeeper_path))
|
||||
return;
|
||||
|
||||
auto entities = restorer.getAccessEntitiesToRestore();
|
||||
if (entities.empty())
|
||||
return;
|
||||
|
||||
auto create_access = restorer.getRestoreSettings().create_access;
|
||||
bool replace_if_exists = (create_access == RestoreAccessCreationMode::kReplace);
|
||||
bool throw_if_exists = (create_access == RestoreAccessCreationMode::kCreate);
|
||||
|
||||
restorer.addDataRestoreTask([this, my_entities = std::move(entities), replace_if_exists, throw_if_exists]
|
||||
{
|
||||
for (const auto & [id, entity] : my_entities)
|
||||
insert(id, entity, replace_if_exists, throw_if_exists);
|
||||
});
|
||||
return restore_coordination->acquireReplicatedAccessStorage(zookeeper_path);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -26,6 +26,7 @@ public:
|
||||
void shutdown() override;
|
||||
|
||||
const char * getStorageType() const override { return STORAGE_TYPE; }
|
||||
bool isReplicated() const override { return true; }
|
||||
|
||||
void startPeriodicReloading() override { startWatchingThread(); }
|
||||
void stopPeriodicReloading() override { stopWatchingThread(); }
|
||||
@ -35,7 +36,6 @@ public:
|
||||
|
||||
bool isBackupAllowed() const override { return backup_allowed; }
|
||||
void backup(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, AccessEntityType type) const override;
|
||||
void restoreFromBackup(RestorerFromBackup & restorer) override;
|
||||
|
||||
private:
|
||||
String zookeeper_path;
|
||||
@ -48,11 +48,11 @@ private:
|
||||
std::unique_ptr<ThreadFromGlobalPool> watching_thread;
|
||||
std::shared_ptr<ConcurrentBoundedQueue<UUID>> watched_queue;
|
||||
|
||||
bool insertImpl(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists) override;
|
||||
bool insertImpl(const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id) override;
|
||||
bool removeImpl(const UUID & id, bool throw_if_not_exists) override;
|
||||
bool updateImpl(const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists) override;
|
||||
|
||||
bool insertZooKeeper(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists);
|
||||
bool insertZooKeeper(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists, UUID * conflicting_id);
|
||||
bool removeZooKeeper(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id, bool throw_if_not_exists);
|
||||
bool updateZooKeeper(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id, const UpdateFunc & update_func, bool throw_if_not_exists);
|
||||
|
||||
@ -80,6 +80,7 @@ private:
|
||||
std::optional<UUID> findImpl(AccessEntityType type, const String & name) const override;
|
||||
std::vector<UUID> findAllImpl(AccessEntityType type) const override;
|
||||
AccessEntityPtr readImpl(const UUID & id, bool throw_if_not_exists) const override;
|
||||
bool acquireReplicatedRestore(RestorerFromBackup & restorer) const override;
|
||||
|
||||
mutable std::mutex mutex;
|
||||
MemoryAccessStorage memory_storage TSA_GUARDED_BY(mutex);
|
||||
|
@ -21,7 +21,7 @@ std::vector<UUID> Role::findDependencies() const
|
||||
return res;
|
||||
}
|
||||
|
||||
void Role::replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids)
|
||||
void Role::doReplaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids)
|
||||
{
|
||||
granted_roles.replaceDependencies(old_to_new_ids);
|
||||
settings.replaceDependencies(old_to_new_ids);
|
||||
|
@ -21,7 +21,7 @@ struct Role : public IAccessEntity
|
||||
AccessEntityType getType() const override { return TYPE; }
|
||||
|
||||
std::vector<UUID> findDependencies() const override;
|
||||
void replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) override;
|
||||
void doReplaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) override;
|
||||
bool isBackupAllowed() const override { return settings.isBackupAllowed(); }
|
||||
};
|
||||
|
||||
|
@ -63,7 +63,7 @@ std::vector<UUID> RowPolicy::findDependencies() const
|
||||
return to_roles.findDependencies();
|
||||
}
|
||||
|
||||
void RowPolicy::replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids)
|
||||
void RowPolicy::doReplaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids)
|
||||
{
|
||||
to_roles.replaceDependencies(old_to_new_ids);
|
||||
}
|
||||
|
@ -50,7 +50,7 @@ struct RowPolicy : public IAccessEntity
|
||||
AccessEntityType getType() const override { return TYPE; }
|
||||
|
||||
std::vector<UUID> findDependencies() const override;
|
||||
void replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) override;
|
||||
void doReplaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) override;
|
||||
bool isBackupAllowed() const override { return true; }
|
||||
|
||||
/// Which roles or users should use this row policy.
|
||||
|
@ -21,7 +21,7 @@ std::vector<UUID> SettingsProfile::findDependencies() const
|
||||
return res;
|
||||
}
|
||||
|
||||
void SettingsProfile::replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids)
|
||||
void SettingsProfile::doReplaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids)
|
||||
{
|
||||
elements.replaceDependencies(old_to_new_ids);
|
||||
to_roles.replaceDependencies(old_to_new_ids);
|
||||
|
@ -22,7 +22,7 @@ struct SettingsProfile : public IAccessEntity
|
||||
AccessEntityType getType() const override { return TYPE; }
|
||||
|
||||
std::vector<UUID> findDependencies() const override;
|
||||
void replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) override;
|
||||
void doReplaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) override;
|
||||
bool isBackupAllowed() const override { return elements.isBackupAllowed(); }
|
||||
};
|
||||
|
||||
|
@ -49,7 +49,7 @@ std::vector<UUID> User::findDependencies() const
|
||||
return res;
|
||||
}
|
||||
|
||||
void User::replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids)
|
||||
void User::doReplaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids)
|
||||
{
|
||||
default_roles.replaceDependencies(old_to_new_ids);
|
||||
granted_roles.replaceDependencies(old_to_new_ids);
|
||||
|
@ -32,7 +32,7 @@ struct User : public IAccessEntity
|
||||
void setName(const String & name_) override;
|
||||
|
||||
std::vector<UUID> findDependencies() const override;
|
||||
void replaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) override;
|
||||
void doReplaceDependencies(const std::unordered_map<UUID, UUID> & old_to_new_ids) override;
|
||||
bool isBackupAllowed() const override { return settings.isBackupAllowed(); }
|
||||
};
|
||||
|
||||
|
@ -3,370 +3,89 @@
|
||||
#include <Parsers/FunctionSecretArgumentsFinder.h>
|
||||
#include <Analyzer/ConstantNode.h>
|
||||
#include <Analyzer/FunctionNode.h>
|
||||
#include <Analyzer/IQueryTreeNode.h>
|
||||
#include <Analyzer/IdentifierNode.h>
|
||||
#include <Analyzer/ListNode.h>
|
||||
#include <Common/KnownObjectNames.h>
|
||||
#include <Core/QualifiedTableName.h>
|
||||
|
||||
#include <boost/algorithm/string/predicate.hpp>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class FunctionTreeNode : public AbstractFunction
|
||||
{
|
||||
public:
|
||||
class ArgumentTreeNode : public Argument
|
||||
{
|
||||
public:
|
||||
explicit ArgumentTreeNode(const IQueryTreeNode * argument_) : argument(argument_) {}
|
||||
std::unique_ptr<AbstractFunction> getFunction() const override
|
||||
{
|
||||
if (const auto * f = argument->as<FunctionNode>())
|
||||
return std::make_unique<FunctionTreeNode>(*f);
|
||||
return nullptr;
|
||||
}
|
||||
bool isIdentifier() const override { return argument->as<IdentifierNode>(); }
|
||||
bool tryGetString(String * res, bool allow_identifier) const override
|
||||
{
|
||||
if (const auto * literal = argument->as<ConstantNode>())
|
||||
{
|
||||
if (literal->getValue().getType() != Field::Types::String)
|
||||
return false;
|
||||
if (res)
|
||||
*res = literal->getValue().safeGet<String>();
|
||||
return true;
|
||||
}
|
||||
|
||||
if (allow_identifier)
|
||||
{
|
||||
if (const auto * id = argument->as<IdentifierNode>())
|
||||
{
|
||||
if (res)
|
||||
*res = id->getIdentifier().getFullName();
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
private:
|
||||
const IQueryTreeNode * argument = nullptr;
|
||||
};
|
||||
|
||||
class ArgumentsTreeNode : public Arguments
|
||||
{
|
||||
public:
|
||||
explicit ArgumentsTreeNode(const QueryTreeNodes * arguments_) : arguments(arguments_) {}
|
||||
size_t size() const override { return arguments ? arguments->size() : 0; }
|
||||
std::unique_ptr<Argument> at(size_t n) const override { return std::make_unique<ArgumentTreeNode>(arguments->at(n).get()); }
|
||||
private:
|
||||
const QueryTreeNodes * arguments = nullptr;
|
||||
};
|
||||
|
||||
explicit FunctionTreeNode(const FunctionNode & function_) : function(&function_)
|
||||
{
|
||||
if (const auto & nodes = function->getArguments().getNodes(); !nodes.empty())
|
||||
arguments = std::make_unique<ArgumentsTreeNode>(&nodes);
|
||||
}
|
||||
String name() const override { return function->getFunctionName(); }
|
||||
private:
|
||||
const FunctionNode * function = nullptr;
|
||||
};
|
||||
|
||||
|
||||
/// Finds arguments of a specified function which should not be displayed for most users for security reasons.
|
||||
/// That involves passwords and secret keys.
|
||||
class FunctionSecretArgumentsFinderTreeNode
|
||||
class FunctionSecretArgumentsFinderTreeNode : public FunctionSecretArgumentsFinder
|
||||
{
|
||||
public:
|
||||
explicit FunctionSecretArgumentsFinderTreeNode(const FunctionNode & function_) : function(function_), arguments(function.getArguments())
|
||||
explicit FunctionSecretArgumentsFinderTreeNode(const FunctionNode & function_)
|
||||
: FunctionSecretArgumentsFinder(std::make_unique<FunctionTreeNode>(function_))
|
||||
{
|
||||
if (arguments.getNodes().empty())
|
||||
if (!function->hasArguments())
|
||||
return;
|
||||
|
||||
findFunctionSecretArguments();
|
||||
findOrdinaryFunctionSecretArguments();
|
||||
}
|
||||
|
||||
struct Result
|
||||
{
|
||||
/// Result constructed by default means no arguments will be hidden.
|
||||
size_t start = static_cast<size_t>(-1);
|
||||
size_t count = 0; /// Mostly it's either 0 or 1. There are only a few cases where `count` can be greater than 1 (e.g. see `encrypt`).
|
||||
/// In all known cases secret arguments are consecutive
|
||||
bool are_named = false; /// Arguments like `password = 'password'` are considered as named arguments.
|
||||
/// E.g. "headers" in `url('..', headers('foo' = '[HIDDEN]'))`
|
||||
std::vector<std::string> nested_maps;
|
||||
|
||||
bool hasSecrets() const
|
||||
{
|
||||
return count != 0 || !nested_maps.empty();
|
||||
}
|
||||
};
|
||||
|
||||
FunctionSecretArgumentsFinder::Result getResult() const { return result; }
|
||||
|
||||
private:
|
||||
const FunctionNode & function;
|
||||
const ListNode & arguments;
|
||||
FunctionSecretArgumentsFinder::Result result;
|
||||
|
||||
void markSecretArgument(size_t index, bool argument_is_named = false)
|
||||
{
|
||||
if (index >= arguments.getNodes().size())
|
||||
return;
|
||||
if (!result.count)
|
||||
{
|
||||
result.start = index;
|
||||
result.are_named = argument_is_named;
|
||||
}
|
||||
chassert(index >= result.start); /// We always check arguments consecutively
|
||||
result.count = index + 1 - result.start;
|
||||
if (!argument_is_named)
|
||||
result.are_named = false;
|
||||
}
|
||||
|
||||
void findFunctionSecretArguments()
|
||||
{
|
||||
const auto & name = function.getFunctionName();
|
||||
|
||||
if ((name == "mysql") || (name == "postgresql") || (name == "mongodb"))
|
||||
{
|
||||
/// mysql('host:port', 'database', 'table', 'user', 'password', ...)
|
||||
/// postgresql('host:port', 'database', 'table', 'user', 'password', ...)
|
||||
/// mongodb('host:port', 'database', 'collection', 'user', 'password', ...)
|
||||
findMySQLFunctionSecretArguments();
|
||||
}
|
||||
else if ((name == "s3") || (name == "cosn") || (name == "oss") ||
|
||||
(name == "deltaLake") || (name == "hudi") || (name == "iceberg"))
|
||||
{
|
||||
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
|
||||
findS3FunctionSecretArguments(/* is_cluster_function= */ false);
|
||||
}
|
||||
else if (name == "s3Cluster")
|
||||
{
|
||||
/// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', ...)
|
||||
findS3FunctionSecretArguments(/* is_cluster_function= */ true);
|
||||
}
|
||||
else if ((name == "remote") || (name == "remoteSecure"))
|
||||
{
|
||||
/// remote('addresses_expr', 'db', 'table', 'user', 'password', ...)
|
||||
findRemoteFunctionSecretArguments();
|
||||
}
|
||||
else if ((name == "encrypt") || (name == "decrypt") ||
|
||||
(name == "aes_encrypt_mysql") || (name == "aes_decrypt_mysql") ||
|
||||
(name == "tryDecrypt"))
|
||||
{
|
||||
/// encrypt('mode', 'plaintext', 'key' [, iv, aad])
|
||||
findEncryptionFunctionSecretArguments();
|
||||
}
|
||||
else if (name == "url")
|
||||
{
|
||||
findURLSecretArguments();
|
||||
}
|
||||
}
|
||||
|
||||
void findMySQLFunctionSecretArguments()
|
||||
{
|
||||
if (isNamedCollectionName(0))
|
||||
{
|
||||
/// mysql(named_collection, ..., password = 'password', ...)
|
||||
findSecretNamedArgument("password", 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
/// mysql('host:port', 'database', 'table', 'user', 'password', ...)
|
||||
markSecretArgument(4);
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the number of arguments excluding "headers" and "extra_credentials" (which should
|
||||
/// always be at the end). Marks "headers" as secret, if found.
|
||||
size_t excludeS3OrURLNestedMaps()
|
||||
{
|
||||
const auto & nodes = arguments.getNodes();
|
||||
size_t count = nodes.size();
|
||||
while (count > 0)
|
||||
{
|
||||
const FunctionNode * f = nodes.at(count - 1)->as<FunctionNode>();
|
||||
if (!f)
|
||||
break;
|
||||
if (f->getFunctionName() == "headers")
|
||||
result.nested_maps.push_back(f->getFunctionName());
|
||||
else if (f->getFunctionName() != "extra_credentials")
|
||||
break;
|
||||
count -= 1;
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
void findS3FunctionSecretArguments(bool is_cluster_function)
|
||||
{
|
||||
/// s3Cluster('cluster_name', 'url', ...) has 'url' as its second argument.
|
||||
size_t url_arg_idx = is_cluster_function ? 1 : 0;
|
||||
|
||||
if (!is_cluster_function && isNamedCollectionName(0))
|
||||
{
|
||||
/// s3(named_collection, ..., secret_access_key = 'secret_access_key', ...)
|
||||
findSecretNamedArgument("secret_access_key", 1);
|
||||
return;
|
||||
}
|
||||
|
||||
/// We should check other arguments first because we don't need to do any replacement in case of
|
||||
/// s3('url', NOSIGN, 'format' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
|
||||
/// s3('url', 'format', 'structure' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
|
||||
size_t count = excludeS3OrURLNestedMaps();
|
||||
if ((url_arg_idx + 3 <= count) && (count <= url_arg_idx + 4))
|
||||
{
|
||||
String second_arg;
|
||||
if (tryGetStringFromArgument(url_arg_idx + 1, &second_arg))
|
||||
{
|
||||
if (boost::iequals(second_arg, "NOSIGN"))
|
||||
return; /// The argument after 'url' is "NOSIGN".
|
||||
|
||||
if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg))
|
||||
return; /// The argument after 'url' is a format: s3('url', 'format', ...)
|
||||
}
|
||||
}
|
||||
|
||||
/// We're going to replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures:
|
||||
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
|
||||
/// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression')
|
||||
if (url_arg_idx + 2 < count)
|
||||
markSecretArgument(url_arg_idx + 2);
|
||||
}
|
||||
|
||||
void findURLSecretArguments()
|
||||
{
|
||||
if (!isNamedCollectionName(0))
|
||||
excludeS3OrURLNestedMaps();
|
||||
}
|
||||
|
||||
bool tryGetStringFromArgument(size_t arg_idx, String * res, bool allow_identifier = true) const
|
||||
{
|
||||
if (arg_idx >= arguments.getNodes().size())
|
||||
return false;
|
||||
|
||||
return tryGetStringFromArgument(arguments.getNodes()[arg_idx], res, allow_identifier);
|
||||
}
|
||||
|
||||
static bool tryGetStringFromArgument(const QueryTreeNodePtr argument, String * res, bool allow_identifier = true)
|
||||
{
|
||||
if (const auto * literal = argument->as<ConstantNode>())
|
||||
{
|
||||
if (literal->getValue().getType() != Field::Types::String)
|
||||
return false;
|
||||
if (res)
|
||||
*res = literal->getValue().safeGet<String>();
|
||||
return true;
|
||||
}
|
||||
|
||||
if (allow_identifier)
|
||||
{
|
||||
if (const auto * id = argument->as<IdentifierNode>())
|
||||
{
|
||||
if (res)
|
||||
*res = id->getIdentifier().getFullName();
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Hides the password in remote()/remoteSecure() calls. The hard part is that the
/// password's position depends on how the database/table pair was spelled, so we
/// first work out how many leading arguments to skip.
void findRemoteFunctionSecretArguments()
{
    if (isNamedCollectionName(0))
    {
        /// remote(named_collection, ..., password = 'password', ...)
        findSecretNamedArgument("password", 1);
        return;
    }

    /// We're going to replace 'password' with '[HIDDEN]' for the following signatures:
    /// remote('addresses_expr', db.table, 'user' [, 'password'] [, sharding_key])
    /// remote('addresses_expr', 'db', 'table', 'user' [, 'password'] [, sharding_key])
    /// remote('addresses_expr', table_function(), 'user' [, 'password'] [, sharding_key])

    /// But we should check the number of arguments first because we don't need to do any replacements in case of
    /// remote('addresses_expr', db.table)
    if (arguments.getNodes().size() < 3)
        return;

    /// Index of the argument currently being examined; starts right after 'addresses_expr'.
    size_t arg_num = 1;

    /// Skip 1 or 2 arguments with table_function() or db.table or 'db', 'table'.
    const auto * table_function = arguments.getNodes()[arg_num]->as<FunctionNode>();
    if (table_function && KnownTableFunctionNames::instance().exists(table_function->getFunctionName()))
    {
        ++arg_num;
    }
    else
    {
        std::optional<String> database;
        std::optional<QualifiedTableName> qualified_table_name;
        if (!tryGetDatabaseNameOrQualifiedTableName(arg_num, database, qualified_table_name))
        {
            /// We couldn't evaluate the argument so we don't know whether it is 'db.table' or just 'db'.
            /// Hence we can't figure out whether we should skip one argument 'user' or two arguments 'table', 'user'
            /// before the argument 'password'. So it's safer to wipe two arguments just in case.
            /// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string
            /// before wiping it (because the `password` argument is always a literal string).
            if (tryGetStringFromArgument(arg_num + 2, nullptr, /* allow_identifier= */ false))
            {
                /// Wipe either `password` or `user`.
                markSecretArgument(arg_num + 2);
            }
            if (tryGetStringFromArgument(arg_num + 3, nullptr, /* allow_identifier= */ false))
            {
                /// Wipe either `password` or `sharding_key`.
                markSecretArgument(arg_num + 3);
            }
            return;
        }

        /// Skip the current argument (which is either a database name or a qualified table name).
        ++arg_num;
        if (database)
        {
            /// Skip the 'table' argument if the previous argument was a database name.
            ++arg_num;
        }
    }

    /// Skip username.
    ++arg_num;

    /// Do our replacement:
    /// remote('addresses_expr', db.table, 'user', 'password', ...) -> remote('addresses_expr', db.table, 'user', '[HIDDEN]', ...)
    /// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string
    /// before wiping it (because the `password` argument is always a literal string).
    bool can_be_password = tryGetStringFromArgument(arg_num, nullptr, /* allow_identifier= */ false);
    if (can_be_password)
        markSecretArgument(arg_num);
}
|
||||
|
||||
/// Tries to get either a database name or a qualified table name from an argument.
|
||||
/// Empty string is also allowed (it means the default database).
|
||||
/// The function is used by findRemoteFunctionSecretArguments() to determine how many arguments to skip before a password.
|
||||
bool tryGetDatabaseNameOrQualifiedTableName(
|
||||
size_t arg_idx,
|
||||
std::optional<String> & res_database,
|
||||
std::optional<QualifiedTableName> & res_qualified_table_name) const
|
||||
{
|
||||
res_database.reset();
|
||||
res_qualified_table_name.reset();
|
||||
|
||||
String str;
|
||||
if (!tryGetStringFromArgument(arg_idx, &str, /* allow_identifier= */ true))
|
||||
return false;
|
||||
|
||||
if (str.empty())
|
||||
{
|
||||
res_database = "";
|
||||
return true;
|
||||
}
|
||||
|
||||
auto qualified_table_name = QualifiedTableName::tryParseFromString(str);
|
||||
if (!qualified_table_name)
|
||||
return false;
|
||||
|
||||
if (qualified_table_name->database.empty())
|
||||
res_database = std::move(qualified_table_name->table);
|
||||
else
|
||||
res_qualified_table_name = std::move(qualified_table_name);
|
||||
return true;
|
||||
}
|
||||
|
||||
void findEncryptionFunctionSecretArguments()
|
||||
{
|
||||
if (arguments.getNodes().empty())
|
||||
return;
|
||||
|
||||
/// We replace all arguments after 'mode' with '[HIDDEN]':
|
||||
/// encrypt('mode', 'plaintext', 'key' [, iv, aad]) -> encrypt('mode', '[HIDDEN]')
|
||||
result.start = 1;
|
||||
result.count = arguments.getNodes().size() - 1;
|
||||
}
|
||||
|
||||
|
||||
/// Whether a specified argument can be the name of a named collection?
|
||||
bool isNamedCollectionName(size_t arg_idx) const
|
||||
{
|
||||
if (arguments.getNodes().size() <= arg_idx)
|
||||
return false;
|
||||
|
||||
const auto * identifier = arguments.getNodes()[arg_idx]->as<IdentifierNode>();
|
||||
return identifier != nullptr;
|
||||
}
|
||||
|
||||
/// Looks for a secret argument with a specified name. This function looks for arguments in format `key=value` where the key is specified.
|
||||
void findSecretNamedArgument(const std::string_view & key, size_t start = 0)
|
||||
{
|
||||
for (size_t i = start; i < arguments.getNodes().size(); ++i)
|
||||
{
|
||||
const auto & argument = arguments.getNodes()[i];
|
||||
const auto * equals_func = argument->as<FunctionNode>();
|
||||
if (!equals_func || (equals_func->getFunctionName() != "equals"))
|
||||
continue;
|
||||
|
||||
const auto * expr_list = equals_func->getArguments().as<ListNode>();
|
||||
if (!expr_list)
|
||||
continue;
|
||||
|
||||
const auto & equal_args = expr_list->getNodes();
|
||||
if (equal_args.size() != 2)
|
||||
continue;
|
||||
|
||||
String found_key;
|
||||
if (!tryGetStringFromArgument(equal_args[0], &found_key))
|
||||
continue;
|
||||
|
||||
if (found_key == key)
|
||||
markSecretArgument(i, /* argument_is_named= */ true);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -2564,8 +2564,8 @@ void checkFunctionNodeHasEmptyNullsAction(FunctionNode const & node)
|
||||
if (node.getNullsAction() != NullsAction::EMPTY)
|
||||
throw Exception(
|
||||
ErrorCodes::SYNTAX_ERROR,
|
||||
"Function with name '{}' cannot use {} NULLS",
|
||||
node.getFunctionName(),
|
||||
"Function with name {} cannot use {} NULLS",
|
||||
backQuote(node.getFunctionName()),
|
||||
node.getNullsAction() == NullsAction::IGNORE_NULLS ? "IGNORE" : "RESPECT");
|
||||
}
|
||||
}
|
||||
@ -3228,16 +3228,16 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
|
||||
auto hints = NamePrompter<2>::getHints(function_name, possible_function_names);
|
||||
|
||||
throw Exception(ErrorCodes::UNKNOWN_FUNCTION,
|
||||
"Function with name '{}' does not exist. In scope {}{}",
|
||||
function_name,
|
||||
"Function with name {} does not exist. In scope {}{}",
|
||||
backQuote(function_name),
|
||||
scope.scope_node->formatASTForErrorMessage(),
|
||||
getHintsErrorMessageSuffix(hints));
|
||||
}
|
||||
|
||||
if (!function_lambda_arguments_indexes.empty())
|
||||
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
|
||||
"Aggregate function '{}' does not support lambda arguments",
|
||||
function_name);
|
||||
"Aggregate function {} does not support lambda arguments",
|
||||
backQuote(function_name));
|
||||
|
||||
auto action = function_node_ptr->getNullsAction();
|
||||
std::string aggregate_function_name = rewriteAggregateFunctionNameIfNeeded(function_name, action, scope.context);
|
||||
@ -3674,10 +3674,10 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(
|
||||
|
||||
auto hints = IdentifierResolver::collectIdentifierTypoHints(unresolved_identifier, valid_identifiers);
|
||||
|
||||
throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "Unknown {}{} identifier '{}' in scope {}{}",
|
||||
throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "Unknown {}{} identifier {} in scope {}{}",
|
||||
toStringLowercase(IdentifierLookupContext::EXPRESSION),
|
||||
message_clarification,
|
||||
unresolved_identifier.getFullName(),
|
||||
backQuote(unresolved_identifier.getFullName()),
|
||||
scope.scope_node->formatASTForErrorMessage(),
|
||||
getHintsErrorMessageSuffix(hints));
|
||||
}
|
||||
|
@ -456,6 +456,9 @@ void Connection::sendAddendum()
|
||||
writeStringBinary(proto_recv_chunked, *out);
|
||||
}
|
||||
|
||||
if (server_revision >= DBMS_MIN_REVISION_WITH_VERSIONED_PARALLEL_REPLICAS_PROTOCOL)
|
||||
writeVarUInt(DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION, *out);
|
||||
|
||||
out->next();
|
||||
}
|
||||
|
||||
@ -526,6 +529,8 @@ void Connection::receiveHello(const Poco::Timespan & handshake_timeout)
|
||||
readVarUInt(server_version_major, *in);
|
||||
readVarUInt(server_version_minor, *in);
|
||||
readVarUInt(server_revision, *in);
|
||||
if (server_revision >= DBMS_MIN_REVISION_WITH_VERSIONED_PARALLEL_REPLICAS_PROTOCOL)
|
||||
readVarUInt(server_parallel_replicas_protocol_version, *in);
|
||||
if (server_revision >= DBMS_MIN_REVISION_WITH_SERVER_TIMEZONE)
|
||||
readStringBinary(server_timezone, *in);
|
||||
if (server_revision >= DBMS_MIN_REVISION_WITH_SERVER_DISPLAY_NAME)
|
||||
@ -966,7 +971,7 @@ void Connection::sendReadTaskResponse(const String & response)
|
||||
void Connection::sendMergeTreeReadTaskResponse(const ParallelReadResponse & response)
|
||||
{
|
||||
writeVarUInt(Protocol::Client::MergeTreeReadTaskResponse, *out);
|
||||
response.serialize(*out);
|
||||
response.serialize(*out, server_parallel_replicas_protocol_version);
|
||||
out->finishChunk();
|
||||
out->next();
|
||||
}
|
||||
@ -1420,7 +1425,7 @@ ParallelReadRequest Connection::receiveParallelReadRequest() const
|
||||
|
||||
InitialAllRangesAnnouncement Connection::receiveInitialParallelReadAnnouncement() const
|
||||
{
|
||||
return InitialAllRangesAnnouncement::deserialize(*in);
|
||||
return InitialAllRangesAnnouncement::deserialize(*in, server_parallel_replicas_protocol_version);
|
||||
}
|
||||
|
||||
|
||||
|
@ -212,6 +212,7 @@ private:
|
||||
UInt64 server_version_minor = 0;
|
||||
UInt64 server_version_patch = 0;
|
||||
UInt64 server_revision = 0;
|
||||
UInt64 server_parallel_replicas_protocol_version = 0;
|
||||
String server_timezone;
|
||||
String server_display_name;
|
||||
|
||||
|
@ -64,6 +64,7 @@ static struct InitFiu
|
||||
REGULAR(lazy_pipe_fds_fail_close) \
|
||||
PAUSEABLE(infinite_sleep) \
|
||||
PAUSEABLE(stop_moving_part_before_swap_with_active) \
|
||||
REGULAR(slowdown_index_analysis) \
|
||||
|
||||
|
||||
namespace FailPoints
|
||||
|
@ -376,6 +376,7 @@ The server successfully detected this situation and will download merged part fr
|
||||
M(ParallelReplicasReadAssignedMarks, "Sum across all replicas of how many of scheduled marks were assigned by consistent hash") \
|
||||
M(ParallelReplicasReadUnassignedMarks, "Sum across all replicas of how many unassigned marks were scheduled") \
|
||||
M(ParallelReplicasReadAssignedForStealingMarks, "Sum across all replicas of how many of scheduled marks were assigned for stealing by consistent hash") \
|
||||
M(ParallelReplicasReadMarks, "How many marks were read by the given replica") \
|
||||
\
|
||||
M(ParallelReplicasStealingByHashMicroseconds, "Time spent collecting segments meant for stealing by hash") \
|
||||
M(ParallelReplicasProcessingPartsMicroseconds, "Time spent processing data parts") \
|
||||
@ -529,6 +530,7 @@ The server successfully detected this situation and will download merged part fr
|
||||
M(CachedReadBufferReadFromCacheMicroseconds, "Time reading from filesystem cache") \
|
||||
M(CachedReadBufferReadFromSourceBytes, "Bytes read from filesystem cache source (from remote fs, etc)") \
|
||||
M(CachedReadBufferReadFromCacheBytes, "Bytes read from filesystem cache") \
|
||||
M(CachedReadBufferPredownloadedBytes, "Bytes read from filesystem cache source. Cache segments are read from left to right as a whole, it might be that we need to predownload some part of the segment irrelevant for the current task just to get to the needed data") \
|
||||
M(CachedReadBufferCacheWriteBytes, "Bytes written from source (remote fs, etc) to filesystem cache") \
|
||||
M(CachedReadBufferCacheWriteMicroseconds, "Time spent writing data into filesystem cache") \
|
||||
M(CachedReadBufferCreateBufferMicroseconds, "Prepare buffer time") \
|
||||
|
@ -1,4 +1,4 @@
|
||||
clickhouse_add_executable(integer_hash_tables_and_hashes integer_hash_tables_and_hashes.cpp)
|
||||
clickhouse_add_executable(integer_hash_tables_and_hashes integer_hash_tables_and_hashes.cpp orc_string_dictionary.cpp)
|
||||
target_link_libraries (integer_hash_tables_and_hashes PRIVATE
|
||||
ch_contrib::gbenchmark_all
|
||||
dbms
|
||||
@ -7,3 +7,8 @@ target_link_libraries (integer_hash_tables_and_hashes PRIVATE
|
||||
ch_contrib::wyhash
|
||||
ch_contrib::farmhash
|
||||
ch_contrib::xxHash)
|
||||
|
||||
clickhouse_add_executable(orc_string_dictionary orc_string_dictionary.cpp)
|
||||
target_link_libraries (orc_string_dictionary PRIVATE
|
||||
ch_contrib::gbenchmark_all
|
||||
dbms)
|
||||
|
311
src/Common/benchmarks/orc_string_dictionary.cpp
Normal file
311
src/Common/benchmarks/orc_string_dictionary.cpp
Normal file
@ -0,0 +1,311 @@
|
||||
#include <cstdlib>
#include <cstring>

#include <algorithm>
#include <map>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

#include <base/defines.h>

#include <benchmark/benchmark.h>
|
||||
|
||||
/// Baseline ("old") ORC-style sorted string dictionary used for comparison in the
/// benchmarks below: entries are kept ordered in a std::map, so every insert pays
/// O(log n) string comparisons plus a tree-node allocation.
class OldSortedStringDictionary
{
public:
    /// Non-owning (pointer, length) view of a dictionary string.
    struct DictEntry
    {
        DictEntry(const char * str, size_t len) : data(str), length(len) { }
        const char * data;
        size_t length;
    };

    OldSortedStringDictionary() : totalLength(0) { }

    // insert a new string into dictionary, return its insertion order
    size_t insert(const char * str, size_t len);

    // reorder input index buffer from insertion order to dictionary order
    void reorder(std::vector<int64_t> & idxBuffer) const;

    // get dict entries in insertion order
    void getEntriesInInsertionOrder(std::vector<const DictEntry *> &) const;

    // return count of entries
    size_t size() const;

    // return total length of strings in the dictionary
    uint64_t length() const;

    void clear();

    // store indexes of insertion order in the dictionary for not-null rows
    std::vector<int64_t> idxInDictBuffer;

private:
    /// Lexicographic comparison of the viewed bytes; ties broken by length.
    struct LessThan
    {
        bool operator()(const DictEntry & left, const DictEntry & right) const
        {
            int ret = memcmp(left.data, right.data, std::min(left.length, right.length));
            if (ret != 0)
            {
                return ret < 0;
            }
            return left.length < right.length;
        }
    };

    /// entry -> insertion index; map iteration order is the sorted (dictionary) order.
    std::map<DictEntry, size_t, LessThan> dict;
    /// Owns the string bytes; DictEntry::data points into these buffers after insert().
    std::vector<std::vector<char>> data;
    uint64_t totalLength;
};
|
||||
|
||||
// insert a new string into dictionary, return its insertion order
size_t OldSortedStringDictionary::insert(const char * str, size_t len)
{
    auto ret = dict.insert({DictEntry(str, len), dict.size()});
    if (ret.second)
    {
        // make a copy to internal storage
        data.push_back(std::vector<char>(len));
        memcpy(data.back().data(), str, len);
        // update dictionary entry to link pointer to internal storage
        // NOTE: casts away constness of the map key to repoint it at the owned copy;
        // tolerable only because the new bytes compare equal to the old ones, so the
        // map ordering is unchanged.
        DictEntry * entry = const_cast<DictEntry *>(&(ret.first->first));
        entry->data = data.back().data();
        totalLength += len;
    }
    return ret.first->second;
}

/**
 * Reorder input index buffer from insertion order to dictionary order
 *
 * We require this function because string values are buffered by indexes
 * in their insertion order. Until the entire dictionary is complete can
 * we get their sorted indexes in the dictionary in that ORC specification
 * demands dictionary should be ordered. Therefore this function transforms
 * the indexes from insertion order to dictionary value order for final
 * output.
 */
void OldSortedStringDictionary::reorder(std::vector<int64_t> & idxBuffer) const
{
    // iterate the dictionary to get mapping from insertion order to value order
    std::vector<size_t> mapping(dict.size());
    size_t dictIdx = 0;
    for (auto it = dict.cbegin(); it != dict.cend(); ++it)
    {
        mapping[it->second] = dictIdx++;
    }

    // do the transformation
    for (size_t i = 0; i != idxBuffer.size(); ++i)
    {
        idxBuffer[i] = static_cast<int64_t>(mapping[static_cast<size_t>(idxBuffer[i])]);
    }
}

// get dict entries in insertion order
void OldSortedStringDictionary::getEntriesInInsertionOrder(std::vector<const DictEntry *> & entries) const
{
    // the stored insertion index of each map entry tells us its slot
    entries.resize(dict.size());
    for (auto it = dict.cbegin(); it != dict.cend(); ++it)
    {
        entries[it->second] = &(it->first);
    }
}

// return count of entries
size_t OldSortedStringDictionary::size() const
{
    return dict.size();
}

// return total length of strings in the dictionary
uint64_t OldSortedStringDictionary::length() const
{
    return totalLength;
}

void OldSortedStringDictionary::clear()
{
    totalLength = 0;
    data.clear();
    dict.clear();
}
|
||||
|
||||
|
||||
/**
 * Implementation of increasing sorted string dictionary
 * ("new" variant: O(1) amortized hash-map inserts into a flat vector,
 * benchmarked against OldSortedStringDictionary above).
 */
class NewSortedStringDictionary
{
public:
    /// Non-owning (pointer, length) view of a dictionary string.
    struct DictEntry
    {
        DictEntry(const char * str, size_t len) : data(str), length(len) { }
        const char * data;
        size_t length;
    };

    /// DictEntry paired with its insertion index.
    struct DictEntryWithIndex
    {
        DictEntryWithIndex(const char * str, size_t len, size_t index_) : entry(str, len), index(index_) { }
        DictEntry entry;
        size_t index;
    };

    NewSortedStringDictionary() : totalLength_(0) { }

    // insert a new string into dictionary, return its insertion order
    size_t insert(const char * str, size_t len);

    // reorder input index buffer from insertion order to dictionary order
    void reorder(std::vector<int64_t> & idxBuffer) const;

    // get dict entries in insertion order
    void getEntriesInInsertionOrder(std::vector<const DictEntry *> &) const;

    // return count of entries
    size_t size() const;

    // return total length of strings in the dictionary
    uint64_t length() const;

    void clear();

    // store indexes of insertion order in the dictionary for not-null rows
    std::vector<int64_t> idxInDictBuffer;

private:
    /// Lexicographic comparison of the wrapped entries; ties broken by length.
    struct LessThan
    {
        bool operator()(const DictEntryWithIndex & l, const DictEntryWithIndex & r)
        {
            const auto & left = l.entry;
            const auto & right = r.entry;
            int ret = memcmp(left.data, right.data, std::min(left.length, right.length));
            if (ret != 0)
            {
                return ret < 0;
            }
            return left.length < right.length;
        }
    };

    /// Entries in insertion order; mutable so const members can re-sort it in place.
    mutable std::vector<DictEntryWithIndex> flatDict_;
    /// Deduplication map: string value -> insertion index. Owns the string bytes;
    /// DictEntry::data points into the map's keys (node addresses are stable across rehash).
    std::unordered_map<std::string, size_t> keyToIndex;
    uint64_t totalLength_;
};
||||
|
||||
// insert a new string into dictionary, return its insertion order
|
||||
size_t NewSortedStringDictionary::insert(const char * str, size_t len)
|
||||
{
|
||||
size_t index = flatDict_.size();
|
||||
auto ret = keyToIndex.emplace(std::string(str, len), index);
|
||||
if (ret.second)
|
||||
{
|
||||
flatDict_.emplace_back(ret.first->first.data(), ret.first->first.size(), index);
|
||||
totalLength_ += len;
|
||||
}
|
||||
return ret.first->second;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reorder input index buffer from insertion order to dictionary order
|
||||
*
|
||||
* We require this function because string values are buffered by indexes
|
||||
* in their insertion order. Until the entire dictionary is complete can
|
||||
* we get their sorted indexes in the dictionary in that ORC specification
|
||||
* demands dictionary should be ordered. Therefore this function transforms
|
||||
* the indexes from insertion order to dictionary value order for final
|
||||
* output.
|
||||
*/
|
||||
void NewSortedStringDictionary::reorder(std::vector<int64_t> & idxBuffer) const
|
||||
{
|
||||
// iterate the dictionary to get mapping from insertion order to value order
|
||||
std::vector<size_t> mapping(flatDict_.size());
|
||||
for (size_t i = 0; i < flatDict_.size(); ++i)
|
||||
{
|
||||
mapping[flatDict_[i].index] = i;
|
||||
}
|
||||
|
||||
// do the transformation
|
||||
for (size_t i = 0; i != idxBuffer.size(); ++i)
|
||||
{
|
||||
idxBuffer[i] = static_cast<int64_t>(mapping[static_cast<size_t>(idxBuffer[i])]);
|
||||
}
|
||||
}
|
||||
|
||||
// get dict entries in insertion order
|
||||
void NewSortedStringDictionary::getEntriesInInsertionOrder(std::vector<const DictEntry *> & entries) const
|
||||
{
|
||||
std::sort(
|
||||
flatDict_.begin(),
|
||||
flatDict_.end(),
|
||||
[](const DictEntryWithIndex & left, const DictEntryWithIndex & right) { return left.index < right.index; });
|
||||
|
||||
entries.resize(flatDict_.size());
|
||||
for (size_t i = 0; i < flatDict_.size(); ++i)
|
||||
{
|
||||
entries[i] = &(flatDict_[i].entry);
|
||||
}
|
||||
}
|
||||
|
||||
// return count of entries
|
||||
size_t NewSortedStringDictionary::size() const
|
||||
{
|
||||
return flatDict_.size();
|
||||
}
|
||||
|
||||
// return total length of strings in the dictionary
|
||||
uint64_t NewSortedStringDictionary::length() const
|
||||
{
|
||||
return totalLength_;
|
||||
}
|
||||
|
||||
void NewSortedStringDictionary::clear()
|
||||
{
|
||||
totalLength_ = 0;
|
||||
keyToIndex.clear();
|
||||
flatDict_.clear();
|
||||
}
|
||||
|
||||
/// Produces one million benchmark strings drawn from `cardinality` distinct values.
/// Uses unseeded rand(), so the generated sequence is identical on every run.
template <size_t cardinality>
static std::vector<std::string> mockStrings()
{
    constexpr size_t total_strings = 1000000;
    std::vector<std::string> strings(total_strings);
    for (auto & str : strings)
        str = "test string dictionary " + std::to_string(rand() % cardinality);
    return strings;
}
|
||||
|
||||
template <typename DictionaryImpl>
|
||||
static NO_INLINE std::unique_ptr<DictionaryImpl> createAndWriteStringDictionary(const std::vector<std::string> & strs)
|
||||
{
|
||||
auto dict = std::make_unique<DictionaryImpl>();
|
||||
for (const auto & str : strs)
|
||||
{
|
||||
auto index = dict->insert(str.data(), str.size());
|
||||
dict->idxInDictBuffer.push_back(index);
|
||||
}
|
||||
dict->reorder(dict->idxInDictBuffer);
|
||||
|
||||
return dict;
|
||||
}
|
||||
|
||||
/// Benchmark body: builds the mock corpus once, then measures repeated
/// build-and-reorder cycles for the given dictionary implementation/cardinality.
template <typename DictionaryImpl, size_t cardinality>
static void BM_writeStringDictionary(benchmark::State & state)
{
    auto strs = mockStrings<cardinality>();
    for (auto _ : state)
    {
        auto dict = createAndWriteStringDictionary<DictionaryImpl>(strs);
        // prevent the compiler from discarding the result
        benchmark::DoNotOptimize(dict);
    }
}
|
||||
|
||||
/// Compare old (std::map) vs new (hash-map + flat vector) dictionary
/// at cardinalities from 10 to 100000 distinct strings.
BENCHMARK_TEMPLATE(BM_writeStringDictionary, OldSortedStringDictionary, 10);
BENCHMARK_TEMPLATE(BM_writeStringDictionary, NewSortedStringDictionary, 10);
BENCHMARK_TEMPLATE(BM_writeStringDictionary, OldSortedStringDictionary, 100);
BENCHMARK_TEMPLATE(BM_writeStringDictionary, NewSortedStringDictionary, 100);
BENCHMARK_TEMPLATE(BM_writeStringDictionary, OldSortedStringDictionary, 1000);
BENCHMARK_TEMPLATE(BM_writeStringDictionary, NewSortedStringDictionary, 1000);
BENCHMARK_TEMPLATE(BM_writeStringDictionary, OldSortedStringDictionary, 10000);
BENCHMARK_TEMPLATE(BM_writeStringDictionary, NewSortedStringDictionary, 10000);
BENCHMARK_TEMPLATE(BM_writeStringDictionary, OldSortedStringDictionary, 100000);
BENCHMARK_TEMPLATE(BM_writeStringDictionary, NewSortedStringDictionary, 100000);
|
||||
|
@ -28,6 +28,16 @@
|
||||
#include <Common/getMultipleKeysFromConfig.h>
|
||||
#include <Common/getNumberOfPhysicalCPUCores.h>
|
||||
|
||||
#if USE_SSL
|
||||
# include <Server/CertificateReloader.h>
|
||||
# include <openssl/ssl.h>
|
||||
# include <Poco/Crypto/EVPPKey.h>
|
||||
# include <Poco/Net/Context.h>
|
||||
# include <Poco/Net/SSLManager.h>
|
||||
# include <Poco/Net/Utility.h>
|
||||
# include <Poco/StringTokenizer.h>
|
||||
#endif
|
||||
|
||||
#include <chrono>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
@ -48,6 +58,7 @@ namespace ErrorCodes
|
||||
extern const int SUPPORT_IS_DISABLED;
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int INVALID_CONFIG_PARAMETER;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
using namespace std::chrono_literals;
|
||||
@ -56,6 +67,16 @@ namespace
|
||||
{
|
||||
|
||||
#if USE_SSL
|
||||
|
||||
int callSetCertificate(SSL * ssl, void * arg)
|
||||
{
|
||||
if (!arg)
|
||||
return -1;
|
||||
|
||||
const CertificateReloader::Data * data = reinterpret_cast<CertificateReloader::Data *>(arg);
|
||||
return setCertificateCallback(ssl, data, getLogger("SSLContext"));
|
||||
}
|
||||
|
||||
void setSSLParams(nuraft::asio_service::options & asio_opts)
|
||||
{
|
||||
const Poco::Util::LayeredConfiguration & config = Poco::Util::Application::instance().config();
|
||||
@ -69,18 +90,55 @@ void setSSLParams(nuraft::asio_service::options & asio_opts)
|
||||
if (!config.has(private_key_file_property))
|
||||
throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Server private key file is not set.");
|
||||
|
||||
asio_opts.enable_ssl_ = true;
|
||||
asio_opts.server_cert_file_ = config.getString(certificate_file_property);
|
||||
asio_opts.server_key_file_ = config.getString(private_key_file_property);
|
||||
Poco::Net::Context::Params params;
|
||||
params.certificateFile = config.getString(certificate_file_property);
|
||||
if (params.certificateFile.empty())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Server certificate file in config '{}' is empty", certificate_file_property);
|
||||
|
||||
params.privateKeyFile = config.getString(private_key_file_property);
|
||||
if (params.privateKeyFile.empty())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Server key file in config '{}' is empty", private_key_file_property);
|
||||
|
||||
auto pass_phrase = config.getString("openSSL.server.privateKeyPassphraseHandler.options.password", "");
|
||||
auto certificate_data = std::make_shared<CertificateReloader::Data>(params.certificateFile, params.privateKeyFile, pass_phrase);
|
||||
|
||||
if (config.has(root_ca_file_property))
|
||||
asio_opts.root_cert_file_ = config.getString(root_ca_file_property);
|
||||
params.caLocation = config.getString(root_ca_file_property);
|
||||
|
||||
if (config.getBool("openSSL.server.loadDefaultCAFile", false))
|
||||
asio_opts.load_default_ca_file_ = true;
|
||||
params.loadDefaultCAs = config.getBool("openSSL.server.loadDefaultCAFile", false);
|
||||
params.verificationMode = Poco::Net::Utility::convertVerificationMode(config.getString("openSSL.server.verificationMode", "none"));
|
||||
|
||||
if (config.getString("openSSL.server.verificationMode", "none") == "none")
|
||||
asio_opts.skip_verification_ = true;
|
||||
std::string disabled_protocols_list = config.getString("openSSL.server.disableProtocols", "");
|
||||
Poco::StringTokenizer dp_tok(disabled_protocols_list, ";,", Poco::StringTokenizer::TOK_TRIM | Poco::StringTokenizer::TOK_IGNORE_EMPTY);
|
||||
int disabled_protocols = 0;
|
||||
for (const auto & token : dp_tok)
|
||||
{
|
||||
if (token == "sslv2")
|
||||
disabled_protocols |= Poco::Net::Context::PROTO_SSLV2;
|
||||
else if (token == "sslv3")
|
||||
disabled_protocols |= Poco::Net::Context::PROTO_SSLV3;
|
||||
else if (token == "tlsv1")
|
||||
disabled_protocols |= Poco::Net::Context::PROTO_TLSV1;
|
||||
else if (token == "tlsv1_1")
|
||||
disabled_protocols |= Poco::Net::Context::PROTO_TLSV1_1;
|
||||
else if (token == "tlsv1_2")
|
||||
disabled_protocols |= Poco::Net::Context::PROTO_TLSV1_2;
|
||||
}
|
||||
|
||||
asio_opts.ssl_context_provider_server_ = [params, certificate_data, disabled_protocols]
|
||||
{
|
||||
Poco::Net::Context context(Poco::Net::Context::Usage::TLSV1_2_SERVER_USE, params);
|
||||
context.disableProtocols(disabled_protocols);
|
||||
SSL_CTX * ssl_ctx = context.takeSslContext();
|
||||
SSL_CTX_set_cert_cb(ssl_ctx, callSetCertificate, reinterpret_cast<void *>(certificate_data.get()));
|
||||
return ssl_ctx;
|
||||
};
|
||||
|
||||
asio_opts.ssl_context_provider_client_ = [ctx_params = std::move(params)]
|
||||
{
|
||||
Poco::Net::Context context(Poco::Net::Context::Usage::TLSV1_2_CLIENT_USE, ctx_params);
|
||||
return context.takeSslContext();
|
||||
};
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -1777,7 +1777,8 @@ private:
|
||||
auto child_path = (root_fs_path / child_name).generic_string();
|
||||
const auto actual_child_node_ptr = storage.uncommitted_state.getActualNodeView(child_path, child_node);
|
||||
|
||||
if (actual_child_node_ptr == nullptr) /// node was deleted in previous step of multi transaction
|
||||
/// if node was changed in previous step of multi transaction - skip until the uncommitted state visit
|
||||
if (actual_child_node_ptr != &child_node)
|
||||
continue;
|
||||
|
||||
if (checkLimits(actual_child_node_ptr))
|
||||
@ -1811,7 +1812,8 @@ private:
|
||||
|
||||
const auto actual_child_node_ptr = storage.uncommitted_state.getActualNodeView(child_path, child_node);
|
||||
|
||||
if (actual_child_node_ptr == nullptr) /// node was deleted in previous step of multi transaction
|
||||
/// if node was changed in previous step of multi transaction - skip until the uncommitted state visit
|
||||
if (actual_child_node_ptr != &child_node)
|
||||
continue;
|
||||
|
||||
if (checkLimits(actual_child_node_ptr))
|
||||
|
@ -613,13 +613,15 @@ public:
|
||||
|
||||
struct PathCmp
|
||||
{
|
||||
using is_transparent = std::true_type;
|
||||
|
||||
auto operator()(const std::string_view a,
|
||||
const std::string_view b) const
|
||||
{
|
||||
return a.size() < b.size() || (a.size() == b.size() && a < b);
|
||||
size_t level_a = std::count(a.begin(), a.end(), '/');
|
||||
size_t level_b = std::count(b.begin(), b.end(), '/');
|
||||
return level_a < level_b || (level_a == level_b && a < b);
|
||||
}
|
||||
|
||||
using is_transparent = void; // required to make find() work with different type than key_type
|
||||
};
|
||||
|
||||
mutable std::map<std::string, UncommittedNode, PathCmp> nodes;
|
||||
|
@ -3738,6 +3738,72 @@ TYPED_TEST(CoordinationTest, TestRemoveRecursiveInMultiRequest)
|
||||
ASSERT_FALSE(exists("/A/B"));
|
||||
ASSERT_FALSE(exists("/A/B/D"));
|
||||
}
|
||||
|
||||
{
|
||||
SCOPED_TRACE("Recursive Remove For Subtree With Updated Node");
|
||||
int create_zxid = ++zxid;
|
||||
auto ops = prepare_create_tree();
|
||||
|
||||
/// First create nodes
|
||||
const auto create_request = std::make_shared<ZooKeeperMultiRequest>(ops, ACLs{});
|
||||
storage.preprocessRequest(create_request, 1, 0, create_zxid);
|
||||
auto create_responses = storage.processRequest(create_request, 1, create_zxid);
|
||||
ASSERT_EQ(create_responses.size(), 1);
|
||||
ASSERT_TRUE(is_multi_ok(create_responses[0].response));
|
||||
|
||||
/// Small limit
|
||||
int remove_zxid = ++zxid;
|
||||
ops = {
|
||||
zkutil::makeSetRequest("/A/B", "", -1),
|
||||
zkutil::makeRemoveRecursiveRequest("/A", 3),
|
||||
};
|
||||
auto remove_request = std::make_shared<ZooKeeperMultiRequest>(ops, ACLs{});
|
||||
storage.preprocessRequest(remove_request, 1, 0, remove_zxid);
|
||||
auto remove_responses = storage.processRequest(remove_request, 1, remove_zxid);
|
||||
|
||||
ASSERT_EQ(remove_responses.size(), 1);
|
||||
ASSERT_FALSE(is_multi_ok(remove_responses[0].response));
|
||||
|
||||
/// Big limit
|
||||
remove_zxid = ++zxid;
|
||||
ops[1] = zkutil::makeRemoveRecursiveRequest("/A", 4);
|
||||
remove_request = std::make_shared<ZooKeeperMultiRequest>(ops, ACLs{});
|
||||
storage.preprocessRequest(remove_request, 1, 0, remove_zxid);
|
||||
remove_responses = storage.processRequest(remove_request, 1, remove_zxid);
|
||||
|
||||
ASSERT_EQ(remove_responses.size(), 1);
|
||||
ASSERT_TRUE(is_multi_ok(remove_responses[0].response));
|
||||
ASSERT_FALSE(exists("/A"));
|
||||
ASSERT_FALSE(exists("/A/C"));
|
||||
ASSERT_FALSE(exists("/A/B"));
|
||||
ASSERT_FALSE(exists("/A/B/D"));
|
||||
}
|
||||
|
||||
{
|
||||
SCOPED_TRACE("[BUG] Recursive Remove Level Sorting");
|
||||
int new_zxid = ++zxid;
|
||||
|
||||
Coordination::Requests ops = {
|
||||
zkutil::makeCreateRequest("/a", "", zkutil::CreateMode::Persistent),
|
||||
zkutil::makeCreateRequest("/a/bbbbbb", "", zkutil::CreateMode::Persistent),
|
||||
zkutil::makeCreateRequest("/A", "", zkutil::CreateMode::Persistent),
|
||||
zkutil::makeCreateRequest("/A/B", "", zkutil::CreateMode::Persistent),
|
||||
zkutil::makeCreateRequest("/A/CCCCCCCCCCCC", "", zkutil::CreateMode::Persistent),
|
||||
zkutil::makeRemoveRecursiveRequest("/A", 3),
|
||||
};
|
||||
auto remove_request = std::make_shared<ZooKeeperMultiRequest>(ops, ACLs{});
|
||||
storage.preprocessRequest(remove_request, 1, 0, new_zxid);
|
||||
auto remove_responses = storage.processRequest(remove_request, 1, new_zxid);
|
||||
|
||||
ASSERT_EQ(remove_responses.size(), 1);
|
||||
ASSERT_TRUE(is_multi_ok(remove_responses[0].response));
|
||||
ASSERT_TRUE(exists("/a"));
|
||||
ASSERT_TRUE(exists("/a/bbbbbb"));
|
||||
ASSERT_FALSE(exists("/A"));
|
||||
ASSERT_FALSE(exists("/A/B"));
|
||||
ASSERT_FALSE(exists("/A/CCCCCCCCCCCC"));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
TYPED_TEST(CoordinationTest, TestRemoveRecursiveWatches)
|
||||
|
@ -33,7 +33,8 @@ static constexpr auto DBMS_MIN_REVISION_WITH_AGGREGATE_FUNCTIONS_VERSIONING = 54
|
||||
|
||||
static constexpr auto DBMS_CLUSTER_PROCESSING_PROTOCOL_VERSION = 1;
|
||||
|
||||
static constexpr auto DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION = 3;
|
||||
static constexpr auto DBMS_MIN_SUPPORTED_PARALLEL_REPLICAS_PROTOCOL_VERSION = 3;
|
||||
static constexpr auto DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION = 4;
|
||||
static constexpr auto DBMS_MIN_REVISION_WITH_PARALLEL_REPLICAS = 54453;
|
||||
|
||||
static constexpr auto DBMS_MERGE_TREE_PART_INFO_VERSION = 1;
|
||||
@ -86,6 +87,8 @@ static constexpr auto DBMS_MIN_REVISION_WITH_ROWS_BEFORE_AGGREGATION = 54469;
|
||||
/// Packets size header
|
||||
static constexpr auto DBMS_MIN_PROTOCOL_VERSION_WITH_CHUNKED_PACKETS = 54470;
|
||||
|
||||
static constexpr auto DBMS_MIN_REVISION_WITH_VERSIONED_PARALLEL_REPLICAS_PROTOCOL = 54471;
|
||||
|
||||
/// Version of ClickHouse TCP protocol.
|
||||
///
|
||||
/// Should be incremented manually on protocol changes.
|
||||
@ -93,6 +96,6 @@ static constexpr auto DBMS_MIN_PROTOCOL_VERSION_WITH_CHUNKED_PACKETS = 54470;
|
||||
/// NOTE: DBMS_TCP_PROTOCOL_VERSION has nothing common with VERSION_REVISION,
|
||||
/// later is just a number for server version (one number instead of commit SHA)
|
||||
/// for simplicity (sometimes it may be more convenient in some use cases).
|
||||
static constexpr auto DBMS_TCP_PROTOCOL_VERSION = 54470;
|
||||
static constexpr auto DBMS_TCP_PROTOCOL_VERSION = 54471;
|
||||
|
||||
}
|
||||
|
@ -947,7 +947,7 @@ class IColumn;
|
||||
M(Bool, parallel_replicas_for_non_replicated_merge_tree, false, "If true, ClickHouse will use parallel replicas algorithm also for non-replicated MergeTree tables", 0) \
|
||||
M(UInt64, parallel_replicas_min_number_of_rows_per_replica, 0, "Limit the number of replicas used in a query to (estimated rows to read / min_number_of_rows_per_replica). The max is still limited by 'max_parallel_replicas'", 0) \
|
||||
M(Bool, parallel_replicas_prefer_local_join, true, "If true, and JOIN can be executed with parallel replicas algorithm, and all storages of right JOIN part are *MergeTree, local JOIN will be used instead of GLOBAL JOIN.", 0) \
|
||||
M(UInt64, parallel_replicas_mark_segment_size, 128, "Parts virtually divided into segments to be distributed between replicas for parallel reading. This setting controls the size of these segments. Not recommended to change until you're absolutely sure in what you're doing", 0) \
|
||||
M(UInt64, parallel_replicas_mark_segment_size, 0, "Parts virtually divided into segments to be distributed between replicas for parallel reading. This setting controls the size of these segments. Not recommended to change until you're absolutely sure in what you're doing. Value should be in range [128; 16384]", 0) \
|
||||
M(Bool, allow_archive_path_syntax, true, "File/S3 engines/table function will parse paths with '::' as '<archive> :: <file>' if archive has correct extension", 0) \
|
||||
M(Bool, parallel_replicas_local_plan, false, "Build local plan for local replica", 0) \
|
||||
\
|
||||
@ -1272,6 +1272,7 @@ class IColumn;
|
||||
M(Bool, output_format_orc_string_as_string, true, "Use ORC String type instead of Binary for String columns", 0) \
|
||||
M(ORCCompression, output_format_orc_compression_method, "zstd", "Compression method for ORC output format. Supported codecs: lz4, snappy, zlib, zstd, none (uncompressed)", 0) \
|
||||
M(UInt64, output_format_orc_row_index_stride, 10'000, "Target row index stride in ORC output format", 0) \
|
||||
M(Double, output_format_orc_dictionary_key_size_threshold, 0.0, "For a string column in ORC output format, if the number of distinct values is greater than this fraction of the total number of non-null rows, turn off dictionary encoding. Otherwise dictionary encoding is enabled", 0) \
|
||||
\
|
||||
M(CapnProtoEnumComparingMode, format_capn_proto_enum_comparising_mode, FormatSettings::CapnProtoEnumComparingMode::BY_VALUES, "How to map ClickHouse Enum and CapnProto Enum", 0) \
|
||||
\
|
||||
|
@ -71,6 +71,7 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
|
||||
},
|
||||
{"24.9",
|
||||
{
|
||||
{"output_format_orc_dictionary_key_size_threshold", 0.0, 0.0, "For a string column in ORC output format, if the number of distinct values is greater than this fraction of the total number of non-null rows, turn off dictionary encoding. Otherwise dictionary encoding is enabled"},
|
||||
{"input_format_json_empty_as_default", false, false, "Added new setting to allow to treat empty fields in JSON input as default values."},
|
||||
{"input_format_try_infer_variants", false, false, "Try to infer Variant type in text formats when there is more than one possible type for column/array elements"},
|
||||
{"join_output_by_rowlist_perkey_rows_threshold", 0, 5, "The lower limit of per-key average rows in the right table to determine whether to output by row list in hash join."},
|
||||
@ -78,6 +79,7 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
|
||||
{"allow_materialized_view_with_bad_select", true, true, "Support (but not enable yet) stricter validation in CREATE MATERIALIZED VIEW"},
|
||||
{"output_format_always_quote_identifiers", false, false, "New setting."},
|
||||
{"output_format_identifier_quoting_style", "Backticks", "Backticks", "New setting."},
|
||||
{"parallel_replicas_mark_segment_size", 128, 0, "Value for this setting now determined automatically"},
|
||||
{"database_replicated_allow_replicated_engine_arguments", 1, 0, "Don't allow explicit arguments by default"},
|
||||
{"database_replicated_allow_explicit_uuid", 0, 0, "Added a new setting to disallow explicitly specifying table UUID"},
|
||||
{"parallel_replicas_local_plan", false, false, "Use local plan for local replica in a query with parallel replicas"},
|
||||
|
@ -59,35 +59,27 @@ void cckMetadataPathForOrdinary(const ASTCreateQuery & create, const String & me
|
||||
|
||||
}
|
||||
|
||||
/// validate validates the database engine that's specified in the create query for
|
||||
/// engine arguments, settings and table overrides.
|
||||
void validate(const ASTCreateQuery & create_query)
|
||||
|
||||
void DatabaseFactory::validate(const ASTCreateQuery & create_query) const
|
||||
{
|
||||
auto * storage = create_query.storage;
|
||||
|
||||
/// Check engine may have arguments
|
||||
static const std::unordered_set<std::string_view> engines_with_arguments{"MySQL", "MaterializeMySQL", "MaterializedMySQL",
|
||||
"Lazy", "Replicated", "PostgreSQL", "MaterializedPostgreSQL", "SQLite", "Filesystem", "S3", "HDFS"};
|
||||
|
||||
const String & engine_name = storage->engine->name;
|
||||
bool engine_may_have_arguments = engines_with_arguments.contains(engine_name);
|
||||
const EngineFeatures & engine_features = database_engines.at(engine_name).features;
|
||||
|
||||
if (storage->engine->arguments && !engine_may_have_arguments)
|
||||
/// Check engine may have arguments
|
||||
if (storage->engine->arguments && !engine_features.supports_arguments)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Database engine `{}` cannot have arguments", engine_name);
|
||||
|
||||
/// Check engine may have settings
|
||||
bool may_have_settings = endsWith(engine_name, "MySQL") || engine_name == "Replicated" || engine_name == "MaterializedPostgreSQL";
|
||||
bool has_unexpected_element = storage->engine->parameters || storage->partition_by ||
|
||||
storage->primary_key || storage->order_by ||
|
||||
storage->sample_by;
|
||||
if (has_unexpected_element || (!may_have_settings && storage->settings))
|
||||
if (has_unexpected_element || (!engine_features.supports_settings && storage->settings))
|
||||
throw Exception(ErrorCodes::UNKNOWN_ELEMENT_IN_AST,
|
||||
"Database engine `{}` cannot have parameters, primary_key, order_by, sample_by, settings", engine_name);
|
||||
|
||||
/// Check engine with table overrides
|
||||
static const std::unordered_set<std::string_view> engines_with_table_overrides{"MaterializeMySQL", "MaterializedMySQL", "MaterializedPostgreSQL"};
|
||||
if (create_query.table_overrides && !engines_with_table_overrides.contains(engine_name))
|
||||
if (create_query.table_overrides && !engine_features.supports_table_overrides)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Database engine `{}` cannot have table overrides", engine_name);
|
||||
}
|
||||
|
||||
@ -121,9 +113,9 @@ DatabasePtr DatabaseFactory::get(const ASTCreateQuery & create, const String & m
|
||||
return impl;
|
||||
}
|
||||
|
||||
void DatabaseFactory::registerDatabase(const std::string & name, CreatorFn creator_fn)
|
||||
void DatabaseFactory::registerDatabase(const std::string & name, CreatorFn creator_fn, EngineFeatures features)
|
||||
{
|
||||
if (!database_engines.emplace(name, std::move(creator_fn)).second)
|
||||
if (!database_engines.emplace(name, Creator{std::move(creator_fn), features}).second)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "DatabaseFactory: the database engine name '{}' is not unique", name);
|
||||
}
|
||||
|
||||
@ -154,7 +146,7 @@ DatabasePtr DatabaseFactory::getImpl(const ASTCreateQuery & create, const String
|
||||
.context = context};
|
||||
|
||||
// creator_fn creates and returns a DatabasePtr with the supplied arguments
|
||||
auto creator_fn = database_engines.at(engine_name);
|
||||
auto creator_fn = database_engines.at(engine_name).creator_fn;
|
||||
|
||||
return creator_fn(arguments);
|
||||
}
|
||||
|
@ -43,13 +43,30 @@ public:
|
||||
ContextPtr & context;
|
||||
};
|
||||
|
||||
DatabasePtr get(const ASTCreateQuery & create, const String & metadata_path, ContextPtr context);
|
||||
struct EngineFeatures
|
||||
{
|
||||
bool supports_arguments = false;
|
||||
bool supports_settings = false;
|
||||
bool supports_table_overrides = false;
|
||||
};
|
||||
|
||||
using CreatorFn = std::function<DatabasePtr(const Arguments & arguments)>;
|
||||
|
||||
using DatabaseEngines = std::unordered_map<std::string, CreatorFn>;
|
||||
struct Creator
|
||||
{
|
||||
CreatorFn creator_fn;
|
||||
EngineFeatures features;
|
||||
};
|
||||
|
||||
void registerDatabase(const std::string & name, CreatorFn creator_fn);
|
||||
DatabasePtr get(const ASTCreateQuery & create, const String & metadata_path, ContextPtr context);
|
||||
|
||||
using DatabaseEngines = std::unordered_map<std::string, Creator>;
|
||||
|
||||
void registerDatabase(const std::string & name, CreatorFn creator_fn, EngineFeatures features = EngineFeatures{
|
||||
.supports_arguments = false,
|
||||
.supports_settings = false,
|
||||
.supports_table_overrides = false,
|
||||
});
|
||||
|
||||
const DatabaseEngines & getDatabaseEngines() const { return database_engines; }
|
||||
|
||||
@ -65,6 +82,10 @@ private:
|
||||
DatabaseEngines database_engines;
|
||||
|
||||
DatabasePtr getImpl(const ASTCreateQuery & create, const String & metadata_path, ContextPtr context);
|
||||
|
||||
/// validate validates the database engine that's specified in the create query for
|
||||
/// engine arguments, settings and table overrides.
|
||||
void validate(const ASTCreateQuery & create_query) const;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -257,6 +257,6 @@ void registerDatabaseFilesystem(DatabaseFactory & factory)
|
||||
|
||||
return std::make_shared<DatabaseFilesystem>(args.database_name, init_path, args.context);
|
||||
};
|
||||
factory.registerDatabase("Filesystem", create_fn);
|
||||
factory.registerDatabase("Filesystem", create_fn, {.supports_arguments = true});
|
||||
}
|
||||
}
|
||||
|
@ -253,7 +253,7 @@ void registerDatabaseHDFS(DatabaseFactory & factory)
|
||||
|
||||
return std::make_shared<DatabaseHDFS>(args.database_name, source_url, args.context);
|
||||
};
|
||||
factory.registerDatabase("HDFS", create_fn);
|
||||
factory.registerDatabase("HDFS", create_fn, {.supports_arguments = true});
|
||||
}
|
||||
} // DB
|
||||
|
||||
|
@ -398,6 +398,6 @@ void registerDatabaseLazy(DatabaseFactory & factory)
|
||||
cache_expiration_time_seconds,
|
||||
args.context);
|
||||
};
|
||||
factory.registerDatabase("Lazy", create_fn);
|
||||
factory.registerDatabase("Lazy", create_fn, {.supports_arguments = true});
|
||||
}
|
||||
}
|
||||
|
@ -2001,6 +2001,6 @@ void registerDatabaseReplicated(DatabaseFactory & factory)
|
||||
replica_name,
|
||||
std::move(database_replicated_settings), args.context);
|
||||
};
|
||||
factory.registerDatabase("Replicated", create_fn);
|
||||
factory.registerDatabase("Replicated", create_fn, {.supports_arguments = true, .supports_settings = true});
|
||||
}
|
||||
}
|
||||
|
@ -326,7 +326,7 @@ void registerDatabaseS3(DatabaseFactory & factory)
|
||||
|
||||
return std::make_shared<DatabaseS3>(args.database_name, config, args.context);
|
||||
};
|
||||
factory.registerDatabase("S3", create_fn);
|
||||
factory.registerDatabase("S3", create_fn, {.supports_arguments = true});
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
@ -290,8 +290,14 @@ void registerDatabaseMaterializedMySQL(DatabaseFactory & factory)
|
||||
binlog_client,
|
||||
std::move(materialize_mode_settings));
|
||||
};
|
||||
factory.registerDatabase("MaterializeMySQL", create_fn);
|
||||
factory.registerDatabase("MaterializedMySQL", create_fn);
|
||||
|
||||
DatabaseFactory::EngineFeatures features{
|
||||
.supports_arguments = true,
|
||||
.supports_settings = true,
|
||||
.supports_table_overrides = true,
|
||||
};
|
||||
factory.registerDatabase("MaterializeMySQL", create_fn, features);
|
||||
factory.registerDatabase("MaterializedMySQL", create_fn, features);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -584,7 +584,7 @@ void registerDatabaseMySQL(DatabaseFactory & factory)
|
||||
throw Exception(ErrorCodes::CANNOT_CREATE_DATABASE, "Cannot create MySQL database, because {}", exception_message);
|
||||
}
|
||||
};
|
||||
factory.registerDatabase("MySQL", create_fn);
|
||||
factory.registerDatabase("MySQL", create_fn, {.supports_arguments = true, .supports_settings = true});
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -546,7 +546,11 @@ void registerDatabaseMaterializedPostgreSQL(DatabaseFactory & factory)
|
||||
args.database_name, configuration.database, connection_info,
|
||||
std::move(postgresql_replica_settings));
|
||||
};
|
||||
factory.registerDatabase("MaterializedPostgreSQL", create_fn);
|
||||
factory.registerDatabase("MaterializedPostgreSQL", create_fn, {
|
||||
.supports_arguments = true,
|
||||
.supports_settings = true,
|
||||
.supports_table_overrides = true,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -558,7 +558,7 @@ void registerDatabasePostgreSQL(DatabaseFactory & factory)
|
||||
pool,
|
||||
use_table_cache);
|
||||
};
|
||||
factory.registerDatabase("PostgreSQL", create_fn);
|
||||
factory.registerDatabase("PostgreSQL", create_fn, {.supports_arguments = true});
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -220,7 +220,7 @@ void registerDatabaseSQLite(DatabaseFactory & factory)
|
||||
|
||||
return std::make_shared<DatabaseSQLite>(args.context, engine_define, args.create_query.attach, database_path);
|
||||
};
|
||||
factory.registerDatabase("SQLite", create_fn);
|
||||
factory.registerDatabase("SQLite", create_fn, {.supports_arguments = true});
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -28,6 +28,7 @@ extern const Event CachedReadBufferReadFromCacheMicroseconds;
|
||||
extern const Event CachedReadBufferCacheWriteMicroseconds;
|
||||
extern const Event CachedReadBufferReadFromSourceBytes;
|
||||
extern const Event CachedReadBufferReadFromCacheBytes;
|
||||
extern const Event CachedReadBufferPredownloadedBytes;
|
||||
extern const Event CachedReadBufferCacheWriteBytes;
|
||||
extern const Event CachedReadBufferCreateBufferMicroseconds;
|
||||
|
||||
@ -644,6 +645,7 @@ void CachedOnDiskReadBufferFromFile::predownload(FileSegment & file_segment)
|
||||
size_t current_predownload_size = std::min(current_impl_buffer_size, bytes_to_predownload);
|
||||
|
||||
ProfileEvents::increment(ProfileEvents::CachedReadBufferReadFromSourceBytes, current_impl_buffer_size);
|
||||
ProfileEvents::increment(ProfileEvents::CachedReadBufferPredownloadedBytes, current_impl_buffer_size);
|
||||
|
||||
std::string failure_reason;
|
||||
bool continue_predownload = file_segment.reserve(
|
||||
|
@ -244,6 +244,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se
|
||||
format_settings.orc.output_string_as_string = settings.output_format_orc_string_as_string;
|
||||
format_settings.orc.output_compression_method = settings.output_format_orc_compression_method;
|
||||
format_settings.orc.output_row_index_stride = settings.output_format_orc_row_index_stride;
|
||||
format_settings.orc.output_dictionary_key_size_threshold = settings.output_format_orc_dictionary_key_size_threshold;
|
||||
format_settings.orc.use_fast_decoder = settings.input_format_orc_use_fast_decoder;
|
||||
format_settings.orc.filter_push_down = settings.input_format_orc_filter_push_down;
|
||||
format_settings.orc.reader_time_zone_name = settings.input_format_orc_reader_time_zone_name;
|
||||
|
@ -415,6 +415,7 @@ struct FormatSettings
|
||||
bool filter_push_down = true;
|
||||
UInt64 output_row_index_stride = 10'000;
|
||||
String reader_time_zone_name = "GMT";
|
||||
double output_dictionary_key_size_threshold = 0.0;
|
||||
} orc{};
|
||||
|
||||
/// For capnProto format we should determine how to
|
||||
|
@ -15,7 +15,6 @@ namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
|
||||
extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION;
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
}
|
||||
|
||||
@ -38,13 +37,6 @@ public:
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
|
||||
{
|
||||
if (arguments.empty())
|
||||
throw Exception(
|
||||
ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION,
|
||||
"Function {} needs at least one argument; passed {}.",
|
||||
getName(),
|
||||
arguments.size());
|
||||
|
||||
DataTypes arguments_types;
|
||||
for (size_t index = 0; index < arguments.size(); ++index)
|
||||
{
|
||||
@ -68,9 +60,16 @@ public:
|
||||
}
|
||||
|
||||
ColumnPtr
|
||||
executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t input_rows_count) const override
|
||||
executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
|
||||
{
|
||||
size_t num_arguments = arguments.size();
|
||||
if (num_arguments == 0)
|
||||
{
|
||||
auto res_col = result_type->createColumn();
|
||||
res_col->insertDefault();
|
||||
return ColumnConst::create(std::move(res_col), input_rows_count);
|
||||
}
|
||||
|
||||
Columns holders(num_arguments);
|
||||
Columns tuple_columns(num_arguments);
|
||||
|
||||
|
@ -718,7 +718,12 @@ FileCache::getOrSet(
|
||||
}
|
||||
}
|
||||
|
||||
chassert(file_segments_limit ? file_segments.back()->range().left <= result_range.right : file_segments.back()->range().contains(result_range.right));
|
||||
chassert(file_segments_limit
|
||||
? file_segments.back()->range().left <= result_range.right
|
||||
: file_segments.back()->range().contains(result_range.right),
|
||||
fmt::format("Unexpected state. Back: {}, result range: {}, limit: {}",
|
||||
file_segments.back()->range().toString(), result_range.toString(), file_segments_limit));
|
||||
|
||||
chassert(!file_segments_limit || file_segments.size() <= file_segments_limit);
|
||||
|
||||
return std::make_unique<FileSegmentsHolder>(std::move(file_segments));
|
||||
|
@ -532,7 +532,7 @@ void executeQueryWithParallelReplicas(
|
||||
max_replicas_to_use = shard.getAllNodeCount();
|
||||
}
|
||||
|
||||
auto coordinator = std::make_shared<ParallelReplicasReadingCoordinator>(max_replicas_to_use, settings.parallel_replicas_mark_segment_size);
|
||||
auto coordinator = std::make_shared<ParallelReplicasReadingCoordinator>(max_replicas_to_use);
|
||||
|
||||
auto external_tables = new_context->getExternalTables();
|
||||
|
||||
|
@ -380,100 +380,99 @@ BlockIO InterpreterDropQuery::executeToDatabase(const ASTDropQuery & query)
|
||||
|
||||
BlockIO InterpreterDropQuery::executeToDatabaseImpl(const ASTDropQuery & query, DatabasePtr & database, std::vector<UUID> & uuids_to_wait)
|
||||
{
|
||||
if (query.kind != ASTDropQuery::Kind::Detach && query.kind != ASTDropQuery::Kind::Drop && query.kind != ASTDropQuery::Kind::Truncate)
|
||||
return {};
|
||||
|
||||
const auto & database_name = query.getDatabase();
|
||||
auto ddl_guard = DatabaseCatalog::instance().getDDLGuard(database_name, "");
|
||||
|
||||
database = tryGetDatabase(database_name, query.if_exists);
|
||||
if (database)
|
||||
if (!database)
|
||||
return {};
|
||||
|
||||
bool drop = query.kind == ASTDropQuery::Kind::Drop;
|
||||
bool truncate = query.kind == ASTDropQuery::Kind::Truncate;
|
||||
|
||||
getContext()->checkAccess(AccessType::DROP_DATABASE, database_name);
|
||||
|
||||
if (query.kind == ASTDropQuery::Kind::Detach && query.permanently)
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "DETACH PERMANENTLY is not implemented for databases");
|
||||
|
||||
if (query.if_empty)
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "DROP IF EMPTY is not implemented for databases");
|
||||
|
||||
if (!truncate && database->hasReplicationThread())
|
||||
database->stopReplication();
|
||||
|
||||
if (database->shouldBeEmptyOnDetach())
|
||||
{
|
||||
if (query.kind == ASTDropQuery::Kind::Detach || query.kind == ASTDropQuery::Kind::Drop
|
||||
|| query.kind == ASTDropQuery::Kind::Truncate)
|
||||
/// Cancel restarting replicas in that database, wait for remaining RESTART queries to finish.
|
||||
/// So it will not startup tables concurrently with the flushAndPrepareForShutdown call below.
|
||||
auto restart_replica_lock = DatabaseCatalog::instance().getLockForDropDatabase(database_name);
|
||||
|
||||
ASTDropQuery query_for_table;
|
||||
query_for_table.kind = query.kind;
|
||||
// For truncate operation on database, drop the tables
|
||||
if (truncate)
|
||||
query_for_table.kind = query.has_all_tables ? ASTDropQuery::Kind::Truncate : ASTDropQuery::Kind::Drop;
|
||||
query_for_table.if_exists = true;
|
||||
query_for_table.if_empty = false;
|
||||
query_for_table.setDatabase(database_name);
|
||||
query_for_table.sync = query.sync;
|
||||
|
||||
/// Flush should not be done if shouldBeEmptyOnDetach() == false,
|
||||
/// since in this case getTablesIterator() may do some additional work,
|
||||
/// see DatabaseMaterializedMySQL::getTablesIterator()
|
||||
auto table_context = Context::createCopy(getContext());
|
||||
table_context->setInternalQuery(true);
|
||||
/// Do not hold extra shared pointers to tables
|
||||
std::vector<std::pair<StorageID, bool>> tables_to_drop;
|
||||
// NOTE: This means we wait for all tables to be loaded inside getTablesIterator() call in case of `async_load_databases = true`.
|
||||
for (auto iterator = database->getTablesIterator(table_context); iterator->isValid(); iterator->next())
|
||||
{
|
||||
bool drop = query.kind == ASTDropQuery::Kind::Drop;
|
||||
bool truncate = query.kind == ASTDropQuery::Kind::Truncate;
|
||||
auto table_ptr = iterator->table();
|
||||
tables_to_drop.push_back({table_ptr->getStorageID(), table_ptr->isDictionary()});
|
||||
}
|
||||
|
||||
getContext()->checkAccess(AccessType::DROP_DATABASE, database_name);
|
||||
|
||||
if (query.kind == ASTDropQuery::Kind::Detach && query.permanently)
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "DETACH PERMANENTLY is not implemented for databases");
|
||||
|
||||
if (query.if_empty)
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "DROP IF EMPTY is not implemented for databases");
|
||||
|
||||
if (!truncate && database->hasReplicationThread())
|
||||
database->stopReplication();
|
||||
|
||||
if (database->shouldBeEmptyOnDetach())
|
||||
/// Prepare tables for shutdown in parallel.
|
||||
ThreadPoolCallbackRunnerLocal<void> runner(getDatabaseCatalogDropTablesThreadPool().get(), "DropTables");
|
||||
for (const auto & [name, _] : tables_to_drop)
|
||||
{
|
||||
auto table_ptr = DatabaseCatalog::instance().getTable(name, table_context);
|
||||
runner([my_table_ptr = std::move(table_ptr)]()
|
||||
{
|
||||
/// Cancel restarting replicas in that database, wait for remaining RESTART queries to finish.
|
||||
/// So it will not startup tables concurrently with the flushAndPrepareForShutdown call below.
|
||||
auto restart_replica_lock = DatabaseCatalog::instance().getLockForDropDatabase(database_name);
|
||||
my_table_ptr->flushAndPrepareForShutdown();
|
||||
});
|
||||
}
|
||||
runner.waitForAllToFinishAndRethrowFirstError();
|
||||
|
||||
ASTDropQuery query_for_table;
|
||||
query_for_table.kind = query.kind;
|
||||
// For truncate operation on database, drop the tables
|
||||
if (truncate)
|
||||
query_for_table.kind = query.has_all_tables ? ASTDropQuery::Kind::Truncate : ASTDropQuery::Kind::Drop;
|
||||
query_for_table.if_exists = true;
|
||||
query_for_table.if_empty = false;
|
||||
query_for_table.setDatabase(database_name);
|
||||
query_for_table.sync = query.sync;
|
||||
|
||||
/// Flush should not be done if shouldBeEmptyOnDetach() == false,
|
||||
/// since in this case getTablesIterator() may do some additional work,
|
||||
/// see DatabaseMaterializedMySQL::getTablesIterator()
|
||||
auto table_context = Context::createCopy(getContext());
|
||||
table_context->setInternalQuery(true);
|
||||
/// Do not hold extra shared pointers to tables
|
||||
std::vector<std::pair<StorageID, bool>> tables_to_drop;
|
||||
// NOTE: This means we wait for all tables to be loaded inside getTablesIterator() call in case of `async_load_databases = true`.
|
||||
for (auto iterator = database->getTablesIterator(table_context); iterator->isValid(); iterator->next())
|
||||
{
|
||||
auto table_ptr = iterator->table();
|
||||
tables_to_drop.push_back({table_ptr->getStorageID(), table_ptr->isDictionary()});
|
||||
}
|
||||
|
||||
/// Prepare tables for shutdown in parallel.
|
||||
ThreadPoolCallbackRunnerLocal<void> runner(getDatabaseCatalogDropTablesThreadPool().get(), "DropTables");
|
||||
for (const auto & [name, _] : tables_to_drop)
|
||||
{
|
||||
auto table_ptr = DatabaseCatalog::instance().getTable(name, table_context);
|
||||
runner([my_table_ptr = std::move(table_ptr)]()
|
||||
{
|
||||
my_table_ptr->flushAndPrepareForShutdown();
|
||||
});
|
||||
}
|
||||
runner.waitForAllToFinishAndRethrowFirstError();
|
||||
|
||||
for (const auto & table : tables_to_drop)
|
||||
{
|
||||
query_for_table.setTable(table.first.getTableName());
|
||||
query_for_table.is_dictionary = table.second;
|
||||
DatabasePtr db;
|
||||
UUID table_to_wait = UUIDHelpers::Nil;
|
||||
executeToTableImpl(table_context, query_for_table, db, table_to_wait);
|
||||
uuids_to_wait.push_back(table_to_wait);
|
||||
}
|
||||
}
|
||||
// only if operation is DETACH
|
||||
if ((!drop || !truncate) && query.sync)
|
||||
{
|
||||
/// Avoid "some tables are still in use" when sync mode is enabled
|
||||
for (const auto & table_uuid : uuids_to_wait)
|
||||
database->waitDetachedTableNotInUse(table_uuid);
|
||||
}
|
||||
|
||||
/// Protects from concurrent CREATE TABLE queries
|
||||
auto db_guard = DatabaseCatalog::instance().getExclusiveDDLGuardForDatabase(database_name);
|
||||
// only if operation is DETACH
|
||||
if (!drop || !truncate)
|
||||
database->assertCanBeDetached(true);
|
||||
|
||||
/// DETACH or DROP database itself. If TRUNCATE skip dropping/erasing the database.
|
||||
if (!truncate)
|
||||
DatabaseCatalog::instance().detachDatabase(getContext(), database_name, drop, database->shouldBeEmptyOnDetach());
|
||||
for (const auto & table : tables_to_drop)
|
||||
{
|
||||
query_for_table.setTable(table.first.getTableName());
|
||||
query_for_table.is_dictionary = table.second;
|
||||
DatabasePtr db;
|
||||
UUID table_to_wait = UUIDHelpers::Nil;
|
||||
executeToTableImpl(table_context, query_for_table, db, table_to_wait);
|
||||
uuids_to_wait.push_back(table_to_wait);
|
||||
}
|
||||
}
|
||||
// only if operation is DETACH
|
||||
if ((!drop || !truncate) && query.sync)
|
||||
{
|
||||
/// Avoid "some tables are still in use" when sync mode is enabled
|
||||
for (const auto & table_uuid : uuids_to_wait)
|
||||
database->waitDetachedTableNotInUse(table_uuid);
|
||||
}
|
||||
|
||||
/// Protects from concurrent CREATE TABLE queries
|
||||
auto db_guard = DatabaseCatalog::instance().getExclusiveDDLGuardForDatabase(database_name);
|
||||
// only if operation is DETACH
|
||||
if (!drop || !truncate)
|
||||
database->assertCanBeDetached(true);
|
||||
|
||||
/// DETACH or DROP database itself. If TRUNCATE skip dropping/erasing the database.
|
||||
if (!truncate)
|
||||
DatabaseCatalog::instance().detachDatabase(getContext(), database_name, drop, database->shouldBeEmptyOnDetach());
|
||||
|
||||
return {};
|
||||
}
|
||||
|
@ -1,10 +1,42 @@
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <Common/KnownObjectNames.h>
|
||||
#include <Core/QualifiedTableName.h>
|
||||
#include <base/defines.h>
|
||||
#include <boost/algorithm/string/predicate.hpp>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class AbstractFunction
|
||||
{
|
||||
friend class FunctionSecretArgumentsFinder;
|
||||
public:
|
||||
class Argument
|
||||
{
|
||||
public:
|
||||
virtual ~Argument() = default;
|
||||
virtual std::unique_ptr<AbstractFunction> getFunction() const = 0;
|
||||
virtual bool isIdentifier() const = 0;
|
||||
virtual bool tryGetString(String * res, bool allow_identifier) const = 0;
|
||||
};
|
||||
class Arguments
|
||||
{
|
||||
public:
|
||||
virtual ~Arguments() = default;
|
||||
virtual size_t size() const = 0;
|
||||
virtual std::unique_ptr<Argument> at(size_t n) const = 0;
|
||||
};
|
||||
|
||||
virtual ~AbstractFunction() = default;
|
||||
virtual String name() const = 0;
|
||||
bool hasArguments() const { return !!arguments; }
|
||||
|
||||
protected:
|
||||
std::unique_ptr<Arguments> arguments;
|
||||
};
|
||||
|
||||
class FunctionSecretArgumentsFinder
|
||||
{
|
||||
public:
|
||||
@ -23,6 +55,485 @@ public:
|
||||
return count != 0 || !nested_maps.empty();
|
||||
}
|
||||
};
|
||||
|
||||
explicit FunctionSecretArgumentsFinder(std::unique_ptr<AbstractFunction> && function_) : function(std::move(function_)) {}
|
||||
|
||||
FunctionSecretArgumentsFinder::Result getResult() const { return result; }
|
||||
|
||||
protected:
|
||||
const std::unique_ptr<AbstractFunction> function;
|
||||
Result result;
|
||||
|
||||
void markSecretArgument(size_t index, bool argument_is_named = false)
|
||||
{
|
||||
if (index >= function->arguments->size())
|
||||
return;
|
||||
if (!result.count)
|
||||
{
|
||||
result.start = index;
|
||||
result.are_named = argument_is_named;
|
||||
}
|
||||
chassert(index >= result.start); /// We always check arguments consecutively
|
||||
result.count = index + 1 - result.start;
|
||||
if (!argument_is_named)
|
||||
result.are_named = false;
|
||||
}
|
||||
|
||||
void findOrdinaryFunctionSecretArguments()
|
||||
{
|
||||
if ((function->name() == "mysql") || (function->name() == "postgresql") || (function->name() == "mongodb"))
|
||||
{
|
||||
/// mysql('host:port', 'database', 'table', 'user', 'password', ...)
|
||||
/// postgresql('host:port', 'database', 'table', 'user', 'password', ...)
|
||||
/// mongodb('host:port', 'database', 'collection', 'user', 'password', ...)
|
||||
findMySQLFunctionSecretArguments();
|
||||
}
|
||||
else if ((function->name() == "s3") || (function->name() == "cosn") || (function->name() == "oss") ||
|
||||
(function->name() == "deltaLake") || (function->name() == "hudi") || (function->name() == "iceberg") ||
|
||||
(function->name() == "gcs"))
|
||||
{
|
||||
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
|
||||
findS3FunctionSecretArguments(/* is_cluster_function= */ false);
|
||||
}
|
||||
else if (function->name() == "s3Cluster")
|
||||
{
|
||||
/// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', ...)
|
||||
findS3FunctionSecretArguments(/* is_cluster_function= */ true);
|
||||
}
|
||||
else if (function->name() == "azureBlobStorage")
|
||||
{
|
||||
/// azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure)
|
||||
findAzureBlobStorageFunctionSecretArguments(/* is_cluster_function= */ false);
|
||||
}
|
||||
else if (function->name() == "azureBlobStorageCluster")
|
||||
{
|
||||
/// azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])
|
||||
findAzureBlobStorageFunctionSecretArguments(/* is_cluster_function= */ true);
|
||||
}
|
||||
else if ((function->name() == "remote") || (function->name() == "remoteSecure"))
|
||||
{
|
||||
/// remote('addresses_expr', 'db', 'table', 'user', 'password', ...)
|
||||
findRemoteFunctionSecretArguments();
|
||||
}
|
||||
else if ((function->name() == "encrypt") || (function->name() == "decrypt") ||
|
||||
(function->name() == "aes_encrypt_mysql") || (function->name() == "aes_decrypt_mysql") ||
|
||||
(function->name() == "tryDecrypt"))
|
||||
{
|
||||
/// encrypt('mode', 'plaintext', 'key' [, iv, aad])
|
||||
findEncryptionFunctionSecretArguments();
|
||||
}
|
||||
else if (function->name() == "url")
|
||||
{
|
||||
findURLSecretArguments();
|
||||
}
|
||||
}
|
||||
|
||||
void findMySQLFunctionSecretArguments()
|
||||
{
|
||||
if (isNamedCollectionName(0))
|
||||
{
|
||||
/// mysql(named_collection, ..., password = 'password', ...)
|
||||
findSecretNamedArgument("password", 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
/// mysql('host:port', 'database', 'table', 'user', 'password', ...)
|
||||
markSecretArgument(4);
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the number of arguments excluding "headers" and "extra_credentials" (which should
|
||||
/// always be at the end). Marks "headers" as secret, if found.
|
||||
size_t excludeS3OrURLNestedMaps()
|
||||
{
|
||||
size_t count = function->arguments->size();
|
||||
while (count > 0)
|
||||
{
|
||||
const auto f = function->arguments->at(count - 1)->getFunction();
|
||||
if (!f)
|
||||
break;
|
||||
if (f->name() == "headers")
|
||||
result.nested_maps.push_back(f->name());
|
||||
else if (f->name() != "extra_credentials")
|
||||
break;
|
||||
count -= 1;
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
void findS3FunctionSecretArguments(bool is_cluster_function)
|
||||
{
|
||||
/// s3Cluster('cluster_name', 'url', ...) has 'url' as its second argument.
|
||||
size_t url_arg_idx = is_cluster_function ? 1 : 0;
|
||||
|
||||
if (!is_cluster_function && isNamedCollectionName(0))
|
||||
{
|
||||
/// s3(named_collection, ..., secret_access_key = 'secret_access_key', ...)
|
||||
findSecretNamedArgument("secret_access_key", 1);
|
||||
return;
|
||||
}
|
||||
|
||||
/// We should check other arguments first because we don't need to do any replacement in case of
|
||||
/// s3('url', NOSIGN, 'format' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
|
||||
/// s3('url', 'format', 'structure' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
|
||||
size_t count = excludeS3OrURLNestedMaps();
|
||||
if ((url_arg_idx + 3 <= count) && (count <= url_arg_idx + 4))
|
||||
{
|
||||
String second_arg;
|
||||
if (tryGetStringFromArgument(url_arg_idx + 1, &second_arg))
|
||||
{
|
||||
if (boost::iequals(second_arg, "NOSIGN"))
|
||||
return; /// The argument after 'url' is "NOSIGN".
|
||||
|
||||
if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg))
|
||||
return; /// The argument after 'url' is a format: s3('url', 'format', ...)
|
||||
}
|
||||
}
|
||||
|
||||
/// We're going to replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures:
|
||||
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
|
||||
/// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression')
|
||||
if (url_arg_idx + 2 < count)
|
||||
markSecretArgument(url_arg_idx + 2);
|
||||
}
|
||||
|
||||
void findAzureBlobStorageFunctionSecretArguments(bool is_cluster_function)
|
||||
{
|
||||
/// azureBlobStorage('cluster_name', 'conn_string/storage_account_url', ...) has 'conn_string/storage_account_url' as its second argument.
|
||||
size_t url_arg_idx = is_cluster_function ? 1 : 0;
|
||||
|
||||
if (!is_cluster_function && isNamedCollectionName(0))
|
||||
{
|
||||
/// azureBlobStorage(named_collection, ..., account_key = 'account_key', ...)
|
||||
findSecretNamedArgument("account_key", 1);
|
||||
return;
|
||||
}
|
||||
else if (is_cluster_function && isNamedCollectionName(1))
|
||||
{
|
||||
/// azureBlobStorageCluster(cluster, named_collection, ..., account_key = 'account_key', ...)
|
||||
findSecretNamedArgument("account_key", 2);
|
||||
return;
|
||||
}
|
||||
|
||||
/// We should check other arguments first because we don't need to do any replacement in case storage_account_url is not used
|
||||
/// azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure)
|
||||
/// azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])
|
||||
size_t count = function->arguments->size();
|
||||
if ((url_arg_idx + 4 <= count) && (count <= url_arg_idx + 7))
|
||||
{
|
||||
String second_arg;
|
||||
if (tryGetStringFromArgument(url_arg_idx + 3, &second_arg))
|
||||
{
|
||||
if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg))
|
||||
return; /// The argument after 'url' is a format: s3('url', 'format', ...)
|
||||
}
|
||||
}
|
||||
|
||||
/// We're going to replace 'account_key' with '[HIDDEN]' if account_key is used in the signature
|
||||
if (url_arg_idx + 4 < count)
|
||||
markSecretArgument(url_arg_idx + 4);
|
||||
}
|
||||
|
||||
void findURLSecretArguments()
|
||||
{
|
||||
if (!isNamedCollectionName(0))
|
||||
excludeS3OrURLNestedMaps();
|
||||
}
|
||||
|
||||
bool tryGetStringFromArgument(size_t arg_idx, String * res, bool allow_identifier = true) const
|
||||
{
|
||||
if (arg_idx >= function->arguments->size())
|
||||
return false;
|
||||
|
||||
return tryGetStringFromArgument(*function->arguments->at(arg_idx), res, allow_identifier);
|
||||
}
|
||||
|
||||
static bool tryGetStringFromArgument(const AbstractFunction::Argument & argument, String * res, bool allow_identifier = true)
|
||||
{
|
||||
return argument.tryGetString(res, allow_identifier);
|
||||
}
|
||||
|
||||
void findRemoteFunctionSecretArguments()
|
||||
{
|
||||
if (isNamedCollectionName(0))
|
||||
{
|
||||
/// remote(named_collection, ..., password = 'password', ...)
|
||||
findSecretNamedArgument("password", 1);
|
||||
return;
|
||||
}
|
||||
|
||||
/// We're going to replace 'password' with '[HIDDEN'] for the following signatures:
|
||||
/// remote('addresses_expr', db.table, 'user' [, 'password'] [, sharding_key])
|
||||
/// remote('addresses_expr', 'db', 'table', 'user' [, 'password'] [, sharding_key])
|
||||
/// remote('addresses_expr', table_function(), 'user' [, 'password'] [, sharding_key])
|
||||
|
||||
/// But we should check the number of arguments first because we don't need to do any replacements in case of
|
||||
/// remote('addresses_expr', db.table)
|
||||
if (function->arguments->size() < 3)
|
||||
return;
|
||||
|
||||
size_t arg_num = 1;
|
||||
|
||||
/// Skip 1 or 2 arguments with table_function() or db.table or 'db', 'table'.
|
||||
auto table_function = function->arguments->at(arg_num)->getFunction();
|
||||
if (table_function && KnownTableFunctionNames::instance().exists(table_function->name()))
|
||||
{
|
||||
++arg_num;
|
||||
}
|
||||
else
|
||||
{
|
||||
std::optional<String> database;
|
||||
std::optional<QualifiedTableName> qualified_table_name;
|
||||
if (!tryGetDatabaseNameOrQualifiedTableName(arg_num, database, qualified_table_name))
|
||||
{
|
||||
/// We couldn't evaluate the argument so we don't know whether it is 'db.table' or just 'db'.
|
||||
/// Hence we can't figure out whether we should skip one argument 'user' or two arguments 'table', 'user'
|
||||
/// before the argument 'password'. So it's safer to wipe two arguments just in case.
|
||||
/// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string
|
||||
/// before wiping it (because the `password` argument is always a literal string).
|
||||
if (tryGetStringFromArgument(arg_num + 2, nullptr, /* allow_identifier= */ false))
|
||||
{
|
||||
/// Wipe either `password` or `user`.
|
||||
markSecretArgument(arg_num + 2);
|
||||
}
|
||||
if (tryGetStringFromArgument(arg_num + 3, nullptr, /* allow_identifier= */ false))
|
||||
{
|
||||
/// Wipe either `password` or `sharding_key`.
|
||||
markSecretArgument(arg_num + 3);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/// Skip the current argument (which is either a database name or a qualified table name).
|
||||
++arg_num;
|
||||
if (database)
|
||||
{
|
||||
/// Skip the 'table' argument if the previous argument was a database name.
|
||||
++arg_num;
|
||||
}
|
||||
}
|
||||
|
||||
/// Skip username.
|
||||
++arg_num;
|
||||
|
||||
/// Do our replacement:
|
||||
/// remote('addresses_expr', db.table, 'user', 'password', ...) -> remote('addresses_expr', db.table, 'user', '[HIDDEN]', ...)
|
||||
/// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string
|
||||
/// before wiping it (because the `password` argument is always a literal string).
|
||||
bool can_be_password = tryGetStringFromArgument(arg_num, nullptr, /* allow_identifier= */ false);
|
||||
if (can_be_password)
|
||||
markSecretArgument(arg_num);
|
||||
}
|
||||
|
||||
/// Tries to get either a database name or a qualified table name from an argument.
|
||||
/// Empty string is also allowed (it means the default database).
|
||||
/// The function is used by findRemoteFunctionSecretArguments() to determine how many arguments to skip before a password.
|
||||
bool tryGetDatabaseNameOrQualifiedTableName(
|
||||
size_t arg_idx,
|
||||
std::optional<String> & res_database,
|
||||
std::optional<QualifiedTableName> & res_qualified_table_name) const
|
||||
{
|
||||
res_database.reset();
|
||||
res_qualified_table_name.reset();
|
||||
|
||||
String str;
|
||||
if (!tryGetStringFromArgument(arg_idx, &str, /* allow_identifier= */ true))
|
||||
return false;
|
||||
|
||||
if (str.empty())
|
||||
{
|
||||
res_database = "";
|
||||
return true;
|
||||
}
|
||||
|
||||
auto qualified_table_name = QualifiedTableName::tryParseFromString(str);
|
||||
if (!qualified_table_name)
|
||||
return false;
|
||||
|
||||
if (qualified_table_name->database.empty())
|
||||
res_database = std::move(qualified_table_name->table);
|
||||
else
|
||||
res_qualified_table_name = std::move(qualified_table_name);
|
||||
return true;
|
||||
}
|
||||
|
||||
void findEncryptionFunctionSecretArguments()
|
||||
{
|
||||
if (function->arguments->size() == 0)
|
||||
return;
|
||||
|
||||
/// We replace all arguments after 'mode' with '[HIDDEN]':
|
||||
/// encrypt('mode', 'plaintext', 'key' [, iv, aad]) -> encrypt('mode', '[HIDDEN]')
|
||||
result.start = 1;
|
||||
result.count = function->arguments->size() - 1;
|
||||
}
|
||||
|
||||
void findTableEngineSecretArguments()
|
||||
{
|
||||
const String & engine_name = function->name();
|
||||
if (engine_name == "ExternalDistributed")
|
||||
{
|
||||
/// ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password')
|
||||
findExternalDistributedTableEngineSecretArguments();
|
||||
}
|
||||
else if ((engine_name == "MySQL") || (engine_name == "PostgreSQL") ||
|
||||
(engine_name == "MaterializedPostgreSQL") || (engine_name == "MongoDB"))
|
||||
{
|
||||
/// MySQL('host:port', 'database', 'table', 'user', 'password', ...)
|
||||
/// PostgreSQL('host:port', 'database', 'table', 'user', 'password', ...)
|
||||
/// MaterializedPostgreSQL('host:port', 'database', 'table', 'user', 'password', ...)
|
||||
/// MongoDB('host:port', 'database', 'collection', 'user', 'password', ...)
|
||||
findMySQLFunctionSecretArguments();
|
||||
}
|
||||
else if ((engine_name == "S3") || (engine_name == "COSN") || (engine_name == "OSS") ||
|
||||
(engine_name == "DeltaLake") || (engine_name == "Hudi") || (engine_name == "Iceberg") || (engine_name == "S3Queue"))
|
||||
{
|
||||
/// S3('url', ['aws_access_key_id', 'aws_secret_access_key',] ...)
|
||||
findS3TableEngineSecretArguments();
|
||||
}
|
||||
else if (engine_name == "URL")
|
||||
{
|
||||
findURLSecretArguments();
|
||||
}
|
||||
}
|
||||
|
||||
void findExternalDistributedTableEngineSecretArguments()
|
||||
{
|
||||
if (isNamedCollectionName(1))
|
||||
{
|
||||
/// ExternalDistributed('engine', named_collection, ..., password = 'password', ...)
|
||||
findSecretNamedArgument("password", 2);
|
||||
}
|
||||
else
|
||||
{
|
||||
/// ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password')
|
||||
markSecretArgument(5);
|
||||
}
|
||||
}
|
||||
|
||||
void findS3TableEngineSecretArguments()
|
||||
{
|
||||
if (isNamedCollectionName(0))
|
||||
{
|
||||
/// S3(named_collection, ..., secret_access_key = 'secret_access_key')
|
||||
findSecretNamedArgument("secret_access_key", 1);
|
||||
return;
|
||||
}
|
||||
|
||||
/// We should check other arguments first because we don't need to do any replacement in case of
|
||||
/// S3('url', NOSIGN, 'format' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
|
||||
/// S3('url', 'format', 'compression' [, extra_credentials(..)] [, headers(..)])
|
||||
size_t count = excludeS3OrURLNestedMaps();
|
||||
if ((3 <= count) && (count <= 4))
|
||||
{
|
||||
String second_arg;
|
||||
if (tryGetStringFromArgument(1, &second_arg))
|
||||
{
|
||||
if (boost::iequals(second_arg, "NOSIGN"))
|
||||
return; /// The argument after 'url' is "NOSIGN".
|
||||
|
||||
if (count == 3)
|
||||
{
|
||||
if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg))
|
||||
return; /// The argument after 'url' is a format: S3('url', 'format', ...)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// We replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures:
|
||||
/// S3('url', 'aws_access_key_id', 'aws_secret_access_key')
|
||||
/// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format')
|
||||
/// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression')
|
||||
if (2 < count)
|
||||
markSecretArgument(2);
|
||||
}
|
||||
|
||||
void findDatabaseEngineSecretArguments()
|
||||
{
|
||||
const String & engine_name = function->name();
|
||||
if ((engine_name == "MySQL") || (engine_name == "MaterializeMySQL") ||
|
||||
(engine_name == "MaterializedMySQL") || (engine_name == "PostgreSQL") ||
|
||||
(engine_name == "MaterializedPostgreSQL"))
|
||||
{
|
||||
/// MySQL('host:port', 'database', 'user', 'password')
|
||||
/// PostgreSQL('host:port', 'database', 'user', 'password')
|
||||
findMySQLDatabaseSecretArguments();
|
||||
}
|
||||
else if (engine_name == "S3")
|
||||
{
|
||||
/// S3('url', 'access_key_id', 'secret_access_key')
|
||||
findS3DatabaseSecretArguments();
|
||||
}
|
||||
}
|
||||
|
||||
void findMySQLDatabaseSecretArguments()
|
||||
{
|
||||
if (isNamedCollectionName(0))
|
||||
{
|
||||
/// MySQL(named_collection, ..., password = 'password', ...)
|
||||
findSecretNamedArgument("password", 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
/// MySQL('host:port', 'database', 'user', 'password')
|
||||
markSecretArgument(3);
|
||||
}
|
||||
}
|
||||
|
||||
void findS3DatabaseSecretArguments()
|
||||
{
|
||||
if (isNamedCollectionName(0))
|
||||
{
|
||||
/// S3(named_collection, ..., secret_access_key = 'password', ...)
|
||||
findSecretNamedArgument("secret_access_key", 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
/// S3('url', 'access_key_id', 'secret_access_key')
|
||||
markSecretArgument(2);
|
||||
}
|
||||
}
|
||||
|
||||
void findBackupNameSecretArguments()
|
||||
{
|
||||
const String & engine_name = function->name();
|
||||
if (engine_name == "S3")
|
||||
{
|
||||
/// BACKUP ... TO S3(url, [aws_access_key_id, aws_secret_access_key])
|
||||
markSecretArgument(2);
|
||||
}
|
||||
}
|
||||
|
||||
/// Whether a specified argument can be the name of a named collection?
|
||||
bool isNamedCollectionName(size_t arg_idx) const
|
||||
{
|
||||
if (function->arguments->size() <= arg_idx)
|
||||
return false;
|
||||
|
||||
return function->arguments->at(arg_idx)->isIdentifier();
|
||||
}
|
||||
|
||||
/// Looks for a secret argument with a specified name. This function looks for arguments in format `key=value` where the key is specified.
|
||||
void findSecretNamedArgument(const std::string_view & key, size_t start = 0)
|
||||
{
|
||||
for (size_t i = start; i < function->arguments->size(); ++i)
|
||||
{
|
||||
const auto & argument = function->arguments->at(i);
|
||||
const auto equals_func = argument->getFunction();
|
||||
if (!equals_func || (equals_func->name() != "equals"))
|
||||
continue;
|
||||
|
||||
if (!equals_func->arguments || equals_func->arguments->size() != 2)
|
||||
continue;
|
||||
|
||||
String found_key;
|
||||
if (!tryGetStringFromArgument(*equals_func->arguments->at(0), &found_key))
|
||||
continue;
|
||||
|
||||
if (found_key == key)
|
||||
markSecretArgument(i, /* argument_is_named= */ true);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -1,35 +1,97 @@
|
||||
#pragma once
|
||||
|
||||
#include <Parsers/FunctionSecretArgumentsFinder.h>
|
||||
#include <Core/QualifiedTableName.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
#include <Parsers/ASTIdentifier.h>
|
||||
#include <Common/KnownObjectNames.h>
|
||||
|
||||
#include <boost/algorithm/string/predicate.hpp>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
|
||||
/// Finds arguments of a specified function which should not be displayed for most users for security reasons.
|
||||
/// That involves passwords and secret keys.
|
||||
class FunctionSecretArgumentsFinderAST
|
||||
class FunctionAST : public AbstractFunction
|
||||
{
|
||||
public:
|
||||
explicit FunctionSecretArgumentsFinderAST(const ASTFunction & function_) : function(function_)
|
||||
class ArgumentAST : public Argument
|
||||
{
|
||||
if (!function.arguments)
|
||||
public:
|
||||
explicit ArgumentAST(const IAST * argument_) : argument(argument_) {}
|
||||
std::unique_ptr<AbstractFunction> getFunction() const override
|
||||
{
|
||||
if (const auto * f = argument->as<ASTFunction>())
|
||||
return std::make_unique<FunctionAST>(*f);
|
||||
return nullptr;
|
||||
}
|
||||
bool isIdentifier() const override { return argument->as<ASTIdentifier>(); }
|
||||
bool tryGetString(String * res, bool allow_identifier) const override
|
||||
{
|
||||
if (const auto * literal = argument->as<ASTLiteral>())
|
||||
{
|
||||
if (literal->value.getType() != Field::Types::String)
|
||||
return false;
|
||||
if (res)
|
||||
*res = literal->value.safeGet<String>();
|
||||
return true;
|
||||
}
|
||||
|
||||
if (allow_identifier)
|
||||
{
|
||||
if (const auto * id = argument->as<ASTIdentifier>())
|
||||
{
|
||||
if (res)
|
||||
*res = id->name();
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
private:
|
||||
const IAST * argument = nullptr;
|
||||
};
|
||||
|
||||
class ArgumentsAST : public Arguments
|
||||
{
|
||||
public:
|
||||
explicit ArgumentsAST(const ASTs * arguments_) : arguments(arguments_) {}
|
||||
size_t size() const override { return arguments ? arguments->size() : 0; }
|
||||
std::unique_ptr<Argument> at(size_t n) const override
|
||||
{
|
||||
return std::make_unique<ArgumentAST>(arguments->at(n).get());
|
||||
}
|
||||
private:
|
||||
const ASTs * arguments = nullptr;
|
||||
};
|
||||
|
||||
explicit FunctionAST(const ASTFunction & function_) : function(&function_)
|
||||
{
|
||||
if (!function->arguments)
|
||||
return;
|
||||
|
||||
const auto * expr_list = function.arguments->as<ASTExpressionList>();
|
||||
const auto * expr_list = function->arguments->as<ASTExpressionList>();
|
||||
if (!expr_list)
|
||||
return;
|
||||
|
||||
arguments = &expr_list->children;
|
||||
switch (function.kind)
|
||||
arguments = std::make_unique<ArgumentsAST>(&expr_list->children);
|
||||
}
|
||||
|
||||
String name() const override { return function->name; }
|
||||
private:
|
||||
const ASTFunction * function = nullptr;
|
||||
};
|
||||
|
||||
/// Finds arguments of a specified function which should not be displayed for most users for security reasons.
|
||||
/// That involves passwords and secret keys.
|
||||
class FunctionSecretArgumentsFinderAST : public FunctionSecretArgumentsFinder
|
||||
{
|
||||
public:
|
||||
explicit FunctionSecretArgumentsFinderAST(const ASTFunction & function_)
|
||||
: FunctionSecretArgumentsFinder(std::make_unique<FunctionAST>(function_))
|
||||
{
|
||||
if (!function->hasArguments())
|
||||
return;
|
||||
|
||||
switch (function_.kind)
|
||||
{
|
||||
case ASTFunction::Kind::ORDINARY_FUNCTION: findOrdinaryFunctionSecretArguments(); break;
|
||||
case ASTFunction::Kind::WINDOW_FUNCTION: break;
|
||||
@ -43,507 +105,7 @@ public:
|
||||
}
|
||||
|
||||
FunctionSecretArgumentsFinder::Result getResult() const { return result; }
|
||||
|
||||
private:
|
||||
const ASTFunction & function;
|
||||
const ASTs * arguments = nullptr;
|
||||
FunctionSecretArgumentsFinder::Result result;
|
||||
|
||||
void markSecretArgument(size_t index, bool argument_is_named = false)
|
||||
{
|
||||
if (index >= arguments->size())
|
||||
return;
|
||||
if (!result.count)
|
||||
{
|
||||
result.start = index;
|
||||
result.are_named = argument_is_named;
|
||||
}
|
||||
chassert(index >= result.start); /// We always check arguments consecutively
|
||||
result.count = index + 1 - result.start;
|
||||
if (!argument_is_named)
|
||||
result.are_named = false;
|
||||
}
|
||||
|
||||
void findOrdinaryFunctionSecretArguments()
|
||||
{
|
||||
if ((function.name == "mysql") || (function.name == "postgresql") || (function.name == "mongodb"))
|
||||
{
|
||||
/// mysql('host:port', 'database', 'table', 'user', 'password', ...)
|
||||
/// postgresql('host:port', 'database', 'table', 'user', 'password', ...)
|
||||
/// mongodb('host:port', 'database', 'collection', 'user', 'password', ...)
|
||||
findMySQLFunctionSecretArguments();
|
||||
}
|
||||
else if ((function.name == "s3") || (function.name == "cosn") || (function.name == "oss") ||
|
||||
(function.name == "deltaLake") || (function.name == "hudi") || (function.name == "iceberg") ||
|
||||
(function.name == "gcs"))
|
||||
{
|
||||
/// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
|
||||
findS3FunctionSecretArguments(/* is_cluster_function= */ false);
|
||||
}
|
||||
else if (function.name == "s3Cluster")
|
||||
{
|
||||
/// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', ...)
|
||||
findS3FunctionSecretArguments(/* is_cluster_function= */ true);
|
||||
}
|
||||
else if (function.name == "azureBlobStorage")
|
||||
{
|
||||
/// azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure)
|
||||
findAzureBlobStorageFunctionSecretArguments(/* is_cluster_function= */ false);
|
||||
}
|
||||
else if (function.name == "azureBlobStorageCluster")
|
||||
{
|
||||
/// azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])
|
||||
findAzureBlobStorageFunctionSecretArguments(/* is_cluster_function= */ true);
|
||||
}
|
||||
else if ((function.name == "remote") || (function.name == "remoteSecure"))
|
||||
{
|
||||
/// remote('addresses_expr', 'db', 'table', 'user', 'password', ...)
|
||||
findRemoteFunctionSecretArguments();
|
||||
}
|
||||
else if ((function.name == "encrypt") || (function.name == "decrypt") ||
|
||||
(function.name == "aes_encrypt_mysql") || (function.name == "aes_decrypt_mysql") ||
|
||||
(function.name == "tryDecrypt"))
|
||||
{
|
||||
/// encrypt('mode', 'plaintext', 'key' [, iv, aad])
|
||||
findEncryptionFunctionSecretArguments();
|
||||
}
|
||||
else if (function.name == "url")
|
||||
{
|
||||
findURLSecretArguments();
|
||||
}
|
||||
}
|
||||
|
||||
void findMySQLFunctionSecretArguments()
|
||||
{
|
||||
if (isNamedCollectionName(0))
|
||||
{
|
||||
/// mysql(named_collection, ..., password = 'password', ...)
|
||||
findSecretNamedArgument("password", 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
/// mysql('host:port', 'database', 'table', 'user', 'password', ...)
|
||||
markSecretArgument(4);
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the number of arguments excluding "headers" and "extra_credentials" (which should
|
||||
/// always be at the end). Marks "headers" as secret, if found.
|
||||
size_t excludeS3OrURLNestedMaps()
|
||||
{
|
||||
size_t count = arguments->size();
|
||||
while (count > 0)
|
||||
{
|
||||
const ASTFunction * f = arguments->at(count - 1)->as<ASTFunction>();
|
||||
if (!f)
|
||||
break;
|
||||
if (f->name == "headers")
|
||||
result.nested_maps.push_back(f->name);
|
||||
else if (f->name != "extra_credentials")
|
||||
break;
|
||||
count -= 1;
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
/// Detects and masks secrets in the arguments of the s3()/s3Cluster() table functions.
/// Marks the 'aws_secret_access_key' positional argument (or the 'secret_access_key'
/// named argument when a named collection is used) via markSecretArgument()/result.
void findS3FunctionSecretArguments(bool is_cluster_function)
{
    /// s3Cluster('cluster_name', 'url', ...) has 'url' as its second argument.
    size_t url_arg_idx = is_cluster_function ? 1 : 0;

    if (!is_cluster_function && isNamedCollectionName(0))
    {
        /// s3(named_collection, ..., secret_access_key = 'secret_access_key', ...)
        findSecretNamedArgument("secret_access_key", 1);
        return;
    }

    /// We should check other arguments first because we don't need to do any replacement in case of
    /// s3('url', NOSIGN, 'format' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
    /// s3('url', 'format', 'structure' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
    size_t count = excludeS3OrURLNestedMaps();  /// also registers a trailing headers(..) map as containing secrets
    if ((url_arg_idx + 3 <= count) && (count <= url_arg_idx + 4))
    {
        String second_arg;
        if (tryGetStringFromArgument(url_arg_idx + 1, &second_arg))
        {
            if (boost::iequals(second_arg, "NOSIGN"))
                return; /// The argument after 'url' is "NOSIGN".

            if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg))
                return; /// The argument after 'url' is a format: s3('url', 'format', ...)
        }
    }

    /// We're going to replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures:
    /// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...)
    /// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression')
    if (url_arg_idx + 2 < count)
        markSecretArgument(url_arg_idx + 2);
}
|
||||
|
||||
void findAzureBlobStorageFunctionSecretArguments(bool is_cluster_function)
|
||||
{
|
||||
/// azureBlobStorage('cluster_name', 'conn_string/storage_account_url', ...) has 'conn_string/storage_account_url' as its second argument.
|
||||
size_t url_arg_idx = is_cluster_function ? 1 : 0;
|
||||
|
||||
if (!is_cluster_function && isNamedCollectionName(0))
|
||||
{
|
||||
/// azureBlobStorage(named_collection, ..., account_key = 'account_key', ...)
|
||||
findSecretNamedArgument("account_key", 1);
|
||||
return;
|
||||
}
|
||||
else if (is_cluster_function && isNamedCollectionName(1))
|
||||
{
|
||||
/// azureBlobStorageCluster(cluster, named_collection, ..., account_key = 'account_key', ...)
|
||||
findSecretNamedArgument("account_key", 2);
|
||||
return;
|
||||
}
|
||||
|
||||
/// We should check other arguments first because we don't need to do any replacement in case storage_account_url is not used
|
||||
/// azureBlobStorage(connection_string|storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure)
|
||||
/// azureBlobStorageCluster(cluster, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure])
|
||||
size_t count = arguments->size();
|
||||
if ((url_arg_idx + 4 <= count) && (count <= url_arg_idx + 7))
|
||||
{
|
||||
String second_arg;
|
||||
if (tryGetStringFromArgument(url_arg_idx + 3, &second_arg))
|
||||
{
|
||||
if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg))
|
||||
return; /// The argument after 'url' is a format: s3('url', 'format', ...)
|
||||
}
|
||||
}
|
||||
|
||||
/// We're going to replace 'account_key' with '[HIDDEN]' if account_key is used in the signature
|
||||
if (url_arg_idx + 4 < count)
|
||||
markSecretArgument(url_arg_idx + 4);
|
||||
}
|
||||
|
||||
void findURLSecretArguments()
|
||||
{
|
||||
if (!isNamedCollectionName(0))
|
||||
excludeS3OrURLNestedMaps();
|
||||
}
|
||||
|
||||
/// Convenience overload: fetches the argument at `arg_idx` (if it exists) and delegates
/// to the AST-based overload. Returns false when the index is out of range.
bool tryGetStringFromArgument(size_t arg_idx, String * res, bool allow_identifier = true) const
{
    if (arguments->size() <= arg_idx)
        return false;

    const auto & argument_ast = *(*arguments)[arg_idx];
    return tryGetStringFromArgument(argument_ast, res, allow_identifier);
}
|
||||
|
||||
static bool tryGetStringFromArgument(const IAST & argument, String * res, bool allow_identifier = true)
|
||||
{
|
||||
if (const auto * literal = argument.as<ASTLiteral>())
|
||||
{
|
||||
if (literal->value.getType() != Field::Types::String)
|
||||
return false;
|
||||
if (res)
|
||||
*res = literal->value.safeGet<String>();
|
||||
return true;
|
||||
}
|
||||
|
||||
if (allow_identifier)
|
||||
{
|
||||
if (const auto * id = argument.as<ASTIdentifier>())
|
||||
{
|
||||
if (res)
|
||||
*res = id->name();
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Finds and masks the password in remote()/remoteSecure() calls.
/// Handles the named-collection form and the positional forms; when the argument layout
/// is ambiguous, conservatively wipes trailing literal strings that could be a password.
void findRemoteFunctionSecretArguments()
{
    if (isNamedCollectionName(0))
    {
        /// remote(named_collection, ..., password = 'password', ...)
        findSecretNamedArgument("password", 1);
        return;
    }

    /// We're going to replace 'password' with '[HIDDEN'] for the following signatures:
    /// remote('addresses_expr', db.table, 'user' [, 'password'] [, sharding_key])
    /// remote('addresses_expr', 'db', 'table', 'user' [, 'password'] [, sharding_key])
    /// remote('addresses_expr', table_function(), 'user' [, 'password'] [, sharding_key])

    /// But we should check the number of arguments first because we don't need to do any replacements in case of
    /// remote('addresses_expr', db.table)
    if (arguments->size() < 3)
        return;

    size_t arg_num = 1;

    /// Skip 1 or 2 arguments with table_function() or db.table or 'db', 'table'.
    const auto * table_function = (*arguments)[arg_num]->as<ASTFunction>();
    if (table_function && KnownTableFunctionNames::instance().exists(table_function->name))
    {
        ++arg_num;
    }
    else
    {
        std::optional<String> database;
        std::optional<QualifiedTableName> qualified_table_name;
        if (!tryGetDatabaseNameOrQualifiedTableName(arg_num, database, qualified_table_name))
        {
            /// We couldn't evaluate the argument so we don't know whether it is 'db.table' or just 'db'.
            /// Hence we can't figure out whether we should skip one argument 'user' or two arguments 'table', 'user'
            /// before the argument 'password'. So it's safer to wipe two arguments just in case.
            /// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string
            /// before wiping it (because the `password` argument is always a literal string).
            if (tryGetStringFromArgument(arg_num + 2, nullptr, /* allow_identifier= */ false))
            {
                /// Wipe either `password` or `user`.
                markSecretArgument(arg_num + 2);
            }
            if (tryGetStringFromArgument(arg_num + 3, nullptr, /* allow_identifier= */ false))
            {
                /// Wipe either `password` or `sharding_key`.
                markSecretArgument(arg_num + 3);
            }
            return;
        }

        /// Skip the current argument (which is either a database name or a qualified table name).
        ++arg_num;
        if (database)
        {
            /// Skip the 'table' argument if the previous argument was a database name.
            ++arg_num;
        }
    }

    /// Skip username.
    ++arg_num;

    /// Do our replacement:
    /// remote('addresses_expr', db.table, 'user', 'password', ...) -> remote('addresses_expr', db.table, 'user', '[HIDDEN]', ...)
    /// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string
    /// before wiping it (because the `password` argument is always a literal string).
    bool can_be_password = tryGetStringFromArgument(arg_num, nullptr, /* allow_identifier= */ false);
    if (can_be_password)
        markSecretArgument(arg_num);
}
|
||||
|
||||
/// Tries to get either a database name or a qualified table name from an argument.
|
||||
/// Empty string is also allowed (it means the default database).
|
||||
/// The function is used by findRemoteFunctionSecretArguments() to determine how many arguments to skip before a password.
|
||||
bool tryGetDatabaseNameOrQualifiedTableName(
|
||||
size_t arg_idx,
|
||||
std::optional<String> & res_database,
|
||||
std::optional<QualifiedTableName> & res_qualified_table_name) const
|
||||
{
|
||||
res_database.reset();
|
||||
res_qualified_table_name.reset();
|
||||
|
||||
String str;
|
||||
if (!tryGetStringFromArgument(arg_idx, &str, /* allow_identifier= */ true))
|
||||
return false;
|
||||
|
||||
if (str.empty())
|
||||
{
|
||||
res_database = "";
|
||||
return true;
|
||||
}
|
||||
|
||||
auto qualified_table_name = QualifiedTableName::tryParseFromString(str);
|
||||
if (!qualified_table_name)
|
||||
return false;
|
||||
|
||||
if (qualified_table_name->database.empty())
|
||||
res_database = std::move(qualified_table_name->table);
|
||||
else
|
||||
res_qualified_table_name = std::move(qualified_table_name);
|
||||
return true;
|
||||
}
|
||||
|
||||
void findEncryptionFunctionSecretArguments()
|
||||
{
|
||||
if (arguments->empty())
|
||||
return;
|
||||
|
||||
/// We replace all arguments after 'mode' with '[HIDDEN]':
|
||||
/// encrypt('mode', 'plaintext', 'key' [, iv, aad]) -> encrypt('mode', '[HIDDEN]')
|
||||
result.start = 1;
|
||||
result.count = arguments->size() - 1;
|
||||
}
|
||||
|
||||
void findTableEngineSecretArguments()
|
||||
{
|
||||
const String & engine_name = function.name;
|
||||
if (engine_name == "ExternalDistributed")
|
||||
{
|
||||
/// ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password')
|
||||
findExternalDistributedTableEngineSecretArguments();
|
||||
}
|
||||
else if ((engine_name == "MySQL") || (engine_name == "PostgreSQL") ||
|
||||
(engine_name == "MaterializedPostgreSQL") || (engine_name == "MongoDB"))
|
||||
{
|
||||
/// MySQL('host:port', 'database', 'table', 'user', 'password', ...)
|
||||
/// PostgreSQL('host:port', 'database', 'table', 'user', 'password', ...)
|
||||
/// MaterializedPostgreSQL('host:port', 'database', 'table', 'user', 'password', ...)
|
||||
/// MongoDB('host:port', 'database', 'collection', 'user', 'password', ...)
|
||||
findMySQLFunctionSecretArguments();
|
||||
}
|
||||
else if ((engine_name == "S3") || (engine_name == "COSN") || (engine_name == "OSS") ||
|
||||
(engine_name == "DeltaLake") || (engine_name == "Hudi") || (engine_name == "Iceberg") || (engine_name == "S3Queue"))
|
||||
{
|
||||
/// S3('url', ['aws_access_key_id', 'aws_secret_access_key',] ...)
|
||||
findS3TableEngineSecretArguments();
|
||||
}
|
||||
else if (engine_name == "URL")
|
||||
{
|
||||
findURLSecretArguments();
|
||||
}
|
||||
}
|
||||
|
||||
void findExternalDistributedTableEngineSecretArguments()
|
||||
{
|
||||
if (isNamedCollectionName(1))
|
||||
{
|
||||
/// ExternalDistributed('engine', named_collection, ..., password = 'password', ...)
|
||||
findSecretNamedArgument("password", 2);
|
||||
}
|
||||
else
|
||||
{
|
||||
/// ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password')
|
||||
markSecretArgument(5);
|
||||
}
|
||||
}
|
||||
|
||||
/// Finds and masks the 'aws_secret_access_key' argument of the S3 family of table engines
/// (S3/COSN/OSS/DeltaLake/Hudi/Iceberg/S3Queue).
void findS3TableEngineSecretArguments()
{
    if (isNamedCollectionName(0))
    {
        /// S3(named_collection, ..., secret_access_key = 'secret_access_key')
        findSecretNamedArgument("secret_access_key", 1);
        return;
    }

    /// We should check other arguments first because we don't need to do any replacement in case of
    /// S3('url', NOSIGN, 'format' [, 'compression'] [, extra_credentials(..)] [, headers(..)])
    /// S3('url', 'format', 'compression' [, extra_credentials(..)] [, headers(..)])
    size_t count = excludeS3OrURLNestedMaps();  /// also registers a trailing headers(..) map as containing secrets
    if ((3 <= count) && (count <= 4))
    {
        String second_arg;
        if (tryGetStringFromArgument(1, &second_arg))
        {
            if (boost::iequals(second_arg, "NOSIGN"))
                return; /// The argument after 'url' is "NOSIGN".

            if (count == 3)
            {
                if (second_arg == "auto" || KnownFormatNames::instance().exists(second_arg))
                    return; /// The argument after 'url' is a format: S3('url', 'format', ...)
            }
        }
    }

    /// We replace 'aws_secret_access_key' with '[HIDDEN]' for the following signatures:
    /// S3('url', 'aws_access_key_id', 'aws_secret_access_key')
    /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format')
    /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression')
    if (2 < count)
        markSecretArgument(2);
}
|
||||
|
||||
void findDatabaseEngineSecretArguments()
|
||||
{
|
||||
const String & engine_name = function.name;
|
||||
if ((engine_name == "MySQL") || (engine_name == "MaterializeMySQL") ||
|
||||
(engine_name == "MaterializedMySQL") || (engine_name == "PostgreSQL") ||
|
||||
(engine_name == "MaterializedPostgreSQL"))
|
||||
{
|
||||
/// MySQL('host:port', 'database', 'user', 'password')
|
||||
/// PostgreSQL('host:port', 'database', 'user', 'password')
|
||||
findMySQLDatabaseSecretArguments();
|
||||
}
|
||||
else if (engine_name == "S3")
|
||||
{
|
||||
/// S3('url', 'access_key_id', 'secret_access_key')
|
||||
findS3DatabaseSecretArguments();
|
||||
}
|
||||
}
|
||||
|
||||
void findMySQLDatabaseSecretArguments()
|
||||
{
|
||||
if (isNamedCollectionName(0))
|
||||
{
|
||||
/// MySQL(named_collection, ..., password = 'password', ...)
|
||||
findSecretNamedArgument("password", 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
/// MySQL('host:port', 'database', 'user', 'password')
|
||||
markSecretArgument(3);
|
||||
}
|
||||
}
|
||||
|
||||
void findS3DatabaseSecretArguments()
|
||||
{
|
||||
if (isNamedCollectionName(0))
|
||||
{
|
||||
/// S3(named_collection, ..., secret_access_key = 'password', ...)
|
||||
findSecretNamedArgument("secret_access_key", 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
/// S3('url', 'access_key_id', 'secret_access_key')
|
||||
markSecretArgument(2);
|
||||
}
|
||||
}
|
||||
|
||||
void findBackupNameSecretArguments()
|
||||
{
|
||||
const String & engine_name = function.name;
|
||||
if (engine_name == "S3")
|
||||
{
|
||||
/// BACKUP ... TO S3(url, [aws_access_key_id, aws_secret_access_key])
|
||||
markSecretArgument(2);
|
||||
}
|
||||
}
|
||||
|
||||
/// Whether a specified argument can be the name of a named collection?
|
||||
bool isNamedCollectionName(size_t arg_idx) const
|
||||
{
|
||||
if (arguments->size() <= arg_idx)
|
||||
return false;
|
||||
|
||||
const auto * identifier = (*arguments)[arg_idx]->as<ASTIdentifier>();
|
||||
return identifier != nullptr;
|
||||
}
|
||||
|
||||
/// Looks for a secret argument with a specified name. This function looks for arguments in format `key=value` where the key is specified.
|
||||
void findSecretNamedArgument(const std::string_view & key, size_t start = 0)
|
||||
{
|
||||
for (size_t i = start; i < arguments->size(); ++i)
|
||||
{
|
||||
const auto & argument = (*arguments)[i];
|
||||
const auto * equals_func = argument->as<ASTFunction>();
|
||||
if (!equals_func || (equals_func->name != "equals"))
|
||||
continue;
|
||||
|
||||
const auto * expr_list = equals_func->arguments->as<ASTExpressionList>();
|
||||
if (!expr_list)
|
||||
continue;
|
||||
|
||||
const auto & equal_args = expr_list->children;
|
||||
if (equal_args.size() != 2)
|
||||
continue;
|
||||
|
||||
String found_key;
|
||||
if (!tryGetStringFromArgument(*equal_args[0], &found_key))
|
||||
continue;
|
||||
|
||||
if (found_key == key)
|
||||
markSecretArgument(i, /* argument_is_named= */ true);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
|
@ -78,7 +78,9 @@ void ORCOutputStream::write(const void* buf, size_t length)
|
||||
}
|
||||
|
||||
ORCBlockOutputFormat::ORCBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_)
|
||||
: IOutputFormat(header_, out_), format_settings{format_settings_}, output_stream(out_)
|
||||
: IOutputFormat(header_, out_)
|
||||
, format_settings{format_settings_}
|
||||
, output_stream(out_)
|
||||
{
|
||||
for (const auto & type : header_.getDataTypes())
|
||||
data_types.push_back(recursiveRemoveLowCardinality(type));
|
||||
@ -565,6 +567,7 @@ void ORCBlockOutputFormat::prepareWriter()
|
||||
schema = orc::createStructType();
|
||||
options.setCompression(getORCCompression(format_settings.orc.output_compression_method));
|
||||
options.setRowIndexStride(format_settings.orc.output_row_index_stride);
|
||||
options.setDictionaryKeySizeThreshold(format_settings.orc.output_dictionary_key_size_threshold);
|
||||
size_t columns_count = header.columns();
|
||||
for (size_t i = 0; i != columns_count; ++i)
|
||||
schema->addStructField(header.safeGetByPosition(i).name, getORCType(recursiveRemoveLowCardinality(data_types[i])));
|
||||
|
@ -1,6 +1,8 @@
|
||||
#include <Processors/QueryPlan/ReadFromMergeTree.h>
|
||||
|
||||
#include <Core/Settings.h>
|
||||
#include <IO/Operators.h>
|
||||
#include <Interpreters/Cluster.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/ExpressionAnalyzer.h>
|
||||
#include <Interpreters/InterpreterSelectQuery.h>
|
||||
@ -8,6 +10,8 @@
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/ASTIdentifier.h>
|
||||
#include <Parsers/ASTSelectQuery.h>
|
||||
#include <Parsers/ExpressionListParsers.h>
|
||||
#include <Parsers/parseIdentifierOrStringLiteral.h>
|
||||
#include <Processors/ConcatProcessor.h>
|
||||
#include <Processors/Merges/AggregatingSortedTransform.h>
|
||||
#include <Processors/Merges/CollapsingSortedTransform.h>
|
||||
@ -16,6 +20,7 @@
|
||||
#include <Processors/Merges/ReplacingSortedTransform.h>
|
||||
#include <Processors/Merges/SummingSortedTransform.h>
|
||||
#include <Processors/Merges/VersionedCollapsingTransform.h>
|
||||
#include <Processors/QueryPlan/IQueryPlanStep.h>
|
||||
#include <Processors/QueryPlan/PartsSplitter.h>
|
||||
#include <Processors/Sources/NullSource.h>
|
||||
#include <Processors/Transforms/ExpressionTransform.h>
|
||||
@ -24,10 +29,11 @@
|
||||
#include <Processors/Transforms/SelectByIndicesTransform.h>
|
||||
#include <QueryPipeline/QueryPipelineBuilder.h>
|
||||
#include <Storages/MergeTree/MergeTreeDataSelectExecutor.h>
|
||||
#include <Storages/MergeTree/MergeTreeIndexVectorSimilarity.h>
|
||||
#include <Storages/MergeTree/MergeTreeIndexLegacyVectorSimilarity.h>
|
||||
#include <Storages/MergeTree/MergeTreeReadPool.h>
|
||||
#include <Storages/MergeTree/MergeTreeIndexMinMax.h>
|
||||
#include <Storages/MergeTree/MergeTreeIndexVectorSimilarity.h>
|
||||
#include <Storages/MergeTree/MergeTreePrefetchedReadPool.h>
|
||||
#include <Storages/MergeTree/MergeTreeReadPool.h>
|
||||
#include <Storages/MergeTree/MergeTreeReadPoolInOrder.h>
|
||||
#include <Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h>
|
||||
#include <Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.h>
|
||||
@ -41,11 +47,6 @@
|
||||
#include <Common/JSONBuilder.h>
|
||||
#include <Common/isLocalAddress.h>
|
||||
#include <Common/logger_useful.h>
|
||||
#include <Core/Settings.h>
|
||||
#include <Processors/QueryPlan/IQueryPlanStep.h>
|
||||
#include <Parsers/parseIdentifierOrStringLiteral.h>
|
||||
#include <Parsers/ExpressionListParsers.h>
|
||||
#include <Storages/MergeTree/MergeTreeIndexMinMax.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <iterator>
|
||||
@ -381,6 +382,7 @@ Pipe ReadFromMergeTree::readFromPoolParallelReplicas(RangesInDataParts parts_wit
|
||||
.all_callback = all_ranges_callback.value(),
|
||||
.callback = read_task_callback.value(),
|
||||
.number_of_current_replica = number_of_current_replica.value_or(client_info.number_of_current_replica),
|
||||
.total_nodes_count = context->getClusterForParallelReplicas()->getShardsInfo().at(0).getAllNodeCount(),
|
||||
};
|
||||
|
||||
/// We have a special logic for local replica. It has to read less data, because in some cases it should
|
||||
@ -563,6 +565,7 @@ Pipe ReadFromMergeTree::readInOrder(
|
||||
.all_callback = all_ranges_callback.value(),
|
||||
.callback = read_task_callback.value(),
|
||||
.number_of_current_replica = number_of_current_replica.value_or(client_info.number_of_current_replica),
|
||||
.total_nodes_count = context->getClusterForParallelReplicas()->getShardsInfo().at(0).getAllNodeCount(),
|
||||
};
|
||||
|
||||
auto multiplier = context->getSettingsRef().parallel_replicas_single_task_marks_count_multiplier;
|
||||
|
@ -34,8 +34,12 @@ int CertificateReloader::setCertificate(SSL * ssl, const CertificateReloader::Mu
|
||||
auto current = pdata->data.get();
|
||||
if (!current)
|
||||
return -1;
|
||||
return setCertificateCallback(ssl, current.get(), log);
|
||||
}
|
||||
|
||||
if (current->certs_chain.empty())
|
||||
int setCertificateCallback(SSL * ssl, const CertificateReloader::Data * current_data, LoggerPtr log)
|
||||
{
|
||||
if (current_data->certs_chain.empty())
|
||||
return -1;
|
||||
|
||||
if (auto err = SSL_clear_chain_certs(ssl); err != 1)
|
||||
@ -43,12 +47,12 @@ int CertificateReloader::setCertificate(SSL * ssl, const CertificateReloader::Mu
|
||||
LOG_ERROR(log, "Clear certificates {}", Poco::Net::Utility::getLastError());
|
||||
return -1;
|
||||
}
|
||||
if (auto err = SSL_use_certificate(ssl, const_cast<X509 *>(current->certs_chain[0].certificate())); err != 1)
|
||||
if (auto err = SSL_use_certificate(ssl, const_cast<X509 *>(current_data->certs_chain[0].certificate())); err != 1)
|
||||
{
|
||||
LOG_ERROR(log, "Use certificate {}", Poco::Net::Utility::getLastError());
|
||||
return -1;
|
||||
}
|
||||
for (auto cert = current->certs_chain.begin() + 1; cert != current->certs_chain.end(); cert++)
|
||||
for (auto cert = current_data->certs_chain.begin() + 1; cert != current_data->certs_chain.end(); cert++)
|
||||
{
|
||||
if (auto err = SSL_add1_chain_cert(ssl, const_cast<X509 *>(cert->certificate())); err != 1)
|
||||
{
|
||||
@ -56,7 +60,7 @@ int CertificateReloader::setCertificate(SSL * ssl, const CertificateReloader::Mu
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
if (auto err = SSL_use_PrivateKey(ssl, const_cast<EVP_PKEY *>(static_cast<const EVP_PKEY *>(current->key))); err != 1)
|
||||
if (auto err = SSL_use_PrivateKey(ssl, const_cast<EVP_PKEY *>(static_cast<const EVP_PKEY *>(current_data->key))); err != 1)
|
||||
{
|
||||
LOG_ERROR(log, "Use private key {}", Poco::Net::Utility::getLastError());
|
||||
return -1;
|
||||
|
@ -104,6 +104,9 @@ private:
|
||||
mutable std::mutex data_mutex;
|
||||
};
|
||||
|
||||
/// A callback for OpenSSL
|
||||
int setCertificateCallback(SSL * ssl, const CertificateReloader::Data * current_data, LoggerPtr log);
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -1271,7 +1271,7 @@ void TCPHandler::sendReadTaskRequestAssumeLocked()
|
||||
void TCPHandler::sendMergeTreeAllRangesAnnouncementAssumeLocked(InitialAllRangesAnnouncement announcement)
|
||||
{
|
||||
writeVarUInt(Protocol::Server::MergeTreeAllRangesAnnouncement, *out);
|
||||
announcement.serialize(*out);
|
||||
announcement.serialize(*out, client_parallel_replicas_protocol_version);
|
||||
|
||||
out->finishChunk();
|
||||
out->next();
|
||||
@ -1281,7 +1281,7 @@ void TCPHandler::sendMergeTreeAllRangesAnnouncementAssumeLocked(InitialAllRanges
|
||||
void TCPHandler::sendMergeTreeReadTaskRequestAssumeLocked(ParallelReadRequest request)
|
||||
{
|
||||
writeVarUInt(Protocol::Server::MergeTreeReadTaskRequest, *out);
|
||||
request.serialize(*out);
|
||||
request.serialize(*out, client_parallel_replicas_protocol_version);
|
||||
|
||||
out->finishChunk();
|
||||
out->next();
|
||||
@ -1663,6 +1663,9 @@ void TCPHandler::receiveAddendum()
|
||||
readStringBinary(proto_send_chunked_cl, *in);
|
||||
readStringBinary(proto_recv_chunked_cl, *in);
|
||||
}
|
||||
|
||||
if (client_tcp_protocol_version >= DBMS_MIN_REVISION_WITH_VERSIONED_PARALLEL_REPLICAS_PROTOCOL)
|
||||
readVarUInt(client_parallel_replicas_protocol_version, *in);
|
||||
}
|
||||
|
||||
|
||||
@ -1690,6 +1693,8 @@ void TCPHandler::sendHello()
|
||||
writeVarUInt(VERSION_MAJOR, *out);
|
||||
writeVarUInt(VERSION_MINOR, *out);
|
||||
writeVarUInt(DBMS_TCP_PROTOCOL_VERSION, *out);
|
||||
if (client_tcp_protocol_version >= DBMS_MIN_REVISION_WITH_VERSIONED_PARALLEL_REPLICAS_PROTOCOL)
|
||||
writeVarUInt(DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION, *out);
|
||||
if (client_tcp_protocol_version >= DBMS_MIN_REVISION_WITH_SERVER_TIMEZONE)
|
||||
writeStringBinary(DateLUT::instance().getTimeZone(), *out);
|
||||
if (client_tcp_protocol_version >= DBMS_MIN_REVISION_WITH_SERVER_DISPLAY_NAME)
|
||||
|
@ -191,6 +191,7 @@ private:
|
||||
UInt64 client_version_minor = 0;
|
||||
UInt64 client_version_patch = 0;
|
||||
UInt32 client_tcp_protocol_version = 0;
|
||||
UInt32 client_parallel_replicas_protocol_version = 0;
|
||||
String proto_send_chunked_cl = "notchunked";
|
||||
String proto_recv_chunked_cl = "notchunked";
|
||||
String quota_key;
|
||||
|
@ -1142,6 +1142,16 @@ bool AlterCommands::hasFullTextIndex(const StorageInMemoryMetadata & metadata)
|
||||
return false;
|
||||
}
|
||||
|
||||
bool AlterCommands::hasLegacyInvertedIndex(const StorageInMemoryMetadata & metadata)
|
||||
{
|
||||
for (const auto & index : metadata.secondary_indices)
|
||||
{
|
||||
if (index.type == INVERTED_INDEX_NAME)
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool AlterCommands::hasVectorSimilarityIndex(const StorageInMemoryMetadata & metadata)
|
||||
{
|
||||
for (const auto & index : metadata.secondary_indices)
|
||||
|
@ -235,8 +235,9 @@ public:
|
||||
/// additional mutation command (MATERIALIZE_TTL) will be returned.
|
||||
MutationCommands getMutationCommands(StorageInMemoryMetadata metadata, bool materialize_ttl, ContextPtr context, bool with_alters=false) const;
|
||||
|
||||
/// Check if commands have any full-text index
|
||||
/// Check if commands have any full-text index or a (legacy) inverted index
|
||||
static bool hasFullTextIndex(const StorageInMemoryMetadata & metadata);
|
||||
static bool hasLegacyInvertedIndex(const StorageInMemoryMetadata & metadata);
|
||||
|
||||
/// Check if commands have any vector similarity index
|
||||
static bool hasVectorSimilarityIndex(const StorageInMemoryMetadata & metadata);
|
||||
|
@ -3230,6 +3230,10 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context
|
||||
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED,
|
||||
"Experimental full-text index feature is not enabled (turn on setting 'allow_experimental_full_text_index')");
|
||||
|
||||
if (AlterCommands::hasLegacyInvertedIndex(new_metadata) && !settings.allow_experimental_inverted_index)
|
||||
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED,
|
||||
"Experimental inverted index feature is not enabled (turn on setting 'allow_experimental_inverted_index')");
|
||||
|
||||
if (AlterCommands::hasVectorSimilarityIndex(new_metadata) && !settings.allow_experimental_vector_similarity_index)
|
||||
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED,
|
||||
"Experimental vector similarity index is disabled (turn on setting 'allow_experimental_vector_similarity_index')");
|
||||
|
@ -23,6 +23,7 @@
|
||||
#include <Interpreters/ExpressionAnalyzer.h>
|
||||
#include <Interpreters/InterpreterSelectQuery.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/ProcessList.h>
|
||||
#include <Processors/ConcatProcessor.h>
|
||||
#include <Processors/QueryPlan/QueryPlan.h>
|
||||
#include <Processors/QueryPlan/CreatingSetsStep.h>
|
||||
@ -40,6 +41,8 @@
|
||||
#include <Core/UUID.h>
|
||||
#include <Core/Settings.h>
|
||||
#include <Common/CurrentMetrics.h>
|
||||
#include <Common/FailPoint.h>
|
||||
#include <base/sleep.h>
|
||||
#include <DataTypes/DataTypeDate.h>
|
||||
#include <DataTypes/DataTypeEnum.h>
|
||||
#include <DataTypes/DataTypeUUID.h>
|
||||
@ -74,6 +77,11 @@ namespace ErrorCodes
|
||||
extern const int DUPLICATED_PART_UUIDS;
|
||||
}
|
||||
|
||||
namespace FailPoints
|
||||
{
|
||||
extern const char slowdown_index_analysis[];
|
||||
}
|
||||
|
||||
|
||||
MergeTreeDataSelectExecutor::MergeTreeDataSelectExecutor(const MergeTreeData & data_)
|
||||
: data(data_), log(getLogger(data.getLogName() + " (SelectExecutor)"))
|
||||
@ -528,6 +536,8 @@ void MergeTreeDataSelectExecutor::filterPartsByPartition(
|
||||
}
|
||||
|
||||
auto query_context = context->hasQueryContext() ? context->getQueryContext() : context;
|
||||
QueryStatusPtr query_status = context->getProcessListElement();
|
||||
|
||||
PartFilterCounters part_filter_counters;
|
||||
if (query_context->getSettingsRef().allow_experimental_query_deduplication)
|
||||
selectPartsToReadWithUUIDFilter(
|
||||
@ -549,7 +559,8 @@ void MergeTreeDataSelectExecutor::filterPartsByPartition(
|
||||
minmax_columns_types,
|
||||
partition_pruner,
|
||||
max_block_numbers_to_read,
|
||||
part_filter_counters);
|
||||
part_filter_counters,
|
||||
query_status);
|
||||
|
||||
index_stats.emplace_back(ReadFromMergeTree::IndexStat{
|
||||
.type = ReadFromMergeTree::IndexType::None,
|
||||
@ -649,8 +660,13 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd
|
||||
auto mark_cache = context->getIndexMarkCache();
|
||||
auto uncompressed_cache = context->getIndexUncompressedCache();
|
||||
|
||||
auto query_status = context->getProcessListElement();
|
||||
|
||||
auto process_part = [&](size_t part_index)
|
||||
{
|
||||
if (query_status)
|
||||
query_status->checkTimeLimit();
|
||||
|
||||
auto & part = parts[part_index];
|
||||
|
||||
RangesInDataPart ranges(part, part_index);
|
||||
@ -1545,13 +1561,22 @@ void MergeTreeDataSelectExecutor::selectPartsToRead(
|
||||
const DataTypes & minmax_columns_types,
|
||||
const std::optional<PartitionPruner> & partition_pruner,
|
||||
const PartitionIdToMaxBlock * max_block_numbers_to_read,
|
||||
PartFilterCounters & counters)
|
||||
PartFilterCounters & counters,
|
||||
QueryStatusPtr query_status)
|
||||
{
|
||||
MergeTreeData::DataPartsVector prev_parts;
|
||||
std::swap(prev_parts, parts);
|
||||
|
||||
for (const auto & part_or_projection : prev_parts)
|
||||
{
|
||||
if (query_status)
|
||||
query_status->checkTimeLimit();
|
||||
|
||||
fiu_do_on(FailPoints::slowdown_index_analysis,
|
||||
{
|
||||
sleepForMilliseconds(1000);
|
||||
});
|
||||
|
||||
const auto * part = part_or_projection->isProjectionPart() ? part_or_projection->getParentPart() : part_or_projection.get();
|
||||
if (part_values && part_values->find(part->name) == part_values->end())
|
||||
continue;
|
||||
|
@ -126,7 +126,8 @@ private:
|
||||
const DataTypes & minmax_columns_types,
|
||||
const std::optional<PartitionPruner> & partition_pruner,
|
||||
const PartitionIdToMaxBlock * max_block_numbers_to_read,
|
||||
PartFilterCounters & counters);
|
||||
PartFilterCounters & counters,
|
||||
QueryStatusPtr query_status);
|
||||
|
||||
/// Same as previous but also skip parts uuids if any to the query context, or skip parts which uuids marked as excluded.
|
||||
static void selectPartsToReadWithUUIDFilter(
|
||||
|
@ -1,6 +1,92 @@
|
||||
#include <iterator>
|
||||
#include <Storages/MergeTree/MergeTreeReadPoolParallelReplicas.h>
|
||||
|
||||
#include <Core/Settings.h>
|
||||
#include <Interpreters/Context.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <iterator>
|
||||
#include <ranges>
|
||||
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
size_t chooseSegmentSize(
|
||||
LoggerPtr log, size_t mark_segment_size, size_t min_marks_per_task, size_t threads, size_t sum_marks, size_t number_of_replicas)
|
||||
{
|
||||
/// Mark segment size determines the granularity of work distribution between replicas.
|
||||
/// Namely, coordinator will take mark segments of size `mark_segment_size` granules, calculate hash of this segment and assign it to corresponding replica.
|
||||
/// Small segments are good when we read a small random subset of a table, big - when we do full-scan over a large table.
|
||||
/// With small segments there is a problem: consider a query like `select max(time) from wikistat`. Average size of `time` per granule is ~5KB. So when we
|
||||
/// read 128 granules we still read only ~0.5MB of data. With default fs cache segment size of 4MB it means a lot of data will be downloaded and written
|
||||
/// in cache for no reason. General case will look like this:
|
||||
///
|
||||
/// +---------- useful data
|
||||
/// v
|
||||
/// +------+--+------+
|
||||
/// |------|++| |
|
||||
/// |------|++| |
|
||||
/// +------+--+------+
|
||||
/// ^
|
||||
/// predownloaded data -----------+
|
||||
///
|
||||
/// Having large segments solves all the problems in this case. Also bigger segments mean less requests (especially for big tables and full-scans).
|
||||
/// These three values below chosen mostly intuitively. 128 granules is 1M rows - just a good starting point, 16384 seems to still make sense when reading
|
||||
/// billions of rows and 1024 - is a reasonable point in between. We limit our choice to only these three options because when we change segment size
|
||||
/// we essentially change distribution of data between replicas and of course we don't want to use simultaneously tens of different distributions, because
|
||||
/// it would be a huge waste of cache space.
|
||||
constexpr std::array<size_t, 3> borders{128, 1024, 16384};
|
||||
|
||||
LOG_DEBUG(
|
||||
log,
|
||||
"mark_segment_size={}, min_marks_per_task*threads={}, sum_marks/number_of_replicas^2={}",
|
||||
mark_segment_size,
|
||||
min_marks_per_task * threads,
|
||||
sum_marks / number_of_replicas / number_of_replicas);
|
||||
|
||||
/// Here we take max of two numbers:
|
||||
/// * (min_marks_per_task * threads) = the number of marks we request from the coordinator each time - there is no point to have segments smaller than one unit of work for a replica
|
||||
/// * (sum_marks / number_of_replicas^2) - we use consistent hashing for work distribution (including work stealing). If we have a really slow replica
|
||||
/// everything except (1/number_of_replicas) portion of its work will be stolen by other replicas. And it owns (1/number_of_replicas) share of total number of marks.
|
||||
/// Also important to note here that sum_marks is calculated after PK analysis, it means in particular that different segment sizes might be used for the
|
||||
/// same table for different queries (it is intentional).
|
||||
///
|
||||
/// Positive `mark_segment_size` means it is a user provided value, we have to preserve it.
|
||||
if (mark_segment_size == 0)
|
||||
mark_segment_size = std::max(min_marks_per_task * threads, sum_marks / number_of_replicas / number_of_replicas);
|
||||
|
||||
/// Squeeze the value to the borders.
|
||||
mark_segment_size = std::clamp(mark_segment_size, borders.front(), borders.back());
|
||||
/// After we calculated a hopefully good value for segment_size let's just find the maximal border that is not bigger than the chosen value.
|
||||
for (auto border : borders | std::views::reverse)
|
||||
{
|
||||
if (mark_segment_size >= border)
|
||||
{
|
||||
LOG_DEBUG(log, "Chosen segment size: {}", border);
|
||||
return border;
|
||||
}
|
||||
}
|
||||
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
size_t getMinMarksPerTask(size_t min_marks_per_task, const std::vector<DB::MergeTreeReadTaskInfoPtr> & per_part_infos)
|
||||
{
|
||||
for (const auto & info : per_part_infos)
|
||||
min_marks_per_task = std::max(min_marks_per_task, info->min_marks_per_task);
|
||||
|
||||
if (min_marks_per_task == 0)
|
||||
throw DB::Exception(
|
||||
DB::ErrorCodes::BAD_ARGUMENTS, "Chosen number of marks to read is zero (likely because of weird interference of settings)");
|
||||
|
||||
return min_marks_per_task;
|
||||
}
|
||||
}
|
||||
|
||||
namespace ProfileEvents
|
||||
{
|
||||
extern const Event ParallelReplicasReadMarks;
|
||||
}
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -36,17 +122,17 @@ MergeTreeReadPoolParallelReplicas::MergeTreeReadPoolParallelReplicas(
|
||||
context_)
|
||||
, extension(std::move(extension_))
|
||||
, coordination_mode(CoordinationMode::Default)
|
||||
, min_marks_per_task(pool_settings.min_marks_for_concurrent_read)
|
||||
, min_marks_per_task(getMinMarksPerTask(pool_settings.min_marks_for_concurrent_read, per_part_infos))
|
||||
, mark_segment_size(chooseSegmentSize(
|
||||
log,
|
||||
context_->getSettingsRef().parallel_replicas_mark_segment_size,
|
||||
min_marks_per_task,
|
||||
pool_settings.threads,
|
||||
pool_settings.sum_marks,
|
||||
extension.total_nodes_count))
|
||||
{
|
||||
for (const auto & info : per_part_infos)
|
||||
min_marks_per_task = std::max(min_marks_per_task, info->min_marks_per_task);
|
||||
|
||||
if (min_marks_per_task == 0)
|
||||
throw Exception(
|
||||
ErrorCodes::BAD_ARGUMENTS, "Chosen number of marks to read is zero (likely because of weird interference of settings)");
|
||||
|
||||
extension.all_callback(
|
||||
InitialAllRangesAnnouncement(coordination_mode, parts_ranges.getDescriptions(), extension.number_of_current_replica));
|
||||
extension.all_callback(InitialAllRangesAnnouncement(
|
||||
coordination_mode, parts_ranges.getDescriptions(), extension.number_of_current_replica, mark_segment_size));
|
||||
}
|
||||
|
||||
MergeTreeReadTaskPtr MergeTreeReadPoolParallelReplicas::getTask(size_t /*task_idx*/, MergeTreeReadTask * previous_task)
|
||||
@ -111,6 +197,7 @@ MergeTreeReadTaskPtr MergeTreeReadPoolParallelReplicas::getTask(size_t /*task_id
|
||||
if (current_task.ranges.empty())
|
||||
buffered_ranges.pop_front();
|
||||
|
||||
ProfileEvents::increment(ProfileEvents::ParallelReplicasReadMarks, current_sum_marks);
|
||||
return createTask(per_part_infos[part_idx], std::move(ranges_to_read), previous_task);
|
||||
}
|
||||
|
||||
|
@ -31,12 +31,13 @@ public:
|
||||
private:
|
||||
mutable std::mutex mutex;
|
||||
|
||||
LoggerPtr log = getLogger("MergeTreeReadPoolParallelReplicas");
|
||||
const ParallelReadingExtension extension;
|
||||
const CoordinationMode coordination_mode;
|
||||
size_t min_marks_per_task{0};
|
||||
size_t mark_segment_size{0};
|
||||
RangesInDataPartsDescription buffered_ranges;
|
||||
bool no_more_tasks_available{false};
|
||||
LoggerPtr log = getLogger("MergeTreeReadPoolParallelReplicas");
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -1,5 +1,10 @@
|
||||
#include <Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.h>
|
||||
|
||||
namespace ProfileEvents
|
||||
{
|
||||
extern const Event ParallelReplicasReadMarks;
|
||||
}
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
@ -50,11 +55,8 @@ MergeTreeReadPoolParallelReplicasInOrder::MergeTreeReadPoolParallelReplicasInOrd
|
||||
for (const auto & part : parts_ranges)
|
||||
buffered_tasks.push_back({part.data_part->info, MarkRanges{}});
|
||||
|
||||
extension.all_callback(InitialAllRangesAnnouncement(
|
||||
mode,
|
||||
parts_ranges.getDescriptions(),
|
||||
extension.number_of_current_replica
|
||||
));
|
||||
extension.all_callback(
|
||||
InitialAllRangesAnnouncement(mode, parts_ranges.getDescriptions(), extension.number_of_current_replica, /*mark_segment_size_=*/0));
|
||||
}
|
||||
|
||||
MergeTreeReadTaskPtr MergeTreeReadPoolParallelReplicasInOrder::getTask(size_t task_idx, MergeTreeReadTask * previous_task)
|
||||
@ -75,13 +77,14 @@ MergeTreeReadTaskPtr MergeTreeReadPoolParallelReplicasInOrder::getTask(size_t ta
|
||||
{
|
||||
auto result = std::move(desc.ranges);
|
||||
desc.ranges = MarkRanges{};
|
||||
ProfileEvents::increment(ProfileEvents::ParallelReplicasReadMarks, desc.ranges.getNumberOfMarks());
|
||||
return result;
|
||||
}
|
||||
}
|
||||
return std::nullopt;
|
||||
};
|
||||
|
||||
if (auto result = get_from_buffer(); result)
|
||||
if (auto result = get_from_buffer())
|
||||
return createTask(per_part_infos[task_idx], std::move(*result), previous_task);
|
||||
|
||||
if (no_more_tasks)
|
||||
@ -104,7 +107,7 @@ MergeTreeReadTaskPtr MergeTreeReadPoolParallelReplicasInOrder::getTask(size_t ta
|
||||
std::move(new_ranges.begin(), new_ranges.end(), std::back_inserter(old_ranges));
|
||||
}
|
||||
|
||||
if (auto result = get_from_buffer(); result)
|
||||
if (auto result = get_from_buffer())
|
||||
return createTask(per_part_infos[task_idx], std::move(*result), previous_task);
|
||||
|
||||
return nullptr;
|
||||
|
@ -27,6 +27,7 @@ struct ParallelReadingExtension
|
||||
MergeTreeAllRangesCallback all_callback;
|
||||
MergeTreeReadTaskCallback callback;
|
||||
size_t number_of_current_replica{0};
|
||||
size_t total_nodes_count{0};
|
||||
};
|
||||
|
||||
/// Base class for MergeTreeThreadSelectAlgorithm and MergeTreeSelectAlgorithm
|
||||
|
@ -361,11 +361,23 @@ std::optional<MergeTreeWhereOptimizer::OptimizeResult> MergeTreeWhereOptimizer::
|
||||
UInt64 total_size_of_moved_conditions = 0;
|
||||
UInt64 total_number_of_moved_columns = 0;
|
||||
|
||||
/// Remember positions of conditions in where_conditions list
|
||||
/// to keep original order of conditions in prewhere_conditions while moving.
|
||||
std::unordered_map<const Condition *, size_t> condition_positions;
|
||||
size_t position= 0;
|
||||
for (const auto & condition : where_conditions)
|
||||
condition_positions[&condition] = position++;
|
||||
|
||||
/// Move condition and all other conditions depend on the same set of columns.
|
||||
auto move_condition = [&](Conditions::iterator cond_it)
|
||||
{
|
||||
LOG_TRACE(log, "Condition {} moved to PREWHERE", cond_it->node.getColumnName());
|
||||
prewhere_conditions.splice(prewhere_conditions.end(), where_conditions, cond_it);
|
||||
/// Keep the original order of conditions in prewhere_conditions.
|
||||
position = condition_positions[&(*cond_it)];
|
||||
auto prewhere_it = prewhere_conditions.begin();
|
||||
while (condition_positions[&(*prewhere_it)] < position && prewhere_it != prewhere_conditions.end())
|
||||
++prewhere_it;
|
||||
prewhere_conditions.splice(prewhere_it, where_conditions, cond_it);
|
||||
total_size_of_moved_conditions += cond_it->columns_size;
|
||||
total_number_of_moved_columns += cond_it->table_columns.size();
|
||||
|
||||
@ -375,7 +387,12 @@ std::optional<MergeTreeWhereOptimizer::OptimizeResult> MergeTreeWhereOptimizer::
|
||||
if (jt->viable && jt->columns_size == cond_it->columns_size && jt->table_columns == cond_it->table_columns)
|
||||
{
|
||||
LOG_TRACE(log, "Condition {} moved to PREWHERE", jt->node.getColumnName());
|
||||
prewhere_conditions.splice(prewhere_conditions.end(), where_conditions, jt++);
|
||||
/// Keep the original order of conditions in prewhere_conditions.
|
||||
position = condition_positions[&(*jt)];
|
||||
prewhere_it = prewhere_conditions.begin();
|
||||
while (condition_positions[&(*prewhere_it)] < position && prewhere_it != prewhere_conditions.end())
|
||||
++prewhere_it;
|
||||
prewhere_conditions.splice(prewhere_it, where_conditions, jt++);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -212,14 +212,11 @@ using PartRefs = std::deque<Parts::iterator>;
|
||||
class DefaultCoordinator : public ParallelReplicasReadingCoordinator::ImplInterface
|
||||
{
|
||||
public:
|
||||
explicit DefaultCoordinator(size_t replicas_count_, size_t mark_segment_size_)
|
||||
explicit DefaultCoordinator(size_t replicas_count_)
|
||||
: ParallelReplicasReadingCoordinator::ImplInterface(replicas_count_)
|
||||
, mark_segment_size(mark_segment_size_)
|
||||
, replica_status(replicas_count_)
|
||||
, distribution_by_hash_queue(replicas_count_)
|
||||
{
|
||||
if (mark_segment_size == 0)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Zero value provided for `mark_segment_size`");
|
||||
}
|
||||
|
||||
~DefaultCoordinator() override;
|
||||
@ -232,7 +229,7 @@ public:
|
||||
|
||||
private:
|
||||
/// This many granules will represent a single segment of marks that will be assigned to a replica
|
||||
const size_t mark_segment_size{0};
|
||||
size_t mark_segment_size{0};
|
||||
|
||||
bool state_initialized{false};
|
||||
size_t finished_replicas{0};
|
||||
@ -376,17 +373,20 @@ void DefaultCoordinator::initializeReadingState(InitialAllRangesAnnouncement ann
|
||||
if (state_initialized)
|
||||
return;
|
||||
|
||||
for (auto && part : announcement.description)
|
||||
{
|
||||
auto intersecting_it = std::find_if(
|
||||
all_parts_to_read.begin(),
|
||||
all_parts_to_read.end(),
|
||||
[&part](const Part & other) { return !other.description.info.isDisjoint(part.info); });
|
||||
/// To speedup search for adjacent parts
|
||||
Parts known_parts(all_parts_to_read.begin(), all_parts_to_read.end());
|
||||
|
||||
if (intersecting_it != all_parts_to_read.end())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Intersecting parts found in announcement");
|
||||
for (auto && part : announcement.description)
|
||||
{
|
||||
auto intersecting_it = known_parts.lower_bound(Part{.description = part, .replicas = {}});
|
||||
|
||||
all_parts_to_read.push_back(Part{.description = std::move(part), .replicas = {announcement.replica_num}});
|
||||
if (intersecting_it != known_parts.end() && !intersecting_it->description.info.isDisjoint(part.info))
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Intersecting parts found in announcement");
|
||||
|
||||
known_parts.emplace(Part{.description = part, .replicas = {}});
|
||||
all_parts_to_read.push_back(Part{.description = std::move(part), .replicas = {announcement.replica_num}});
|
||||
}
|
||||
}
|
||||
|
||||
std::ranges::sort(
|
||||
@ -394,7 +394,11 @@ void DefaultCoordinator::initializeReadingState(InitialAllRangesAnnouncement ann
|
||||
state_initialized = true;
|
||||
source_replica_for_parts_snapshot = announcement.replica_num;
|
||||
|
||||
LOG_DEBUG(log, "Reading state is fully initialized: {}", fmt::join(all_parts_to_read, "; "));
|
||||
mark_segment_size = announcement.mark_segment_size;
|
||||
if (mark_segment_size == 0)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Zero value provided for `mark_segment_size`");
|
||||
|
||||
LOG_DEBUG(log, "Reading state is fully initialized: {}, mark_segment_size: {}", fmt::join(all_parts_to_read, "; "), mark_segment_size);
|
||||
}
|
||||
|
||||
void DefaultCoordinator::markReplicaAsUnavailable(size_t replica_number)
|
||||
@ -869,8 +873,7 @@ void InOrderCoordinator<mode>::doHandleInitialAllRangesAnnouncement(InitialAllRa
|
||||
/// To get rid of duplicates
|
||||
for (auto && part: announcement.description)
|
||||
{
|
||||
auto the_same_it = std::find_if(all_parts_to_read.begin(), all_parts_to_read.end(),
|
||||
[&part] (const Part & other) { return other.description.info == part.info; });
|
||||
auto the_same_it = all_parts_to_read.find(Part{.description = part, .replicas = {}});
|
||||
|
||||
/// We have the same part - add the info about presence on the corresponding replica to it
|
||||
if (the_same_it != all_parts_to_read.end())
|
||||
@ -882,12 +885,28 @@ void InOrderCoordinator<mode>::doHandleInitialAllRangesAnnouncement(InitialAllRa
|
||||
if (state_initialized)
|
||||
continue;
|
||||
|
||||
auto covering_or_the_same_it = std::find_if(all_parts_to_read.begin(), all_parts_to_read.end(),
|
||||
[&part] (const Part & other) { return other.description.info.contains(part.info) || part.info.contains(other.description.info); });
|
||||
/// Look for the first part >= current
|
||||
auto covering_it = all_parts_to_read.lower_bound(Part{.description = part, .replicas = {}});
|
||||
|
||||
/// It is covering part or we have covering - skip it
|
||||
if (covering_or_the_same_it != all_parts_to_read.end())
|
||||
continue;
|
||||
if (covering_it != all_parts_to_read.end())
|
||||
{
|
||||
/// Checks if other part covers this one or this one covers the other
|
||||
auto is_covered_or_covering = [&part] (const Part & other)
|
||||
{
|
||||
return other.description.info.contains(part.info) || part.info.contains(other.description.info);
|
||||
};
|
||||
|
||||
if (is_covered_or_covering(*covering_it))
|
||||
continue;
|
||||
|
||||
/// Also look at the previous part, it could be covering the current one
|
||||
if (covering_it != all_parts_to_read.begin())
|
||||
{
|
||||
--covering_it;
|
||||
if (is_covered_or_covering(*covering_it))
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
new_rows_to_read += part.rows;
|
||||
|
||||
@ -896,6 +915,21 @@ void InOrderCoordinator<mode>::doHandleInitialAllRangesAnnouncement(InitialAllRa
|
||||
std::sort(ranges.begin(), ranges.end());
|
||||
}
|
||||
|
||||
#ifndef NDEBUG
|
||||
/// Double check that there are no intersecting parts
|
||||
{
|
||||
auto intersecting_part_it = std::adjacent_find(all_parts_to_read.begin(), all_parts_to_read.end(),
|
||||
[] (const Part & lhs, const Part & rhs)
|
||||
{
|
||||
return !lhs.description.info.isDisjoint(rhs.description.info);
|
||||
});
|
||||
|
||||
if (intersecting_part_it != all_parts_to_read.end())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Parts {} and {} intersect",
|
||||
intersecting_part_it->description.info.getPartNameV1(), std::next(intersecting_part_it)->description.info.getPartNameV1());
|
||||
}
|
||||
#endif
|
||||
|
||||
state_initialized = true;
|
||||
|
||||
// progress_callback is not set when local plan is used for initiator
|
||||
@ -1062,7 +1096,7 @@ void ParallelReplicasReadingCoordinator::initialize(CoordinationMode mode)
|
||||
switch (mode)
|
||||
{
|
||||
case CoordinationMode::Default:
|
||||
pimpl = std::make_unique<DefaultCoordinator>(replicas_count, mark_segment_size);
|
||||
pimpl = std::make_unique<DefaultCoordinator>(replicas_count);
|
||||
break;
|
||||
case CoordinationMode::WithOrder:
|
||||
pimpl = std::make_unique<InOrderCoordinator<CoordinationMode::WithOrder>>(replicas_count);
|
||||
@ -1080,8 +1114,7 @@ void ParallelReplicasReadingCoordinator::initialize(CoordinationMode mode)
|
||||
pimpl->markReplicaAsUnavailable(replica);
|
||||
}
|
||||
|
||||
ParallelReplicasReadingCoordinator::ParallelReplicasReadingCoordinator(size_t replicas_count_, size_t mark_segment_size_)
|
||||
: replicas_count(replicas_count_), mark_segment_size(mark_segment_size_)
|
||||
ParallelReplicasReadingCoordinator::ParallelReplicasReadingCoordinator(size_t replicas_count_) : replicas_count(replicas_count_)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -15,7 +15,7 @@ class ParallelReplicasReadingCoordinator
|
||||
public:
|
||||
class ImplInterface;
|
||||
|
||||
explicit ParallelReplicasReadingCoordinator(size_t replicas_count_, size_t mark_segment_size_ = 0);
|
||||
explicit ParallelReplicasReadingCoordinator(size_t replicas_count_);
|
||||
~ParallelReplicasReadingCoordinator();
|
||||
|
||||
void handleInitialAllRangesAnnouncement(InitialAllRangesAnnouncement);
|
||||
@ -35,7 +35,6 @@ private:
|
||||
|
||||
std::mutex mutex;
|
||||
const size_t replicas_count{0};
|
||||
size_t mark_segment_size{0};
|
||||
std::unique_ptr<ImplInterface> pimpl;
|
||||
ProgressCallback progress_callback; // store the callback only to bypass it to coordinator implementation
|
||||
std::set<size_t> replicas_used;
|
||||
|
@ -391,7 +391,7 @@ ReplicatedCheckResult ReplicatedMergeTreePartCheckThread::checkPartImpl(const St
|
||||
{
|
||||
WriteBufferFromOwnString wb;
|
||||
message = PreformattedMessage::create(
|
||||
"Part {} has a broken projections. It will be ignored. Broken projections info: {}",
|
||||
"Part `{}` has broken projections. It will be ignored. Broken projections info: {}",
|
||||
part_name, getCurrentExceptionMessage(true));
|
||||
LOG_DEBUG(log, message);
|
||||
result.action = ReplicatedCheckResult::DoNothing;
|
||||
|
@ -2,10 +2,10 @@
|
||||
#include <Storages/MergeTree/RequestResponse.h>
|
||||
|
||||
#include <Core/ProtocolDefines.h>
|
||||
#include <Common/SipHash.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/VarInt.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <Common/SipHash.h>
|
||||
|
||||
#include <consistent_hashing.h>
|
||||
|
||||
@ -14,25 +14,29 @@ namespace DB
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int UNKNOWN_PROTOCOL;
|
||||
extern const int UNKNOWN_ELEMENT_OF_ENUM;
|
||||
extern const int UNKNOWN_PROTOCOL;
|
||||
extern const int UNKNOWN_ELEMENT_OF_ENUM;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
CoordinationMode validateAndGet(uint8_t candidate)
|
||||
{
|
||||
if (candidate <= static_cast<uint8_t>(CoordinationMode::MAX))
|
||||
return static_cast<CoordinationMode>(candidate);
|
||||
CoordinationMode validateAndGet(uint8_t candidate)
|
||||
{
|
||||
if (candidate <= static_cast<uint8_t>(CoordinationMode::MAX))
|
||||
return static_cast<CoordinationMode>(candidate);
|
||||
|
||||
throw Exception(ErrorCodes::UNKNOWN_ELEMENT_OF_ENUM, "Unknown reading mode: {}", candidate);
|
||||
}
|
||||
throw Exception(ErrorCodes::UNKNOWN_ELEMENT_OF_ENUM, "Unknown reading mode: {}", candidate);
|
||||
}
|
||||
}
|
||||
|
||||
void ParallelReadRequest::serialize(WriteBuffer & out) const
|
||||
void ParallelReadRequest::serialize(WriteBuffer & out, UInt64 initiator_protocol_version) const
|
||||
{
|
||||
UInt64 version = DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION;
|
||||
/// Must be the first
|
||||
/// Previously we didn't maintain backward compatibility and every change was breaking.
|
||||
/// Particularly, we had an equality check for the version. To work around that code
|
||||
/// in previous server versions we now have to lie to them about the version.
|
||||
const UInt64 version = initiator_protocol_version >= DBMS_MIN_REVISION_WITH_VERSIONED_PARALLEL_REPLICAS_PROTOCOL
|
||||
? DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION
|
||||
: DBMS_MIN_SUPPORTED_PARALLEL_REPLICAS_PROTOCOL_VERSION;
|
||||
writeIntBinary(version, out);
|
||||
|
||||
writeIntBinary(mode, out);
|
||||
@ -53,10 +57,12 @@ ParallelReadRequest ParallelReadRequest::deserialize(ReadBuffer & in)
|
||||
{
|
||||
UInt64 version;
|
||||
readIntBinary(version, in);
|
||||
if (version != DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION)
|
||||
throw Exception(ErrorCodes::UNKNOWN_PROTOCOL, "Protocol versions for parallel reading "\
|
||||
"from replicas differ. Got: {}, supported version: {}",
|
||||
version, DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION);
|
||||
if (version < DBMS_MIN_SUPPORTED_PARALLEL_REPLICAS_PROTOCOL_VERSION)
|
||||
throw Exception(
|
||||
ErrorCodes::UNKNOWN_PROTOCOL,
|
||||
"Parallel replicas protocol version is too old. Got: {}, min supported version: {}",
|
||||
version,
|
||||
DBMS_MIN_SUPPORTED_PARALLEL_REPLICAS_PROTOCOL_VERSION);
|
||||
|
||||
CoordinationMode mode;
|
||||
size_t replica_num;
|
||||
@ -70,12 +76,7 @@ ParallelReadRequest ParallelReadRequest::deserialize(ReadBuffer & in)
|
||||
readIntBinary(min_number_of_marks, in);
|
||||
description.deserialize(in);
|
||||
|
||||
return ParallelReadRequest(
|
||||
mode,
|
||||
replica_num,
|
||||
min_number_of_marks,
|
||||
std::move(description)
|
||||
);
|
||||
return ParallelReadRequest(mode, replica_num, min_number_of_marks, std::move(description));
|
||||
}
|
||||
|
||||
void ParallelReadRequest::merge(ParallelReadRequest & other)
|
||||
@ -86,9 +87,14 @@ void ParallelReadRequest::merge(ParallelReadRequest & other)
|
||||
description.merge(other.description);
|
||||
}
|
||||
|
||||
void ParallelReadResponse::serialize(WriteBuffer & out) const
|
||||
void ParallelReadResponse::serialize(WriteBuffer & out, UInt64 replica_protocol_version) const
|
||||
{
|
||||
UInt64 version = DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION;
|
||||
/// Previously we didn't maintain backward compatibility and every change was breaking.
|
||||
/// Particularly, we had an equality check for the version. To work around that code
|
||||
/// in previous server versions we now have to lie to them about the version.
|
||||
UInt64 version = replica_protocol_version >= DBMS_MIN_REVISION_WITH_VERSIONED_PARALLEL_REPLICAS_PROTOCOL
|
||||
? DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION
|
||||
: DBMS_MIN_SUPPORTED_PARALLEL_REPLICAS_PROTOCOL_VERSION;
|
||||
/// Must be the first
|
||||
writeIntBinary(version, out);
|
||||
|
||||
@ -105,25 +111,33 @@ void ParallelReadResponse::deserialize(ReadBuffer & in)
|
||||
{
|
||||
UInt64 version;
|
||||
readIntBinary(version, in);
|
||||
if (version != DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION)
|
||||
throw Exception(ErrorCodes::UNKNOWN_PROTOCOL, "Protocol versions for parallel reading " \
|
||||
"from replicas differ. Got: {}, supported version: {}",
|
||||
version, DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION);
|
||||
if (version < DBMS_MIN_SUPPORTED_PARALLEL_REPLICAS_PROTOCOL_VERSION)
|
||||
throw Exception(
|
||||
ErrorCodes::UNKNOWN_PROTOCOL,
|
||||
"Parallel replicas protocol version is too old. Got: {}, min supported version: {}",
|
||||
version,
|
||||
DBMS_MIN_SUPPORTED_PARALLEL_REPLICAS_PROTOCOL_VERSION);
|
||||
|
||||
readBoolText(finish, in);
|
||||
description.deserialize(in);
|
||||
}
|
||||
|
||||
|
||||
void InitialAllRangesAnnouncement::serialize(WriteBuffer & out) const
|
||||
void InitialAllRangesAnnouncement::serialize(WriteBuffer & out, UInt64 initiator_protocol_version) const
|
||||
{
|
||||
UInt64 version = DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION;
|
||||
/// Must be the first
|
||||
/// Previously we didn't maintain backward compatibility and every change was breaking.
|
||||
/// Particularly, we had an equality check for the version. To work around that code
|
||||
/// in previous server versions we now have to lie to them about the version.
|
||||
UInt64 version = initiator_protocol_version >= DBMS_MIN_REVISION_WITH_VERSIONED_PARALLEL_REPLICAS_PROTOCOL
|
||||
? DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION
|
||||
: DBMS_MIN_SUPPORTED_PARALLEL_REPLICAS_PROTOCOL_VERSION;
|
||||
writeIntBinary(version, out);
|
||||
|
||||
writeIntBinary(mode, out);
|
||||
description.serialize(out);
|
||||
writeIntBinary(replica_num, out);
|
||||
if (initiator_protocol_version >= DBMS_MIN_REVISION_WITH_VERSIONED_PARALLEL_REPLICAS_PROTOCOL)
|
||||
writeIntBinary(mark_segment_size, out);
|
||||
}
|
||||
|
||||
|
||||
@ -132,14 +146,16 @@ String InitialAllRangesAnnouncement::describe()
|
||||
return fmt::format("replica {}, mode {}, {}", replica_num, mode, description.describe());
|
||||
}
|
||||
|
||||
InitialAllRangesAnnouncement InitialAllRangesAnnouncement::deserialize(ReadBuffer & in)
|
||||
InitialAllRangesAnnouncement InitialAllRangesAnnouncement::deserialize(ReadBuffer & in, UInt64 replica_protocol_version)
|
||||
{
|
||||
UInt64 version;
|
||||
readIntBinary(version, in);
|
||||
if (version != DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION)
|
||||
throw Exception(ErrorCodes::UNKNOWN_PROTOCOL, "Protocol versions for parallel reading " \
|
||||
"from replicas differ. Got: {}, supported version: {}",
|
||||
version, DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION);
|
||||
if (version < DBMS_MIN_SUPPORTED_PARALLEL_REPLICAS_PROTOCOL_VERSION)
|
||||
throw Exception(
|
||||
ErrorCodes::UNKNOWN_PROTOCOL,
|
||||
"Parallel replicas protocol version is too old. Got: {}, min supported version: {}",
|
||||
version,
|
||||
DBMS_MIN_SUPPORTED_PARALLEL_REPLICAS_PROTOCOL_VERSION);
|
||||
|
||||
CoordinationMode mode;
|
||||
RangesInDataPartsDescription description;
|
||||
@ -151,11 +167,11 @@ InitialAllRangesAnnouncement InitialAllRangesAnnouncement::deserialize(ReadBuffe
|
||||
description.deserialize(in);
|
||||
readIntBinary(replica_num, in);
|
||||
|
||||
return InitialAllRangesAnnouncement {
|
||||
mode,
|
||||
description,
|
||||
replica_num
|
||||
};
|
||||
size_t mark_segment_size = 128;
|
||||
if (replica_protocol_version >= DBMS_MIN_REVISION_WITH_VERSIONED_PARALLEL_REPLICAS_PROTOCOL)
|
||||
readIntBinary(mark_segment_size, in);
|
||||
|
||||
return InitialAllRangesAnnouncement{mode, description, replica_num, mark_segment_size};
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -63,7 +63,7 @@ struct ParallelReadRequest
|
||||
/// Contains only data part names without mark ranges.
|
||||
RangesInDataPartsDescription description;
|
||||
|
||||
void serialize(WriteBuffer & out) const;
|
||||
void serialize(WriteBuffer & out, UInt64 initiator_protocol_version) const;
|
||||
String describe() const;
|
||||
static ParallelReadRequest deserialize(ReadBuffer & in);
|
||||
void merge(ParallelReadRequest & other);
|
||||
@ -78,7 +78,7 @@ struct ParallelReadResponse
|
||||
bool finish{false};
|
||||
RangesInDataPartsDescription description;
|
||||
|
||||
void serialize(WriteBuffer & out) const;
|
||||
void serialize(WriteBuffer & out, UInt64 replica_protocol_version) const;
|
||||
String describe() const;
|
||||
void deserialize(ReadBuffer & in);
|
||||
};
|
||||
@ -93,21 +93,18 @@ struct InitialAllRangesAnnouncement
|
||||
/// No default constructor, you must initialize all fields at once.
|
||||
|
||||
InitialAllRangesAnnouncement(
|
||||
CoordinationMode mode_,
|
||||
RangesInDataPartsDescription description_,
|
||||
size_t replica_num_)
|
||||
: mode(mode_)
|
||||
, description(description_)
|
||||
, replica_num(replica_num_)
|
||||
CoordinationMode mode_, RangesInDataPartsDescription description_, size_t replica_num_, size_t mark_segment_size_)
|
||||
: mode(mode_), description(std::move(description_)), replica_num(replica_num_), mark_segment_size(mark_segment_size_)
|
||||
{}
|
||||
|
||||
CoordinationMode mode;
|
||||
RangesInDataPartsDescription description;
|
||||
size_t replica_num;
|
||||
size_t mark_segment_size;
|
||||
|
||||
void serialize(WriteBuffer & out) const;
|
||||
void serialize(WriteBuffer & out, UInt64 initiator_protocol_version) const;
|
||||
String describe();
|
||||
static InitialAllRangesAnnouncement deserialize(ReadBuffer & in);
|
||||
static InitialAllRangesAnnouncement deserialize(ReadBuffer & i, UInt64 replica_protocol_version);
|
||||
};
|
||||
|
||||
|
||||
|
@ -323,7 +323,7 @@ static IMergeTreeDataPart::Checksums checkDataPart(
|
||||
broken_projections_message += "\n";
|
||||
|
||||
broken_projections_message += fmt::format(
|
||||
"Part {} has a broken projection {} (error: {})",
|
||||
"Part `{}` has broken projection `{}` (error: {})",
|
||||
data_part->name, name, exception_message);
|
||||
}
|
||||
|
||||
|
@ -110,16 +110,19 @@ private:
|
||||
ObjectStorageQueueMetadata::ObjectStorageQueueMetadata(
|
||||
const fs::path & zookeeper_path_,
|
||||
const ObjectStorageQueueTableMetadata & table_metadata_,
|
||||
const ObjectStorageQueueSettings & settings_)
|
||||
: settings(settings_)
|
||||
, table_metadata(table_metadata_)
|
||||
size_t cleanup_interval_min_ms_,
|
||||
size_t cleanup_interval_max_ms_)
|
||||
: table_metadata(table_metadata_)
|
||||
, mode(table_metadata.getMode())
|
||||
, zookeeper_path(zookeeper_path_)
|
||||
, buckets_num(getBucketsNum(settings_))
|
||||
, buckets_num(getBucketsNum(table_metadata_))
|
||||
, cleanup_interval_min_ms(cleanup_interval_min_ms_)
|
||||
, cleanup_interval_max_ms(cleanup_interval_max_ms_)
|
||||
, log(getLogger("StorageObjectStorageQueue(" + zookeeper_path_.string() + ")"))
|
||||
, local_file_statuses(std::make_shared<LocalFileStatuses>())
|
||||
{
|
||||
if (settings.mode == ObjectStorageQueueMode::UNORDERED
|
||||
&& (settings.tracked_files_limit || settings.tracked_file_ttl_sec))
|
||||
if (mode == ObjectStorageQueueMode::UNORDERED
|
||||
&& (table_metadata.tracked_files_limit || table_metadata.tracked_file_ttl_sec))
|
||||
{
|
||||
task = Context::getGlobalContextInstance()->getSchedulePool().createTask(
|
||||
"ObjectStorageQueueCleanupFunc",
|
||||
@ -128,10 +131,10 @@ ObjectStorageQueueMetadata::ObjectStorageQueueMetadata(
|
||||
task->activate();
|
||||
task->scheduleAfter(
|
||||
generateRescheduleInterval(
|
||||
settings.cleanup_interval_min_ms, settings.cleanup_interval_max_ms));
|
||||
cleanup_interval_min_ms, cleanup_interval_max_ms));
|
||||
}
|
||||
LOG_TRACE(log, "Mode: {}, buckets: {}, processing threads: {}, result buckets num: {}",
|
||||
settings.mode.toString(), settings.buckets, settings.processing_threads_num, buckets_num);
|
||||
table_metadata.mode, table_metadata.buckets, table_metadata.processing_threads_num, buckets_num);
|
||||
|
||||
}
|
||||
|
||||
@ -162,7 +165,7 @@ ObjectStorageQueueMetadata::FileMetadataPtr ObjectStorageQueueMetadata::getFileM
|
||||
ObjectStorageQueueOrderedFileMetadata::BucketInfoPtr bucket_info)
|
||||
{
|
||||
auto file_status = local_file_statuses->get(path, /* create */true);
|
||||
switch (settings.mode.value)
|
||||
switch (mode)
|
||||
{
|
||||
case ObjectStorageQueueMode::ORDERED:
|
||||
return std::make_shared<ObjectStorageQueueOrderedFileMetadata>(
|
||||
@ -171,39 +174,28 @@ ObjectStorageQueueMetadata::FileMetadataPtr ObjectStorageQueueMetadata::getFileM
|
||||
file_status,
|
||||
bucket_info,
|
||||
buckets_num,
|
||||
settings.loading_retries,
|
||||
table_metadata.loading_retries,
|
||||
log);
|
||||
case ObjectStorageQueueMode::UNORDERED:
|
||||
return std::make_shared<ObjectStorageQueueUnorderedFileMetadata>(
|
||||
zookeeper_path,
|
||||
path,
|
||||
file_status,
|
||||
settings.loading_retries,
|
||||
table_metadata.loading_retries,
|
||||
log);
|
||||
}
|
||||
}
|
||||
|
||||
size_t ObjectStorageQueueMetadata::getBucketsNum(const ObjectStorageQueueSettings & settings)
|
||||
size_t ObjectStorageQueueMetadata::getBucketsNum(const ObjectStorageQueueTableMetadata & metadata)
|
||||
{
|
||||
if (settings.buckets)
|
||||
return settings.buckets;
|
||||
if (settings.processing_threads_num)
|
||||
return settings.processing_threads_num;
|
||||
return 0;
|
||||
}
|
||||
|
||||
size_t ObjectStorageQueueMetadata::getBucketsNum(const ObjectStorageQueueTableMetadata & settings)
|
||||
{
|
||||
if (settings.buckets)
|
||||
return settings.buckets;
|
||||
if (settings.processing_threads_num)
|
||||
return settings.processing_threads_num;
|
||||
return 0;
|
||||
if (metadata.buckets)
|
||||
return metadata.buckets;
|
||||
return metadata.processing_threads_num;
|
||||
}
|
||||
|
||||
bool ObjectStorageQueueMetadata::useBucketsForProcessing() const
|
||||
{
|
||||
return settings.mode == ObjectStorageQueueMode::ORDERED && (buckets_num > 1);
|
||||
return mode == ObjectStorageQueueMode::ORDERED && (buckets_num > 1);
|
||||
}
|
||||
|
||||
ObjectStorageQueueMetadata::Bucket ObjectStorageQueueMetadata::getBucketForPath(const std::string & path) const
|
||||
@ -217,18 +209,35 @@ ObjectStorageQueueMetadata::tryAcquireBucket(const Bucket & bucket, const Proces
|
||||
return ObjectStorageQueueOrderedFileMetadata::tryAcquireBucket(zookeeper_path, bucket, processor, log);
|
||||
}
|
||||
|
||||
void ObjectStorageQueueMetadata::syncWithKeeper(
|
||||
ObjectStorageQueueTableMetadata ObjectStorageQueueMetadata::syncWithKeeper(
|
||||
const fs::path & zookeeper_path,
|
||||
const ObjectStorageQueueTableMetadata & table_metadata,
|
||||
const ObjectStorageQueueSettings & settings,
|
||||
const ColumnsDescription & columns,
|
||||
const std::string & format,
|
||||
LoggerPtr log)
|
||||
{
|
||||
const auto table_metadata_path = zookeeper_path / "metadata";
|
||||
const auto buckets_num = getBucketsNum(settings);
|
||||
const auto metadata_paths = settings.mode == ObjectStorageQueueMode::ORDERED
|
||||
? ObjectStorageQueueOrderedFileMetadata::getMetadataPaths(buckets_num)
|
||||
: ObjectStorageQueueUnorderedFileMetadata::getMetadataPaths();
|
||||
ObjectStorageQueueTableMetadata table_metadata(settings, columns, format);
|
||||
|
||||
std::vector<std::string> metadata_paths;
|
||||
size_t buckets_num = 0;
|
||||
if (settings.mode == ObjectStorageQueueMode::ORDERED)
|
||||
{
|
||||
buckets_num = getBucketsNum(table_metadata);
|
||||
if (buckets_num == 0)
|
||||
throw Exception(
|
||||
ErrorCodes::LOGICAL_ERROR,
|
||||
"Cannot have zero values of `processing_threads_num` and `buckets`");
|
||||
|
||||
LOG_TRACE(log, "Local buckets num: {}", buckets_num);
|
||||
|
||||
metadata_paths = ObjectStorageQueueOrderedFileMetadata::getMetadataPaths(buckets_num);
|
||||
}
|
||||
else
|
||||
{
|
||||
metadata_paths = ObjectStorageQueueUnorderedFileMetadata::getMetadataPaths();
|
||||
}
|
||||
|
||||
const auto table_metadata_path = zookeeper_path / "metadata";
|
||||
auto zookeeper = getZooKeeper();
|
||||
zookeeper->createAncestors(zookeeper_path);
|
||||
|
||||
@ -242,7 +251,7 @@ void ObjectStorageQueueMetadata::syncWithKeeper(
|
||||
LOG_TRACE(log, "Metadata in keeper: {}", metadata_str);
|
||||
|
||||
table_metadata.checkEquals(metadata_from_zk);
|
||||
return;
|
||||
return table_metadata;
|
||||
}
|
||||
|
||||
Coordination::Requests requests;
|
||||
@ -256,15 +265,15 @@ void ObjectStorageQueueMetadata::syncWithKeeper(
|
||||
requests.emplace_back(zkutil::makeCreateRequest(zk_path, "", zkutil::CreateMode::Persistent));
|
||||
}
|
||||
|
||||
if (!settings.last_processed_path.value.empty())
|
||||
if (!table_metadata.last_processed_path.empty())
|
||||
{
|
||||
ObjectStorageQueueOrderedFileMetadata(
|
||||
zookeeper_path,
|
||||
settings.last_processed_path,
|
||||
table_metadata.last_processed_path,
|
||||
std::make_shared<FileStatus>(),
|
||||
/* bucket_info */nullptr,
|
||||
buckets_num,
|
||||
settings.loading_retries,
|
||||
table_metadata.loading_retries,
|
||||
log).setProcessedAtStartRequests(requests, zookeeper);
|
||||
}
|
||||
|
||||
@ -283,7 +292,7 @@ void ObjectStorageQueueMetadata::syncWithKeeper(
|
||||
else if (code != Coordination::Error::ZOK)
|
||||
zkutil::KeeperMultiException::check(code, requests, responses);
|
||||
|
||||
return;
|
||||
return table_metadata;
|
||||
}
|
||||
|
||||
throw Exception(
|
||||
@ -295,7 +304,7 @@ void ObjectStorageQueueMetadata::syncWithKeeper(
|
||||
void ObjectStorageQueueMetadata::cleanupThreadFunc()
|
||||
{
|
||||
/// A background task is responsible for maintaining
|
||||
/// settings.tracked_files_limit and max_set_age settings for `unordered` processing mode.
|
||||
/// table_metadata.tracked_files_limit and max_set_age settings for `unordered` processing mode.
|
||||
|
||||
if (shutdown_called)
|
||||
return;
|
||||
@ -314,7 +323,7 @@ void ObjectStorageQueueMetadata::cleanupThreadFunc()
|
||||
|
||||
task->scheduleAfter(
|
||||
generateRescheduleInterval(
|
||||
settings.cleanup_interval_min_ms, settings.cleanup_interval_max_ms));
|
||||
cleanup_interval_min_ms, cleanup_interval_max_ms));
|
||||
}
|
||||
|
||||
void ObjectStorageQueueMetadata::cleanupThreadFuncImpl()
|
||||
@ -357,11 +366,11 @@ void ObjectStorageQueueMetadata::cleanupThreadFuncImpl()
|
||||
return;
|
||||
}
|
||||
|
||||
chassert(settings.tracked_files_limit || settings.tracked_file_ttl_sec);
|
||||
const bool check_nodes_limit = settings.tracked_files_limit > 0;
|
||||
const bool check_nodes_ttl = settings.tracked_file_ttl_sec > 0;
|
||||
chassert(table_metadata.tracked_files_limit || table_metadata.tracked_file_ttl_sec);
|
||||
const bool check_nodes_limit = table_metadata.tracked_files_limit > 0;
|
||||
const bool check_nodes_ttl = table_metadata.tracked_file_ttl_sec > 0;
|
||||
|
||||
const bool nodes_limit_exceeded = nodes_num > settings.tracked_files_limit;
|
||||
const bool nodes_limit_exceeded = nodes_num > table_metadata.tracked_files_limit;
|
||||
if ((!nodes_limit_exceeded || !check_nodes_limit) && !check_nodes_ttl)
|
||||
{
|
||||
LOG_TEST(log, "No limit exceeded");
|
||||
@ -434,9 +443,9 @@ void ObjectStorageQueueMetadata::cleanupThreadFuncImpl()
|
||||
wb << fmt::format("Node: {}, path: {}, timestamp: {};\n", node, metadata.file_path, metadata.last_processed_timestamp);
|
||||
return wb.str();
|
||||
};
|
||||
LOG_TEST(log, "Checking node limits (max size: {}, max age: {}) for {}", settings.tracked_files_limit, settings.tracked_file_ttl_sec, get_nodes_str());
|
||||
LOG_TEST(log, "Checking node limits (max size: {}, max age: {}) for {}", table_metadata.tracked_files_limit, table_metadata.tracked_file_ttl_sec, get_nodes_str());
|
||||
|
||||
size_t nodes_to_remove = check_nodes_limit && nodes_limit_exceeded ? nodes_num - settings.tracked_files_limit : 0;
|
||||
size_t nodes_to_remove = check_nodes_limit && nodes_limit_exceeded ? nodes_num - table_metadata.tracked_files_limit : 0;
|
||||
for (const auto & node : sorted_nodes)
|
||||
{
|
||||
if (nodes_to_remove)
|
||||
@ -455,7 +464,7 @@ void ObjectStorageQueueMetadata::cleanupThreadFuncImpl()
|
||||
else if (check_nodes_ttl)
|
||||
{
|
||||
UInt64 node_age = getCurrentTime() - node.metadata.last_processed_timestamp;
|
||||
if (node_age >= settings.tracked_file_ttl_sec)
|
||||
if (node_age >= table_metadata.tracked_file_ttl_sec)
|
||||
{
|
||||
LOG_TRACE(log, "Removing node at path {} ({}) because file ttl is reached",
|
||||
node.metadata.file_path, node.zk_path);
|
||||
|
@ -56,14 +56,16 @@ public:
|
||||
ObjectStorageQueueMetadata(
|
||||
const fs::path & zookeeper_path_,
|
||||
const ObjectStorageQueueTableMetadata & table_metadata_,
|
||||
const ObjectStorageQueueSettings & settings_);
|
||||
size_t cleanup_interval_min_ms_,
|
||||
size_t cleanup_interval_max_ms_);
|
||||
|
||||
~ObjectStorageQueueMetadata();
|
||||
|
||||
static void syncWithKeeper(
|
||||
static ObjectStorageQueueTableMetadata syncWithKeeper(
|
||||
const fs::path & zookeeper_path,
|
||||
const ObjectStorageQueueTableMetadata & table_metadata,
|
||||
const ObjectStorageQueueSettings & settings,
|
||||
const ColumnsDescription & columns,
|
||||
const std::string & format,
|
||||
LoggerPtr log);
|
||||
|
||||
void shutdown();
|
||||
@ -78,8 +80,7 @@ public:
|
||||
Bucket getBucketForPath(const std::string & path) const;
|
||||
ObjectStorageQueueOrderedFileMetadata::BucketHolderPtr tryAcquireBucket(const Bucket & bucket, const Processor & processor);
|
||||
|
||||
static size_t getBucketsNum(const ObjectStorageQueueSettings & settings);
|
||||
static size_t getBucketsNum(const ObjectStorageQueueTableMetadata & settings);
|
||||
static size_t getBucketsNum(const ObjectStorageQueueTableMetadata & metadata);
|
||||
|
||||
void checkTableMetadataEquals(const ObjectStorageQueueMetadata & other);
|
||||
|
||||
@ -90,10 +91,11 @@ private:
|
||||
void cleanupThreadFunc();
|
||||
void cleanupThreadFuncImpl();
|
||||
|
||||
ObjectStorageQueueSettings settings;
|
||||
ObjectStorageQueueTableMetadata table_metadata;
|
||||
const ObjectStorageQueueMode mode;
|
||||
const fs::path zookeeper_path;
|
||||
const size_t buckets_num;
|
||||
const size_t cleanup_interval_min_ms, cleanup_interval_max_ms;
|
||||
|
||||
LoggerPtr log;
|
||||
|
||||
|
@ -27,6 +27,12 @@ namespace
|
||||
return ObjectStorageQueueMode::UNORDERED;
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected ObjectStorageQueue mode: {}", mode);
|
||||
}
|
||||
|
||||
void validateMode(const std::string & mode)
|
||||
{
|
||||
if (mode != "ordered" && mode != "unordered")
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected ObjectStorageQueue mode: {}", mode);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -42,6 +48,8 @@ ObjectStorageQueueTableMetadata::ObjectStorageQueueTableMetadata(
|
||||
, tracked_file_ttl_sec(engine_settings.tracked_file_ttl_sec)
|
||||
, buckets(engine_settings.buckets)
|
||||
, processing_threads_num(engine_settings.processing_threads_num)
|
||||
, last_processed_path(engine_settings.last_processed_path)
|
||||
, loading_retries(engine_settings.loading_retries)
|
||||
{
|
||||
}
|
||||
|
||||
@ -57,6 +65,7 @@ String ObjectStorageQueueTableMetadata::toString() const
|
||||
json.set("format_name", format_name);
|
||||
json.set("columns", columns);
|
||||
json.set("last_processed_file", last_processed_path);
|
||||
json.set("loading_retries", loading_retries);
|
||||
|
||||
std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
|
||||
oss.exceptions(std::ios::failbit);
|
||||
@ -64,6 +73,11 @@ String ObjectStorageQueueTableMetadata::toString() const
|
||||
return oss.str();
|
||||
}
|
||||
|
||||
ObjectStorageQueueMode ObjectStorageQueueTableMetadata::getMode() const
|
||||
{
|
||||
return modeFromString(mode);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static auto getOrDefault(
|
||||
const Poco::JSON::Object::Ptr & json,
|
||||
@ -90,7 +104,9 @@ ObjectStorageQueueTableMetadata::ObjectStorageQueueTableMetadata(const Poco::JSO
|
||||
, buckets(getOrDefault(json, "buckets", "", 0))
|
||||
, processing_threads_num(getOrDefault(json, "processing_threads_num", "s3queue_", 1))
|
||||
, last_processed_path(getOrDefault<String>(json, "last_processed_file", "s3queue_", ""))
|
||||
, loading_retries(getOrDefault(json, "loading_retries", "", 10))
|
||||
{
|
||||
validateMode(mode);
|
||||
}
|
||||
|
||||
ObjectStorageQueueTableMetadata ObjectStorageQueueTableMetadata::parse(const String & metadata_str)
|
||||
|
@ -27,6 +27,7 @@ struct ObjectStorageQueueTableMetadata
|
||||
const UInt64 buckets;
|
||||
const UInt64 processing_threads_num;
|
||||
const String last_processed_path;
|
||||
const UInt64 loading_retries;
|
||||
|
||||
ObjectStorageQueueTableMetadata(
|
||||
const ObjectStorageQueueSettings & engine_settings,
|
||||
@ -39,6 +40,8 @@ struct ObjectStorageQueueTableMetadata
|
||||
|
||||
String toString() const;
|
||||
|
||||
ObjectStorageQueueMode getMode() const;
|
||||
|
||||
void checkEquals(const ObjectStorageQueueTableMetadata & from_zk) const;
|
||||
|
||||
private:
|
||||
|
@ -62,7 +62,7 @@ namespace
|
||||
return zkutil::extractZooKeeperPath(result_zk_path, true);
|
||||
}
|
||||
|
||||
void checkAndAdjustSettings(
|
||||
void validateSettings(
|
||||
ObjectStorageQueueSettings & queue_settings,
|
||||
bool is_attach)
|
||||
{
|
||||
@ -144,7 +144,7 @@ StorageObjectStorageQueue::StorageObjectStorageQueue(
|
||||
throw Exception(ErrorCodes::BAD_QUERY_PARAMETER, "ObjectStorageQueue url must either end with '/' or contain globs");
|
||||
}
|
||||
|
||||
checkAndAdjustSettings(*queue_settings, mode > LoadingStrictnessLevel::CREATE);
|
||||
validateSettings(*queue_settings, mode > LoadingStrictnessLevel::CREATE);
|
||||
|
||||
object_storage = configuration->createObjectStorage(context_, /* is_readonly */true);
|
||||
FormatFactory::instance().checkFormatName(configuration->format);
|
||||
@ -164,10 +164,12 @@ StorageObjectStorageQueue::StorageObjectStorageQueue(
|
||||
|
||||
LOG_INFO(log, "Using zookeeper path: {}", zk_path.string());
|
||||
|
||||
ObjectStorageQueueTableMetadata table_metadata(*queue_settings, storage_metadata.getColumns(), configuration_->format);
|
||||
ObjectStorageQueueMetadata::syncWithKeeper(zk_path, table_metadata, *queue_settings, log);
|
||||
auto table_metadata = ObjectStorageQueueMetadata::syncWithKeeper(
|
||||
zk_path, *queue_settings, storage_metadata.getColumns(), configuration_->format, log);
|
||||
|
||||
auto queue_metadata = std::make_unique<ObjectStorageQueueMetadata>(
|
||||
zk_path, std::move(table_metadata), queue_settings->cleanup_interval_min_ms, queue_settings->cleanup_interval_max_ms);
|
||||
|
||||
auto queue_metadata = std::make_unique<ObjectStorageQueueMetadata>(zk_path, std::move(table_metadata), *queue_settings);
|
||||
files_metadata = ObjectStorageQueueMetadataFactory::instance().getOrCreate(zk_path, std::move(queue_metadata));
|
||||
|
||||
task = getContext()->getSchedulePool().createTask("ObjectStorageQueueStreamingTask", [this] { threadFunc(); });
|
||||
|
@ -1,4 +1,5 @@
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
@ -6,10 +7,12 @@ import sys
|
||||
import time
|
||||
from contextlib import contextmanager
|
||||
from pathlib import Path
|
||||
from typing import Any, Iterator, List, Union, Optional, Sequence
|
||||
from typing import Any, Dict, Iterator, List, Optional, Sequence, Tuple, Union
|
||||
|
||||
import requests
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Envs:
|
||||
GITHUB_REPOSITORY = os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")
|
||||
@ -36,6 +39,34 @@ def cd(path: Union[Path, str]) -> Iterator[None]:
|
||||
os.chdir(oldpwd)
|
||||
|
||||
|
||||
def kill_ci_runner(message: str) -> None:
|
||||
"""The function to kill the current process with all parents when it's possible.
|
||||
Works only when run with the set `CI` environment"""
|
||||
if not os.getenv("CI", ""): # cycle import env_helper
|
||||
logger.info("Running outside the CI, won't kill the runner")
|
||||
return
|
||||
print(f"::error::{message}")
|
||||
|
||||
def get_ppid_name(pid: int) -> Tuple[int, str]:
|
||||
# Avoid using psutil, it's not in stdlib
|
||||
stats = Path(f"/proc/{pid}/stat").read_text(encoding="utf-8").split()
|
||||
return int(stats[3]), stats[1]
|
||||
|
||||
pid = os.getpid()
|
||||
pids = {} # type: Dict[str, str]
|
||||
while pid:
|
||||
ppid, name = get_ppid_name(pid)
|
||||
pids[str(pid)] = name
|
||||
pid = ppid
|
||||
logger.error(
|
||||
"Sleeping 5 seconds and killing all possible processes from following:\n %s",
|
||||
"\n ".join(f"{p}: {n}" for p, n in pids.items()),
|
||||
)
|
||||
time.sleep(5)
|
||||
# The current process will be killed too
|
||||
subprocess.run(f"kill -9 {' '.join(pids.keys())}", check=False, shell=True)
|
||||
|
||||
|
||||
class GH:
|
||||
class ActionsNames:
|
||||
RunConfig = "RunConfig"
|
||||
|
@ -19,11 +19,12 @@ from collections import defaultdict
|
||||
from itertools import chain
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from ci_utils import kill_ci_runner
|
||||
from env_helper import IS_CI
|
||||
from integration_test_images import IMAGES
|
||||
from tee_popen import TeePopen
|
||||
from report import JOB_TIMEOUT_TEST_NAME
|
||||
from stopwatch import Stopwatch
|
||||
from tee_popen import TeePopen
|
||||
|
||||
MAX_RETRY = 1
|
||||
NUM_WORKERS = 5
|
||||
@ -332,7 +333,9 @@ class ClickhouseIntegrationTestsRunner:
|
||||
except subprocess.CalledProcessError as err:
|
||||
logging.info("docker-compose pull failed: %s", str(err))
|
||||
continue
|
||||
logging.error("Pulling images failed for 5 attempts. Will fail the worker.")
|
||||
message = "Pulling images failed for 5 attempts. Will fail the worker."
|
||||
logging.error(message)
|
||||
kill_ci_runner(message)
|
||||
# We pass specific retcode to to ci/integration_test_check.py to skip status reporting and restart job
|
||||
sys.exit(13)
|
||||
|
||||
|
@ -835,7 +835,9 @@ class SettingsRandomizer:
|
||||
),
|
||||
"remote_filesystem_read_method": lambda: random.choice(["read", "threadpool"]),
|
||||
"local_filesystem_read_prefetch": lambda: random.randint(0, 1),
|
||||
"filesystem_cache_segments_batch_size": lambda: random.choice([0, 3, 10, 50]),
|
||||
"filesystem_cache_segments_batch_size": lambda: random.choice(
|
||||
[0, 1, 2, 3, 5, 10, 50, 100]
|
||||
),
|
||||
"read_from_filesystem_cache_if_exists_otherwise_bypass_cache": lambda: random.randint(
|
||||
0, 1
|
||||
),
|
||||
|
@ -4100,7 +4100,7 @@ class ClickHouseInstance:
|
||||
exclusion_substring="",
|
||||
):
|
||||
if from_host:
|
||||
# We check fist file exists but want to look for all rotated logs as well
|
||||
# We check first file exists but want to look for all rotated logs as well
|
||||
result = subprocess_check_call(
|
||||
[
|
||||
"bash",
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user