From 0762e1a8901c9cdc798582cb7f3fa74eb02b1834 Mon Sep 17 00:00:00 2001 From: Denis Glazachev Date: Mon, 29 Mar 2021 02:23:20 +0400 Subject: [PATCH 001/204] Implement config parsing and actual support for user_dn_detection section and user_dn placeholder substitution Refactor some config parsing code Rename some arguments to better reflect their meaning Add documentation for user_dn_detection section and user_dn placeholder in config.xml and in docs --- .../external-authenticators/ldap.md | 42 +++++++- programs/server/config.xml | 47 +++++++- src/Access/ExternalAuthenticators.cpp | 101 +++++++++++++----- src/Access/ExternalAuthenticators.h | 6 +- src/Access/LDAPAccessStorage.cpp | 29 +---- src/Access/LDAPAccessStorage.h | 4 +- src/Access/LDAPClient.cpp | 92 ++++++++++++---- src/Access/LDAPClient.h | 17 ++- 8 files changed, 252 insertions(+), 86 deletions(-) diff --git a/docs/en/operations/external-authenticators/ldap.md b/docs/en/operations/external-authenticators/ldap.md index 1b65ecc968b..805d45e1b38 100644 --- a/docs/en/operations/external-authenticators/ldap.md +++ b/docs/en/operations/external-authenticators/ldap.md @@ -17,6 +17,7 @@ To define LDAP server you must add `ldap_servers` section to the `config.xml`. + localhost 636 @@ -31,6 +32,18 @@ To define LDAP server you must add `ldap_servers` section to the `config.xml`. /path/to/tls_ca_cert_dir ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:AES256-GCM-SHA384 + + + + localhost + 389 + EXAMPLE\{user_name} + + CN=Users,DC=example,DC=com + (&(objectClass=user)(sAMAccountName={user_name})) + + no + ``` @@ -43,6 +56,15 @@ Note, that you can define multiple LDAP servers inside the `ldap_servers` sectio - `port` — LDAP server port, default is `636` if `enable_tls` is set to `true`, `389` otherwise. - `bind_dn` — Template used to construct the DN to bind to. - The resulting DN will be constructed by replacing all `{user_name}` substrings of the template with the actual user name during each authentication attempt. +- `user_dn_detection` - Section with LDAP search parameters for detecting the actual user DN of the bound user. + - This is mainly used in search filters for further role mapping when the server is Active Directory. The resulting user DN will be used when replacing `{user_dn}` substrings wherever they are allowed. By default, user DN is set equal to bind DN, but once search is performed, it will be updated with to the actual detected user DN value. + - `base_dn` - Template used to construct the base DN for the LDAP search. + - The resulting DN will be constructed by replacing all `{user_name}` and `{bind_dn}` substrings of the template with the actual user name and bind DN during the LDAP search. + - `scope` - Scope of the LDAP search. + - Accepted values are: `base`, `one_level`, `children`, `subtree` (the default). + - `search_filter` - Template used to construct the search filter for the LDAP search. + - The resulting filter will be constructed by replacing all `{user_name}`, `{bind_dn}`, and `{base_dn}` substrings of the template with the actual user name, bind DN, and base DN during the LDAP search. + - Note, that the special characters must be escaped properly in XML. - `verification_cooldown` — A period of time, in seconds, after a successful bind attempt, during which the user will be assumed to be successfully authenticated for all consecutive requests without contacting the LDAP server. - Specify `0` (the default) to disable caching and force contacting the LDAP server for each authentication request. 
- `enable_tls` — A flag to trigger the use of the secure connection to the LDAP server. @@ -107,7 +129,7 @@ Goes into `config.xml`. - + my_ldap_server @@ -122,6 +144,18 @@ Goes into `config.xml`. clickhouse_ + + + + my_ad_server + + CN=Users,DC=example,DC=com + CN + subtree + (&(objectClass=group)(member={user_dn})) + clickhouse_ + + ``` @@ -137,13 +171,13 @@ Note that `my_ldap_server` referred in the `ldap` section inside the `user_direc - When a user authenticates, while still bound to LDAP, an LDAP search is performed using `search_filter` and the name of the logged-in user. For each entry found during that search, the value of the specified attribute is extracted. For each attribute value that has the specified prefix, the prefix is removed, and the rest of the value becomes the name of a local role defined in ClickHouse, which is expected to be created beforehand by the [CREATE ROLE](../../sql-reference/statements/create/role.md#create-role-statement) statement. - There can be multiple `role_mapping` sections defined inside the same `ldap` section. All of them will be applied. - `base_dn` — Template used to construct the base DN for the LDAP search. - - The resulting DN will be constructed by replacing all `{user_name}` and `{bind_dn}` substrings of the template with the actual user name and bind DN during each LDAP search. + - The resulting DN will be constructed by replacing all `{user_name}`, `{bind_dn}`, and `{user_dn}` substrings of the template with the actual user name, bind DN, and user DN during each LDAP search. - `scope` — Scope of the LDAP search. - Accepted values are: `base`, `one_level`, `children`, `subtree` (the default). - `search_filter` — Template used to construct the search filter for the LDAP search. - - The resulting filter will be constructed by replacing all `{user_name}`, `{bind_dn}` and `{base_dn}` substrings of the template with the actual user name, bind DN and base DN during each LDAP search. + - The resulting filter will be constructed by replacing all `{user_name}`, `{bind_dn}`, `{user_dn}`, and `{base_dn}` substrings of the template with the actual user name, bind DN, user DN, and base DN during each LDAP search. - Note, that the special characters must be escaped properly in XML. - - `attribute` — Attribute name whose values will be returned by the LDAP search. + - `attribute` — Attribute name whose values will be returned by the LDAP search. `cn`, by default. - `prefix` — Prefix, that will be expected to be in front of each string in the original list of strings returned by the LDAP search. The prefix will be removed from the original strings and the resulting strings will be treated as local role names. Empty by default. [Original article](https://clickhouse.tech/docs/en/operations/external-authenticators/ldap/) diff --git a/programs/server/config.xml b/programs/server/config.xml index 4220ecbcacd..b6df1c42cc0 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -328,6 +328,20 @@ bind_dn - template used to construct the DN to bind to. The resulting DN will be constructed by replacing all '{user_name}' substrings of the template with the actual user name during each authentication attempt. + user_dn_detection - section with LDAP search parameters for detecting the actual user DN of the bound user. + This is mainly used in search filters for further role mapping when the server is Active Directory. The + resulting user DN will be used when replacing '{user_dn}' substrings wherever they are allowed. 
By default, + user DN is set equal to bind DN, but once search is performed, it will be updated with to the actual detected + user DN value. + base_dn - template used to construct the base DN for the LDAP search. + The resulting DN will be constructed by replacing all '{user_name}' and '{bind_dn}' substrings + of the template with the actual user name and bind DN during the LDAP search. + scope - scope of the LDAP search. + Accepted values are: 'base', 'one_level', 'children', 'subtree' (the default). + search_filter - template used to construct the search filter for the LDAP search. + The resulting filter will be constructed by replacing all '{user_name}', '{bind_dn}', and '{base_dn}' + substrings of the template with the actual user name, bind DN, and base DN during the LDAP search. + Note, that the special characters must be escaped properly in XML. verification_cooldown - a period of time, in seconds, after a successful bind attempt, during which a user will be assumed to be successfully authenticated for all consecutive requests without contacting the LDAP server. Specify 0 (the default) to disable caching and force contacting the LDAP server for each authentication request. @@ -359,6 +373,17 @@ /path/to/tls_ca_cert_dir ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:AES256-GCM-SHA384 + Example (typical Active Directory with configured user DN detection for further role mapping): + + localhost + 389 + EXAMPLE\{user_name} + + CN=Users,DC=example,DC=com + (&(objectClass=user)(sAMAccountName={user_name})) + + no + --> @@ -410,15 +435,16 @@ There can be multiple 'role_mapping' sections defined inside the same 'ldap' section. All of them will be applied. base_dn - template used to construct the base DN for the LDAP search. - The resulting DN will be constructed by replacing all '{user_name}' and '{bind_dn}' substrings - of the template with the actual user name and bind DN during each LDAP search. + The resulting DN will be constructed by replacing all '{user_name}', '{bind_dn}', and '{user_dn}' + substrings of the template with the actual user name, bind DN, and user DN during each LDAP search. scope - scope of the LDAP search. Accepted values are: 'base', 'one_level', 'children', 'subtree' (the default). search_filter - template used to construct the search filter for the LDAP search. - The resulting filter will be constructed by replacing all '{user_name}', '{bind_dn}', and '{base_dn}' - substrings of the template with the actual user name, bind DN, and base DN during each LDAP search. + The resulting filter will be constructed by replacing all '{user_name}', '{bind_dn}', '{user_dn}', and + '{base_dn}' substrings of the template with the actual user name, bind DN, user DN, and base DN during + each LDAP search. Note, that the special characters must be escaped properly in XML. - attribute - attribute name whose values will be returned by the LDAP search. + attribute - attribute name whose values will be returned by the LDAP search. 'cn', by default. prefix - prefix, that will be expected to be in front of each string in the original list of strings returned by the LDAP search. Prefix will be removed from the original strings and resulting strings will be treated as local role names. Empty, by default. 
@@ -437,6 +463,17 @@ clickhouse_ + Example (typical Active Directory with role mapping that relies on the detected user DN): + + my_ad_server + + CN=Users,DC=example,DC=com + CN + subtree + (&(objectClass=group)(member={user_dn})) + clickhouse_ + + --> diff --git a/src/Access/ExternalAuthenticators.cpp b/src/Access/ExternalAuthenticators.cpp index 1cade973724..9eaf2a4b04b 100644 --- a/src/Access/ExternalAuthenticators.cpp +++ b/src/Access/ExternalAuthenticators.cpp @@ -20,13 +20,42 @@ namespace ErrorCodes namespace { -auto parseLDAPServer(const Poco::Util::AbstractConfiguration & config, const String & name) +void parseLDAPSearchParams(LDAPClient::SearchParams & params, const Poco::Util::AbstractConfiguration & config, const String & prefix) +{ + const bool has_base_dn = config.has(prefix + ".base_dn"); + const bool has_search_filter = config.has(prefix + ".search_filter"); + const bool has_attribute = config.has(prefix + ".attribute"); + const bool has_scope = config.has(prefix + ".scope"); + + if (has_base_dn) + params.base_dn = config.getString(prefix + ".base_dn"); + + if (has_search_filter) + params.search_filter = config.getString(prefix + ".search_filter"); + + if (has_attribute) + params.attribute = config.getString(prefix + ".attribute"); + + if (has_scope) + { + auto scope = config.getString(prefix + ".scope"); + boost::algorithm::to_lower(scope); + + if (scope == "base") params.scope = LDAPClient::SearchParams::Scope::BASE; + else if (scope == "one_level") params.scope = LDAPClient::SearchParams::Scope::ONE_LEVEL; + else if (scope == "subtree") params.scope = LDAPClient::SearchParams::Scope::SUBTREE; + else if (scope == "children") params.scope = LDAPClient::SearchParams::Scope::CHILDREN; + else + throw Exception("Invalid value for 'scope' field of LDAP search parameters in '" + prefix + + "' section, must be one of 'base', 'one_level', 'subtree', or 'children'", ErrorCodes::BAD_ARGUMENTS); + } +} + +void parseLDAPServer(LDAPClient::Params & params, const Poco::Util::AbstractConfiguration & config, const String & name) { if (name.empty()) throw Exception("LDAP server name cannot be empty", ErrorCodes::BAD_ARGUMENTS); - LDAPClient::Params params; - const String ldap_server_config = "ldap_servers." 
+ name; const bool has_host = config.has(ldap_server_config + ".host"); @@ -34,6 +63,7 @@ auto parseLDAPServer(const Poco::Util::AbstractConfiguration & config, const Str const bool has_bind_dn = config.has(ldap_server_config + ".bind_dn"); const bool has_auth_dn_prefix = config.has(ldap_server_config + ".auth_dn_prefix"); const bool has_auth_dn_suffix = config.has(ldap_server_config + ".auth_dn_suffix"); + const bool has_user_dn_detection = config.has(ldap_server_config + ".user_dn_detection"); const bool has_verification_cooldown = config.has(ldap_server_config + ".verification_cooldown"); const bool has_enable_tls = config.has(ldap_server_config + ".enable_tls"); const bool has_tls_minimum_protocol_version = config.has(ldap_server_config + ".tls_minimum_protocol_version"); @@ -66,6 +96,14 @@ auto parseLDAPServer(const Poco::Util::AbstractConfiguration & config, const Str params.bind_dn = auth_dn_prefix + "{user_name}" + auth_dn_suffix; } + if (has_user_dn_detection) + { + if (!params.user_dn_detection) + params.user_dn_detection = { .attribute = "dn" }; + + parseLDAPSearchParams(*params.user_dn_detection, config, ldap_server_config + ".user_dn_detection"); + } + if (has_verification_cooldown) params.verification_cooldown = std::chrono::seconds{config.getUInt64(ldap_server_config + ".verification_cooldown")}; @@ -143,14 +181,10 @@ auto parseLDAPServer(const Poco::Util::AbstractConfiguration & config, const Str } else params.port = (params.enable_tls == LDAPClient::Params::TLSEnable::YES ? 636 : 389); - - return params; } -auto parseKerberosParams(const Poco::Util::AbstractConfiguration & config) +void parseKerberosParams(GSSAcceptorContext::Params & params, const Poco::Util::AbstractConfiguration & config) { - GSSAcceptorContext::Params params; - Poco::Util::AbstractConfiguration::Keys keys; config.keys("kerberos", keys); @@ -180,12 +214,20 @@ auto parseKerberosParams(const Poco::Util::AbstractConfiguration & config) params.realm = config.getString("kerberos.realm", ""); params.principal = config.getString("kerberos.principal", ""); - - return params; } } +void parseLDAPRoleSearchParams(LDAPClient::RoleSearchParams & params, const Poco::Util::AbstractConfiguration & config, const String & prefix) +{ + parseLDAPSearchParams(params, config, prefix); + + const bool has_prefix = config.has(prefix + ".prefix"); + + if (has_prefix) + params.prefix = config.getString(prefix + ".prefix"); +} + void ExternalAuthenticators::reset() { std::scoped_lock lock(mutex); @@ -229,7 +271,8 @@ void ExternalAuthenticators::setConfiguration(const Poco::Util::AbstractConfigur { try { - ldap_client_params_blueprint.insert_or_assign(ldap_server_name, parseLDAPServer(config, ldap_server_name)); + ldap_client_params_blueprint.erase(ldap_server_name); + parseLDAPServer(ldap_client_params_blueprint.emplace(ldap_server_name, LDAPClient::Params{}).first->second, config, ldap_server_name); } catch (...) { @@ -240,7 +283,7 @@ void ExternalAuthenticators::setConfiguration(const Poco::Util::AbstractConfigur try { if (kerberos_keys_count > 0) - kerberos_params = parseKerberosParams(config); + parseKerberosParams(kerberos_params.emplace(), config); } catch (...) 
{ @@ -249,7 +292,7 @@ void ExternalAuthenticators::setConfiguration(const Poco::Util::AbstractConfigur } bool ExternalAuthenticators::checkLDAPCredentials(const String & server, const BasicCredentials & credentials, - const LDAPClient::SearchParamsList * search_params, LDAPClient::SearchResultsList * search_results) const + const LDAPClient::RoleSearchParamsList * role_search_params, LDAPClient::SearchResultsList * role_search_results) const { std::optional params; std::size_t params_hash = 0; @@ -267,9 +310,9 @@ bool ExternalAuthenticators::checkLDAPCredentials(const String & server, const B params->password = credentials.getPassword(); params->combineCoreHash(params_hash); - if (search_params) + if (role_search_params) { - for (const auto & params_instance : *search_params) + for (const auto & params_instance : *role_search_params) { params_instance.combineHash(params_hash); } @@ -301,14 +344,14 @@ bool ExternalAuthenticators::checkLDAPCredentials(const String & server, const B // Ensure that search_params are compatible. ( - search_params == nullptr ? - entry.last_successful_search_results.empty() : - search_params->size() == entry.last_successful_search_results.size() + role_search_params == nullptr ? + entry.last_successful_role_search_results.empty() : + role_search_params->size() == entry.last_successful_role_search_results.size() ) ) { - if (search_results) - *search_results = entry.last_successful_search_results; + if (role_search_results) + *role_search_results = entry.last_successful_role_search_results; return true; } @@ -326,7 +369,7 @@ bool ExternalAuthenticators::checkLDAPCredentials(const String & server, const B } LDAPSimpleAuthClient client(params.value()); - const auto result = client.authenticate(search_params, search_results); + const auto result = client.authenticate(role_search_params, role_search_results); const auto current_check_timestamp = std::chrono::steady_clock::now(); // Update the cache, but only if this is the latest check and the server is still configured in a compatible way. @@ -345,9 +388,9 @@ bool ExternalAuthenticators::checkLDAPCredentials(const String & server, const B std::size_t new_params_hash = 0; new_params.combineCoreHash(new_params_hash); - if (search_params) + if (role_search_params) { - for (const auto & params_instance : *search_params) + for (const auto & params_instance : *role_search_params) { params_instance.combineHash(new_params_hash); } @@ -363,17 +406,17 @@ bool ExternalAuthenticators::checkLDAPCredentials(const String & server, const B entry.last_successful_params_hash = params_hash; entry.last_successful_authentication_timestamp = current_check_timestamp; - if (search_results) - entry.last_successful_search_results = *search_results; + if (role_search_results) + entry.last_successful_role_search_results = *role_search_results; else - entry.last_successful_search_results.clear(); + entry.last_successful_role_search_results.clear(); } else if ( entry.last_successful_params_hash != params_hash || ( - search_params == nullptr ? - !entry.last_successful_search_results.empty() : - search_params->size() != entry.last_successful_search_results.size() + role_search_params == nullptr ? 
+ !entry.last_successful_role_search_results.empty() : + role_search_params->size() != entry.last_successful_role_search_results.size() ) ) { diff --git a/src/Access/ExternalAuthenticators.h b/src/Access/ExternalAuthenticators.h index c8feea7eada..24f1f7b6528 100644 --- a/src/Access/ExternalAuthenticators.h +++ b/src/Access/ExternalAuthenticators.h @@ -34,7 +34,7 @@ public: // The name and readiness of the credentials must be verified before calling these. bool checkLDAPCredentials(const String & server, const BasicCredentials & credentials, - const LDAPClient::SearchParamsList * search_params = nullptr, LDAPClient::SearchResultsList * search_results = nullptr) const; + const LDAPClient::RoleSearchParamsList * role_search_params = nullptr, LDAPClient::SearchResultsList * role_search_results = nullptr) const; bool checkKerberosCredentials(const String & realm, const GSSAcceptorContext & credentials) const; GSSAcceptorContext::Params getKerberosParams() const; @@ -44,7 +44,7 @@ private: { std::size_t last_successful_params_hash = 0; std::chrono::steady_clock::time_point last_successful_authentication_timestamp; - LDAPClient::SearchResultsList last_successful_search_results; + LDAPClient::SearchResultsList last_successful_role_search_results; }; using LDAPCache = std::unordered_map; // user name -> cache entry @@ -58,4 +58,6 @@ private: std::optional kerberos_params; }; +void parseLDAPRoleSearchParams(LDAPClient::RoleSearchParams & params, const Poco::Util::AbstractConfiguration & config, const String & prefix); + } diff --git a/src/Access/LDAPAccessStorage.cpp b/src/Access/LDAPAccessStorage.cpp index b47a9b3e041..c1d54e8c9aa 100644 --- a/src/Access/LDAPAccessStorage.cpp +++ b/src/Access/LDAPAccessStorage.cpp @@ -68,34 +68,15 @@ void LDAPAccessStorage::setConfiguration(AccessControlManager * access_control_m common_roles_cfg.insert(role_names.begin(), role_names.end()); } - LDAPClient::SearchParamsList role_search_params_cfg; + LDAPClient::RoleSearchParamsList role_search_params_cfg; if (has_role_mapping) { Poco::Util::AbstractConfiguration::Keys all_keys; config.keys(prefix, all_keys); for (const auto & key : all_keys) { - if (key != "role_mapping" && key.find("role_mapping[") != 0) - continue; - - const String rm_prefix = prefix_str + key; - const String rm_prefix_str = rm_prefix + '.'; - role_search_params_cfg.emplace_back(); - auto & rm_params = role_search_params_cfg.back(); - - rm_params.base_dn = config.getString(rm_prefix_str + "base_dn", ""); - rm_params.search_filter = config.getString(rm_prefix_str + "search_filter", ""); - rm_params.attribute = config.getString(rm_prefix_str + "attribute", "cn"); - rm_params.prefix = config.getString(rm_prefix_str + "prefix", ""); - - auto scope = config.getString(rm_prefix_str + "scope", "subtree"); - boost::algorithm::to_lower(scope); - if (scope == "base") rm_params.scope = LDAPClient::SearchParams::Scope::BASE; - else if (scope == "one_level") rm_params.scope = LDAPClient::SearchParams::Scope::ONE_LEVEL; - else if (scope == "subtree") rm_params.scope = LDAPClient::SearchParams::Scope::SUBTREE; - else if (scope == "children") rm_params.scope = LDAPClient::SearchParams::Scope::CHILDREN; - else - throw Exception("Invalid value of 'scope' field in '" + key + "' section of LDAP user directory, must be one of 'base', 'one_level', 'subtree', or 'children'", ErrorCodes::BAD_ARGUMENTS); + if (key == "role_mapping" || key.find("role_mapping[") == 0) + parseLDAPRoleSearchParams(role_search_params_cfg.emplace_back(), config, prefix_str + key); } } @@ 
-364,7 +345,7 @@ std::set LDAPAccessStorage::mapExternalRolesNoLock(const LDAPClient::Sea bool LDAPAccessStorage::areLDAPCredentialsValidNoLock(const User & user, const Credentials & credentials, - const ExternalAuthenticators & external_authenticators, LDAPClient::SearchResultsList & search_results) const + const ExternalAuthenticators & external_authenticators, LDAPClient::SearchResultsList & role_search_results) const { if (!credentials.isReady()) return false; @@ -373,7 +354,7 @@ bool LDAPAccessStorage::areLDAPCredentialsValidNoLock(const User & user, const C return false; if (const auto * basic_credentials = dynamic_cast(&credentials)) - return external_authenticators.checkLDAPCredentials(ldap_server_name, *basic_credentials, &role_search_params, &search_results); + return external_authenticators.checkLDAPCredentials(ldap_server_name, *basic_credentials, &role_search_params, &role_search_results); return false; } diff --git a/src/Access/LDAPAccessStorage.h b/src/Access/LDAPAccessStorage.h index ea0ab47c225..33ac9f0a914 100644 --- a/src/Access/LDAPAccessStorage.h +++ b/src/Access/LDAPAccessStorage.h @@ -68,12 +68,12 @@ private: void updateAssignedRolesNoLock(const UUID & id, const String & user_name, const LDAPClient::SearchResultsList & external_roles) const; std::set mapExternalRolesNoLock(const LDAPClient::SearchResultsList & external_roles) const; bool areLDAPCredentialsValidNoLock(const User & user, const Credentials & credentials, - const ExternalAuthenticators & external_authenticators, LDAPClient::SearchResultsList & search_results) const; + const ExternalAuthenticators & external_authenticators, LDAPClient::SearchResultsList & role_search_results) const; mutable std::recursive_mutex mutex; AccessControlManager * access_control_manager = nullptr; String ldap_server_name; - LDAPClient::SearchParamsList role_search_params; + LDAPClient::RoleSearchParamsList role_search_params; std::set common_role_names; // role name that should be granted to all users at all times mutable std::map external_role_hashes; // user name -> LDAPClient::SearchResultsList hash (most recently retrieved and processed) mutable std::map> users_per_roles; // role name -> user names (...it should be granted to; may but don't have to exist for common roles) diff --git a/src/Access/LDAPClient.cpp b/src/Access/LDAPClient.cpp index 5c4b7dd8d99..78b0b7f545b 100644 --- a/src/Access/LDAPClient.cpp +++ b/src/Access/LDAPClient.cpp @@ -32,6 +32,11 @@ void LDAPClient::SearchParams::combineHash(std::size_t & seed) const boost::hash_combine(seed, static_cast(scope)); boost::hash_combine(seed, search_filter); boost::hash_combine(seed, attribute); +} + +void LDAPClient::RoleSearchParams::combineHash(std::size_t & seed) const +{ + SearchParams::combineHash(seed); boost::hash_combine(seed, prefix); } @@ -42,6 +47,9 @@ void LDAPClient::Params::combineCoreHash(std::size_t & seed) const boost::hash_combine(seed, bind_dn); boost::hash_combine(seed, user); boost::hash_combine(seed, password); + + if (user_dn_detection) + user_dn_detection->combineHash(seed); } LDAPClient::LDAPClient(const Params & params_) @@ -286,18 +294,33 @@ void LDAPClient::openConnection() if (params.enable_tls == LDAPClient::Params::TLSEnable::YES_STARTTLS) diag(ldap_start_tls_s(handle, nullptr, nullptr)); + final_user_name = escapeForLDAP(params.user); + final_bind_dn = replacePlaceholders(params.bind_dn, { {"{user_name}", final_user_name} }); + final_user_dn = final_bind_dn; // The default value... may be updated right after a successful bind. 
+ switch (params.sasl_mechanism) { case LDAPClient::Params::SASLMechanism::SIMPLE: { - const auto escaped_user_name = escapeForLDAP(params.user); - const auto bind_dn = replacePlaceholders(params.bind_dn, { {"{user_name}", escaped_user_name} }); - ::berval cred; cred.bv_val = const_cast(params.password.c_str()); cred.bv_len = params.password.size(); - diag(ldap_sasl_bind_s(handle, bind_dn.c_str(), LDAP_SASL_SIMPLE, &cred, nullptr, nullptr, nullptr)); + diag(ldap_sasl_bind_s(handle, final_bind_dn.c_str(), LDAP_SASL_SIMPLE, &cred, nullptr, nullptr, nullptr)); + + // Once bound, run the user DN search query and update the default value, if asked. + if (params.user_dn_detection) + { + const auto user_dn_search_results = search(*params.user_dn_detection); + + if (user_dn_search_results.size() == 0) + throw Exception("Failed to detect user DN: empty search results", ErrorCodes::LDAP_ERROR); + + if (user_dn_search_results.size() > 1) + throw Exception("Failed to detect user DN: more than one entry in the search results", ErrorCodes::LDAP_ERROR); + + final_user_dn = *user_dn_search_results.begin(); + } break; } @@ -316,6 +339,9 @@ void LDAPClient::closeConnection() noexcept ldap_unbind_ext_s(handle, nullptr, nullptr); handle = nullptr; + final_user_name.clear(); + final_bind_dn.clear(); + final_user_dn.clear(); } LDAPClient::SearchResults LDAPClient::search(const SearchParams & search_params) @@ -333,10 +359,19 @@ LDAPClient::SearchResults LDAPClient::search(const SearchParams & search_params) case SearchParams::Scope::CHILDREN: scope = LDAP_SCOPE_CHILDREN; break; } - const auto escaped_user_name = escapeForLDAP(params.user); - const auto bind_dn = replacePlaceholders(params.bind_dn, { {"{user_name}", escaped_user_name} }); - const auto base_dn = replacePlaceholders(search_params.base_dn, { {"{user_name}", escaped_user_name}, {"{bind_dn}", bind_dn} }); - const auto search_filter = replacePlaceholders(search_params.search_filter, { {"{user_name}", escaped_user_name}, {"{bind_dn}", bind_dn}, {"{base_dn}", base_dn} }); + const auto final_base_dn = replacePlaceholders(search_params.base_dn, { + {"{user_name}", final_user_name}, + {"{bind_dn}", final_bind_dn}, + {"{user_dn}", final_user_dn} + }); + + const auto final_search_filter = replacePlaceholders(search_params.search_filter, { + {"{user_name}", final_user_name}, + {"{bind_dn}", final_bind_dn}, + {"{user_dn}", final_user_dn}, + {"{base_dn}", final_base_dn} + }); + char * attrs[] = { const_cast(search_params.attribute.c_str()), nullptr }; ::timeval timeout = { params.search_timeout.count(), 0 }; LDAPMessage* msgs = nullptr; @@ -349,7 +384,7 @@ LDAPClient::SearchResults LDAPClient::search(const SearchParams & search_params) } }); - diag(ldap_search_ext_s(handle, base_dn.c_str(), scope, search_filter.c_str(), attrs, 0, nullptr, nullptr, &timeout, params.search_limit, &msgs)); + diag(ldap_search_ext_s(handle, final_base_dn.c_str(), scope, final_search_filter.c_str(), attrs, 0, nullptr, nullptr, &timeout, params.search_limit, &msgs)); for ( auto * msg = ldap_first_message(handle, msgs); @@ -361,6 +396,27 @@ LDAPClient::SearchResults LDAPClient::search(const SearchParams & search_params) { case LDAP_RES_SEARCH_ENTRY: { + // Extract DN separately, if the requested attribute is DN. 
+ if (boost::iequals("dn", search_params.attribute)) + { + BerElement * ber = nullptr; + + SCOPE_EXIT({ + if (ber) + { + ber_free(ber, 0); + ber = nullptr; + } + }); + + ::berval bv; + + diag(ldap_get_dn_ber(handle, msg, &ber, &bv)); + + if (bv.bv_val && bv.bv_len > 0) + result.emplace(bv.bv_val, bv.bv_len); + } + BerElement * ber = nullptr; SCOPE_EXIT({ @@ -471,12 +527,12 @@ LDAPClient::SearchResults LDAPClient::search(const SearchParams & search_params) return result; } -bool LDAPSimpleAuthClient::authenticate(const SearchParamsList * search_params, SearchResultsList * search_results) +bool LDAPSimpleAuthClient::authenticate(const RoleSearchParamsList * role_search_params, SearchResultsList * role_search_results) { if (params.user.empty()) throw Exception("LDAP authentication of a user with empty name is not allowed", ErrorCodes::BAD_ARGUMENTS); - if (!search_params != !search_results) + if (!role_search_params != !role_search_results) throw Exception("Cannot return LDAP search results", ErrorCodes::BAD_ARGUMENTS); // Silently reject authentication attempt if the password is empty as if it didn't match. @@ -489,21 +545,21 @@ bool LDAPSimpleAuthClient::authenticate(const SearchParamsList * search_params, openConnection(); // While connected, run search queries and save the results, if asked. - if (search_params) + if (role_search_params) { - search_results->clear(); - search_results->reserve(search_params->size()); + role_search_results->clear(); + role_search_results->reserve(role_search_params->size()); try { - for (const auto & single_search_params : *search_params) + for (const auto & params_instance : *role_search_params) { - search_results->emplace_back(search(single_search_params)); + role_search_results->emplace_back(search(params_instance)); } } catch (...) 
{ - search_results->clear(); + role_search_results->clear(); throw; } } @@ -532,7 +588,7 @@ LDAPClient::SearchResults LDAPClient::search(const SearchParams &) throw Exception("ClickHouse was built without LDAP support", ErrorCodes::FEATURE_IS_NOT_ENABLED_AT_BUILD_TIME); } -bool LDAPSimpleAuthClient::authenticate(const SearchParamsList *, SearchResultsList *) +bool LDAPSimpleAuthClient::authenticate(const RoleSearchParamsList *, SearchResultsList *) { throw Exception("ClickHouse was built without LDAP support", ErrorCodes::FEATURE_IS_NOT_ENABLED_AT_BUILD_TIME); } diff --git a/src/Access/LDAPClient.h b/src/Access/LDAPClient.h index 4fc97bb957b..388e7ad0f0d 100644 --- a/src/Access/LDAPClient.h +++ b/src/Access/LDAPClient.h @@ -38,12 +38,20 @@ public: Scope scope = Scope::SUBTREE; String search_filter; String attribute = "cn"; + + void combineHash(std::size_t & seed) const; + }; + + struct RoleSearchParams + : public SearchParams + { String prefix; void combineHash(std::size_t & seed) const; }; - using SearchParamsList = std::vector; + using RoleSearchParamsList = std::vector; + using SearchResults = std::set; using SearchResultsList = std::vector; @@ -105,6 +113,8 @@ public: String user; String password; + std::optional user_dn_detection; + std::chrono::seconds verification_cooldown{0}; std::chrono::seconds operation_timeout{40}; @@ -134,6 +144,9 @@ protected: #if USE_LDAP LDAP * handle = nullptr; #endif + String final_user_name; + String final_bind_dn; + String final_user_dn; }; class LDAPSimpleAuthClient @@ -141,7 +154,7 @@ class LDAPSimpleAuthClient { public: using LDAPClient::LDAPClient; - bool authenticate(const SearchParamsList * search_params, SearchResultsList * search_results); + bool authenticate(const RoleSearchParamsList * role_search_params, SearchResultsList * role_search_results); }; } From a9e5532da62873ae7d9920086ca83aaae161df43 Mon Sep 17 00:00:00 2001 From: Denis Glazachev Date: Mon, 29 Mar 2021 12:27:16 +0400 Subject: [PATCH 002/204] Fix builds: soothe the linters --- src/Access/ExternalAuthenticators.cpp | 5 ++++- src/Access/LDAPClient.cpp | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/Access/ExternalAuthenticators.cpp b/src/Access/ExternalAuthenticators.cpp index 9eaf2a4b04b..99a3347b0de 100644 --- a/src/Access/ExternalAuthenticators.cpp +++ b/src/Access/ExternalAuthenticators.cpp @@ -99,7 +99,10 @@ void parseLDAPServer(LDAPClient::Params & params, const Poco::Util::AbstractConf if (has_user_dn_detection) { if (!params.user_dn_detection) - params.user_dn_detection = { .attribute = "dn" }; + { + params.user_dn_detection.emplace(); + params.user_dn_detection->attribute = "dn"; + } parseLDAPSearchParams(*params.user_dn_detection, config, ldap_server_config + ".user_dn_detection"); } diff --git a/src/Access/LDAPClient.cpp b/src/Access/LDAPClient.cpp index 78b0b7f545b..a8f9675774b 100644 --- a/src/Access/LDAPClient.cpp +++ b/src/Access/LDAPClient.cpp @@ -313,7 +313,7 @@ void LDAPClient::openConnection() { const auto user_dn_search_results = search(*params.user_dn_detection); - if (user_dn_search_results.size() == 0) + if (user_dn_search_results.empty()) throw Exception("Failed to detect user DN: empty search results", ErrorCodes::LDAP_ERROR); if (user_dn_search_results.size() > 1) From 0e5c58c8b20d56eded6e2b786f8ae97ebd2ae466 Mon Sep 17 00:00:00 2001 From: Vitaliy Zakaznikov Date: Mon, 19 Apr 2021 19:00:30 -0400 Subject: [PATCH 003/204] Adding user DN detection tests. 
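The tests added below exercise user DN detection together with role mapping. As a rough illustration of the mechanism under test (not part of the patch itself), the following stand-alone Python sketch mimics the placeholder substitution and user DN detection flow; the directory contents, templates, and helper names are purely illustrative and do not correspond to any real API.

```python
# Minimal stand-alone sketch (illustrative only) of bind DN construction,
# user DN detection, and {user_dn} substitution in a role-mapping filter.

def replace_placeholders(template: str, values: dict) -> str:
    """Mimic the substitution of {user_name}, {bind_dn}, {user_dn}, {base_dn}."""
    for placeholder, value in values.items():
        template = template.replace(placeholder, value)
    return template

# A tiny in-memory "directory": DN -> attributes. Note the swapped uid values,
# mirroring the setup used by the scenarios below.
DIRECTORY = {
    "cn=first_user,ou=users,dc=company,dc=com": {"objectClass": "inetOrgPerson", "uid": "second_user"},
    "cn=second_user,ou=users,dc=company,dc=com": {"objectClass": "inetOrgPerson", "uid": "first_user"},
}

def detect_user_dn(user_name: str, bind_dn_template: str, detection: dict) -> str:
    """Bind DN comes from the template; user DN is found by a separate search."""
    bind_dn = replace_placeholders(bind_dn_template, {"{user_name}": user_name})
    base_dn = replace_placeholders(
        detection["base_dn"], {"{user_name}": user_name, "{bind_dn}": bind_dn})
    # Stand-in for the LDAP search: instead of parsing the filter, the mock
    # simply matches entries under base_dn whose uid equals the user name.
    matches = [dn for dn, attrs in DIRECTORY.items()
               if dn.endswith(base_dn) and attrs.get("uid") == user_name]
    if len(matches) != 1:
        raise RuntimeError("Failed to detect user DN")
    return matches[0]

if __name__ == "__main__":
    detection = {"base_dn": "ou=users,dc=company,dc=com",
                 "search_filter": "(&(objectClass=inetOrgPerson)(uid={user_name}))"}
    user_dn = detect_user_dn("first_user",
                             "cn={user_name},ou=users,dc=company,dc=com", detection)
    # The detected DN then feeds the role-mapping search filter via {user_dn}.
    role_filter = replace_placeholders(
        "(&(objectClass=groupOfUniqueNames)(uniquemember={user_dn}))",
        {"{user_dn}": user_dn})
    print(user_dn)      # cn=second_user,ou=users,dc=company,dc=com
    print(role_filter)
```

With the swapped uid values used in the tests, the detected user DN for first_user resolves to second_user's entry (and vice versa), which is exactly what the role-mapping scenarios assert when mapping by {user_dn} instead of {bind_dn}.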
--- .../testflows/ldap/role_mapping/regression.py | 3 + .../role_mapping/requirements/requirements.md | 70 ++- .../role_mapping/requirements/requirements.py | 184 ++++++- .../ldap/role_mapping/tests/common.py | 7 +- .../role_mapping/tests/user_dn_detection.py | 474 ++++++++++++++++++ 5 files changed, 728 insertions(+), 10 deletions(-) create mode 100644 tests/testflows/ldap/role_mapping/tests/user_dn_detection.py diff --git a/tests/testflows/ldap/role_mapping/regression.py b/tests/testflows/ldap/role_mapping/regression.py index 7afb6c98713..c853316ecec 100755 --- a/tests/testflows/ldap/role_mapping/regression.py +++ b/tests/testflows/ldap/role_mapping/regression.py @@ -11,6 +11,8 @@ from ldap.role_mapping.requirements import * # Cross-outs of known fails xfails = { "mapping/roles removed and added in parallel": + [(Fail, "known bug")], + "user dn detection/mapping/roles removed and added in parallel": [(Fail, "known bug")] } @@ -42,6 +44,7 @@ def regression(self, local, clickhouse_binary_path, stress=None, parallel=None): Scenario(run=load("ldap.authentication.tests.sanity", "scenario"), name="ldap sanity") Feature(run=load("ldap.role_mapping.tests.server_config", "feature")) Feature(run=load("ldap.role_mapping.tests.mapping", "feature")) + Feature(run=load("ldap.role_mapping.tests.user_dn_detection", "feature")) if main(): regression() diff --git a/tests/testflows/ldap/role_mapping/requirements/requirements.md b/tests/testflows/ldap/role_mapping/requirements/requirements.md index e79baa9cd7c..fbd772b9d29 100644 --- a/tests/testflows/ldap/role_mapping/requirements/requirements.md +++ b/tests/testflows/ldap/role_mapping/requirements/requirements.md @@ -44,6 +44,11 @@ * 4.7.1 [BindDN Parameter](#binddn-parameter) * 4.7.1.1 [RQ.SRS-014.LDAP.RoleMapping.Configuration.Server.BindDN](#rqsrs-014ldaprolemappingconfigurationserverbinddn) * 4.7.1.2 [RQ.SRS-014.LDAP.RoleMapping.Configuration.Server.BindDN.ConflictWith.AuthDN](#rqsrs-014ldaprolemappingconfigurationserverbinddnconflictwithauthdn) + * 4.7.2 [User DN Detection](#user-dn-detection) + * 4.7.2.1 [RQ.SRS-014.LDAP.RoleMapping.Configuration.Server.UserDNDetection](#rqsrs-014ldaprolemappingconfigurationserveruserdndetection) + * 4.7.2.2 [RQ.SRS-014.LDAP.RoleMapping.Configuration.Server.UserDNDetection.BaseDN](#rqsrs-014ldaprolemappingconfigurationserveruserdndetectionbasedn) + * 4.7.2.3 [RQ.SRS-014.LDAP.RoleMapping.Configuration.Server.UserDNDetection.Scope](#rqsrs-014ldaprolemappingconfigurationserveruserdndetectionscope) + * 4.7.2.4 [RQ.SRS-014.LDAP.RoleMapping.Configuration.Server.UserDNDetection.SearchFilter](#rqsrs-014ldaprolemappingconfigurationserveruserdndetectionsearchfilter) * 4.8 [External User Directory Configuration](#external-user-directory-configuration) * 4.8.1 [Syntax](#syntax) * 4.8.1.1 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Syntax](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingsyntax) @@ -318,6 +323,67 @@ version: 1.0 [ClickHouse] SHALL return an error if both `` and `` or `` parameters are specified as part of [LDAP] server description in the `` section of the `config.xml`. +#### User DN Detection + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.Server.UserDNDetection +version: 1.0 + +[ClickHouse] SHALL support the `user_dn_detection` sub-section in the `` section +of the `config.xml` that SHALL be used to enable detecting the actual user DN of the bound user. 
+ +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.Server.UserDNDetection.BaseDN +version: 1.0 + +[ClickHouse] SHALL support `base_dn` parameter in the `user_dn_detection` sub-section in the +`` section of the `config.xml` that SHALL specify how +to construct the base DN for the LDAP search to detect the actual user DN. + +For example, + +```xml + + ... + CN=Users,DC=example,DC=com + +``` + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.Server.UserDNDetection.Scope +version: 1.0 + +[ClickHouse] SHALL support `scope` parameter in the `user_dn_detection` sub-section in the +`` section of the `config.xml` that SHALL the scope of the +LDAP search to detect the actual user DN. The `scope` parameter SHALL support the following values + +* `base` +* `one_level` +* `children` +* `subtree` + +For example, + +```xml + + ... + one_level + +``` + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.Server.UserDNDetection.SearchFilter +version: 1.0 + +[ClickHouse] SHALL support `search_filter` parameter in the `user_dn_detection` sub-section in the +`` section of the `config.xml` that SHALL specify the LDAP search +filter used to detect the actual user DN. + +For example, + +```xml + + ... + (&(objectClass=user)(sAMAccountName={user_name})) + +``` + ### External User Directory Configuration #### Syntax @@ -382,7 +448,7 @@ version: 1.0 [ClickHouse] SHALL support the `` parameter in the `` section of the `config.xml` that SHALL specify the template to be used to construct the base `DN` for the [LDAP] search. -The resulting `DN` SHALL be constructed by replacing all the `{user_name}` and `{bind_dn}` substrings of +The resulting `DN` SHALL be constructed by replacing all the `{user_name}`, `{bind_dn}`, and `user_dn` substrings of the template with the actual user name and bind `DN` during each [LDAP] search. #### Attribute Parameter @@ -445,7 +511,7 @@ version: 1.0 section of the `config.xml` that SHALL specify the template used to construct the [LDAP filter](https://ldap.com/ldap-filters/) for the search. -The resulting filter SHALL be constructed by replacing all `{user_name}`, `{bind_dn}`, and `{base_dn}` substrings +The resulting filter SHALL be constructed by replacing all `{user_name}`, `{bind_dn}`, `{base_dn}`, and `{user_dn}` substrings of the template with the actual user name, bind `DN`, and base `DN` during each the [LDAP] search. #### Prefix Parameter diff --git a/tests/testflows/ldap/role_mapping/requirements/requirements.py b/tests/testflows/ldap/role_mapping/requirements/requirements.py index b2748762e03..68ce4f5913e 100644 --- a/tests/testflows/ldap/role_mapping/requirements/requirements.py +++ b/tests/testflows/ldap/role_mapping/requirements/requirements.py @@ -1,6 +1,6 @@ # These requirements were auto generated # from software requirements specification (SRS) -# document by TestFlows v1.6.210129.1222545. +# document by TestFlows v1.6.210412.1213859. # Do not edit by hand but re-generate instead # using 'tfs requirements generate' command. 
from testflows.core import Specification @@ -488,6 +488,105 @@ RQ_SRS_014_LDAP_RoleMapping_Configuration_Server_BindDN_ConflictWith_AuthDN = Re level=4, num='4.7.1.2') +RQ_SRS_014_LDAP_RoleMapping_Configuration_Server_UserDNDetection = Requirement( + name='RQ.SRS-014.LDAP.RoleMapping.Configuration.Server.UserDNDetection', + version='1.0', + priority=None, + group=None, + type=None, + uid=None, + description=( + '[ClickHouse] SHALL support the `user_dn_detection` sub-section in the `` section\n' + 'of the `config.xml` that SHALL be used to enable detecting the actual user DN of the bound user. \n' + '\n' + ), + link=None, + level=4, + num='4.7.2.1') + +RQ_SRS_014_LDAP_RoleMapping_Configuration_Server_UserDNDetection_BaseDN = Requirement( + name='RQ.SRS-014.LDAP.RoleMapping.Configuration.Server.UserDNDetection.BaseDN', + version='1.0', + priority=None, + group=None, + type=None, + uid=None, + description=( + '[ClickHouse] SHALL support `base_dn` parameter in the `user_dn_detection` sub-section in the \n' + '`` section of the `config.xml` that SHALL specify how \n' + 'to construct the base DN for the LDAP search to detect the actual user DN.\n' + '\n' + 'For example,\n' + '\n' + '```xml\n' + '\n' + ' ...\n' + ' CN=Users,DC=example,DC=com\n' + '\n' + '```\n' + '\n' + ), + link=None, + level=4, + num='4.7.2.2') + +RQ_SRS_014_LDAP_RoleMapping_Configuration_Server_UserDNDetection_Scope = Requirement( + name='RQ.SRS-014.LDAP.RoleMapping.Configuration.Server.UserDNDetection.Scope', + version='1.0', + priority=None, + group=None, + type=None, + uid=None, + description=( + '[ClickHouse] SHALL support `scope` parameter in the `user_dn_detection` sub-section in the \n' + '`` section of the `config.xml` that SHALL the scope of the \n' + 'LDAP search to detect the actual user DN. 
The `scope` parameter SHALL support the following values\n' + '\n' + '* `base`\n' + '* `one_level`\n' + '* `children`\n' + '* `subtree`\n' + '\n' + 'For example,\n' + '\n' + '```xml\n' + '\n' + ' ...\n' + ' one_level\n' + '\n' + '```\n' + '\n' + ), + link=None, + level=4, + num='4.7.2.3') + +RQ_SRS_014_LDAP_RoleMapping_Configuration_Server_UserDNDetection_SearchFilter = Requirement( + name='RQ.SRS-014.LDAP.RoleMapping.Configuration.Server.UserDNDetection.SearchFilter', + version='1.0', + priority=None, + group=None, + type=None, + uid=None, + description=( + '[ClickHouse] SHALL support `search_filter` parameter in the `user_dn_detection` sub-section in the \n' + '`` section of the `config.xml` that SHALL specify the LDAP search\n' + 'filter used to detect the actual user DN.\n' + '\n' + 'For example,\n' + '\n' + '```xml\n' + '\n' + ' ...\n' + ' (&(objectClass=user)(sAMAccountName={user_name}))\n' + '\n' + '```\n' + '\n' + ), + link=None, + level=4, + num='4.7.2.4') + RQ_SRS_014_LDAP_RoleMapping_Configuration_UserDirectory_RoleMapping_Syntax = Requirement( name='RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Syntax', version='1.0', @@ -587,7 +686,7 @@ RQ_SRS_014_LDAP_RoleMapping_Configuration_UserDirectory_RoleMapping_BaseDN = Req '[ClickHouse] SHALL support the `` parameter in the `` section \n' 'of the `config.xml` that SHALL specify the template to be used to construct the base `DN` for the [LDAP] search.\n' '\n' - 'The resulting `DN` SHALL be constructed by replacing all the `{user_name}` and `{bind_dn}` substrings of \n' + 'The resulting `DN` SHALL be constructed by replacing all the `{user_name}`, `{bind_dn}`, and `user_dn` substrings of \n' 'the template with the actual user name and bind `DN` during each [LDAP] search.\n' '\n' ), @@ -724,7 +823,7 @@ RQ_SRS_014_LDAP_RoleMapping_Configuration_UserDirectory_RoleMapping_SearchFilter 'section of the `config.xml` that SHALL specify the template used to construct \n' 'the [LDAP filter](https://ldap.com/ldap-filters/) for the search.\n' '\n' - 'The resulting filter SHALL be constructed by replacing all `{user_name}`, `{bind_dn}`, and `{base_dn}` substrings \n' + 'The resulting filter SHALL be constructed by replacing all `{user_name}`, `{bind_dn}`, `{base_dn}`, and `{user_dn}` substrings \n' 'of the template with the actual user name, bind `DN`, and base `DN` during each the [LDAP] search.\n' ' \n' ), @@ -872,6 +971,11 @@ SRS_014_ClickHouse_LDAP_Role_Mapping = Specification( Heading(name='BindDN Parameter', level=3, num='4.7.1'), Heading(name='RQ.SRS-014.LDAP.RoleMapping.Configuration.Server.BindDN', level=4, num='4.7.1.1'), Heading(name='RQ.SRS-014.LDAP.RoleMapping.Configuration.Server.BindDN.ConflictWith.AuthDN', level=4, num='4.7.1.2'), + Heading(name='User DN Detection', level=3, num='4.7.2'), + Heading(name='RQ.SRS-014.LDAP.RoleMapping.Configuration.Server.UserDNDetection', level=4, num='4.7.2.1'), + Heading(name='RQ.SRS-014.LDAP.RoleMapping.Configuration.Server.UserDNDetection.BaseDN', level=4, num='4.7.2.2'), + Heading(name='RQ.SRS-014.LDAP.RoleMapping.Configuration.Server.UserDNDetection.Scope', level=4, num='4.7.2.3'), + Heading(name='RQ.SRS-014.LDAP.RoleMapping.Configuration.Server.UserDNDetection.SearchFilter', level=4, num='4.7.2.4'), Heading(name='External User Directory Configuration', level=2, num='4.8'), Heading(name='Syntax', level=3, num='4.8.1'), Heading(name='RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Syntax', level=4, num='4.8.1.1'), @@ -930,6 +1034,10 @@ 
SRS_014_ClickHouse_LDAP_Role_Mapping = Specification( RQ_SRS_014_LDAP_RoleMapping_Authentication_Parallel_SameUser, RQ_SRS_014_LDAP_RoleMapping_Configuration_Server_BindDN, RQ_SRS_014_LDAP_RoleMapping_Configuration_Server_BindDN_ConflictWith_AuthDN, + RQ_SRS_014_LDAP_RoleMapping_Configuration_Server_UserDNDetection, + RQ_SRS_014_LDAP_RoleMapping_Configuration_Server_UserDNDetection_BaseDN, + RQ_SRS_014_LDAP_RoleMapping_Configuration_Server_UserDNDetection_Scope, + RQ_SRS_014_LDAP_RoleMapping_Configuration_Server_UserDNDetection_SearchFilter, RQ_SRS_014_LDAP_RoleMapping_Configuration_UserDirectory_RoleMapping_Syntax, RQ_SRS_014_LDAP_RoleMapping_Configuration_UserDirectory_RoleMapping_SpecialCharactersEscaping, RQ_SRS_014_LDAP_RoleMapping_Configuration_UserDirectory_RoleMapping_MultipleSections, @@ -996,6 +1104,11 @@ SRS_014_ClickHouse_LDAP_Role_Mapping = Specification( * 4.7.1 [BindDN Parameter](#binddn-parameter) * 4.7.1.1 [RQ.SRS-014.LDAP.RoleMapping.Configuration.Server.BindDN](#rqsrs-014ldaprolemappingconfigurationserverbinddn) * 4.7.1.2 [RQ.SRS-014.LDAP.RoleMapping.Configuration.Server.BindDN.ConflictWith.AuthDN](#rqsrs-014ldaprolemappingconfigurationserverbinddnconflictwithauthdn) + * 4.7.2 [User DN Detection](#user-dn-detection) + * 4.7.2.1 [RQ.SRS-014.LDAP.RoleMapping.Configuration.Server.UserDNDetection](#rqsrs-014ldaprolemappingconfigurationserveruserdndetection) + * 4.7.2.2 [RQ.SRS-014.LDAP.RoleMapping.Configuration.Server.UserDNDetection.BaseDN](#rqsrs-014ldaprolemappingconfigurationserveruserdndetectionbasedn) + * 4.7.2.3 [RQ.SRS-014.LDAP.RoleMapping.Configuration.Server.UserDNDetection.Scope](#rqsrs-014ldaprolemappingconfigurationserveruserdndetectionscope) + * 4.7.2.4 [RQ.SRS-014.LDAP.RoleMapping.Configuration.Server.UserDNDetection.SearchFilter](#rqsrs-014ldaprolemappingconfigurationserveruserdndetectionsearchfilter) * 4.8 [External User Directory Configuration](#external-user-directory-configuration) * 4.8.1 [Syntax](#syntax) * 4.8.1.1 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Syntax](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingsyntax) @@ -1270,6 +1383,67 @@ version: 1.0 [ClickHouse] SHALL return an error if both `` and `` or `` parameters are specified as part of [LDAP] server description in the `` section of the `config.xml`. +#### User DN Detection + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.Server.UserDNDetection +version: 1.0 + +[ClickHouse] SHALL support the `user_dn_detection` sub-section in the `` section +of the `config.xml` that SHALL be used to enable detecting the actual user DN of the bound user. + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.Server.UserDNDetection.BaseDN +version: 1.0 + +[ClickHouse] SHALL support `base_dn` parameter in the `user_dn_detection` sub-section in the +`` section of the `config.xml` that SHALL specify how +to construct the base DN for the LDAP search to detect the actual user DN. + +For example, + +```xml + + ... + CN=Users,DC=example,DC=com + +``` + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.Server.UserDNDetection.Scope +version: 1.0 + +[ClickHouse] SHALL support `scope` parameter in the `user_dn_detection` sub-section in the +`` section of the `config.xml` that SHALL the scope of the +LDAP search to detect the actual user DN. The `scope` parameter SHALL support the following values + +* `base` +* `one_level` +* `children` +* `subtree` + +For example, + +```xml + + ... 
+ one_level + +``` + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.Server.UserDNDetection.SearchFilter +version: 1.0 + +[ClickHouse] SHALL support `search_filter` parameter in the `user_dn_detection` sub-section in the +`` section of the `config.xml` that SHALL specify the LDAP search +filter used to detect the actual user DN. + +For example, + +```xml + + ... + (&(objectClass=user)(sAMAccountName={user_name})) + +``` + ### External User Directory Configuration #### Syntax @@ -1334,7 +1508,7 @@ version: 1.0 [ClickHouse] SHALL support the `` parameter in the `` section of the `config.xml` that SHALL specify the template to be used to construct the base `DN` for the [LDAP] search. -The resulting `DN` SHALL be constructed by replacing all the `{user_name}` and `{bind_dn}` substrings of +The resulting `DN` SHALL be constructed by replacing all the `{user_name}`, `{bind_dn}`, and `user_dn` substrings of the template with the actual user name and bind `DN` during each [LDAP] search. #### Attribute Parameter @@ -1397,7 +1571,7 @@ version: 1.0 section of the `config.xml` that SHALL specify the template used to construct the [LDAP filter](https://ldap.com/ldap-filters/) for the search. -The resulting filter SHALL be constructed by replacing all `{user_name}`, `{bind_dn}`, and `{base_dn}` substrings +The resulting filter SHALL be constructed by replacing all `{user_name}`, `{bind_dn}`, `{base_dn}`, and `{user_dn}` substrings of the template with the actual user name, bind `DN`, and base `DN` during each the [LDAP] search. #### Prefix Parameter diff --git a/tests/testflows/ldap/role_mapping/tests/common.py b/tests/testflows/ldap/role_mapping/tests/common.py index 33ad4a46f52..565503296e3 100644 --- a/tests/testflows/ldap/role_mapping/tests/common.py +++ b/tests/testflows/ldap/role_mapping/tests/common.py @@ -24,11 +24,12 @@ def create_table(self, name, create_statement, on_cluster=False): node.query(f"DROP TABLE IF EXISTS {name}") @TestStep(Given) -def add_ldap_servers_configuration(self, servers, config_d_dir="/etc/clickhouse-server/config.d", +def add_ldap_servers_configuration(self, servers, config=None, config_d_dir="/etc/clickhouse-server/config.d", config_file="ldap_servers.xml", timeout=60, restart=False): """Add LDAP servers configuration to config.xml. """ - config = create_ldap_servers_config_content(servers, config_d_dir, config_file) + if config is None: + config = create_ldap_servers_config_content(servers, config_d_dir, config_file) return add_config(config, restart=restart) @TestStep(Given) @@ -249,4 +250,4 @@ def create_ldap_external_user_directory_config_content(server=None, roles=None, def create_entries_ldap_external_user_directory_config_content(entries, **kwargs): """Create LDAP external user directory configuration file content. 
""" - return create_xml_config_content(entries, **kwargs) \ No newline at end of file + return create_xml_config_content(entries, **kwargs) diff --git a/tests/testflows/ldap/role_mapping/tests/user_dn_detection.py b/tests/testflows/ldap/role_mapping/tests/user_dn_detection.py new file mode 100644 index 00000000000..9ec24040973 --- /dev/null +++ b/tests/testflows/ldap/role_mapping/tests/user_dn_detection.py @@ -0,0 +1,474 @@ +# -*- coding: utf-8 -*- +import importlib + +from testflows.core import * +from testflows.asserts import error + +from ldap.role_mapping.requirements import * +from ldap.role_mapping.tests.common import * + +@TestOutline +def check_config(self, entries, valid=True, ldap_server="openldap1", user="user1", password="user1"): + """Apply LDAP server configuration and check login. + """ + if valid: + exitcode = 0 + message = "1" + else: + exitcode = 4 + message = "DB::Exception: user1: Authentication failed: password is incorrect or there is no user with such name" + + with Given("I add LDAP server configuration"): + config = create_xml_config_content(entries=entries, config_file="ldap_servers.xml") + add_ldap_servers_configuration(servers=None, config=config) + + with And("I add LDAP external user directory configuration"): + add_ldap_external_user_directory(server=ldap_server, + role_mappings=None, restart=True) + + with When(f"I login I try to login as an LDAP user"): + r = self.context.node.query(f"SELECT 1", settings=[ + ("user", user), ("password", password)], exitcode=exitcode, message=message) + +@TestScenario +@Tags("config") +@Requirements( + RQ_SRS_014_LDAP_RoleMapping_Configuration_Server_UserDNDetection_BaseDN("1.0") +) +def config_invalid_base_dn(self): + """Check when invalid `base_dn` is specified in the user_dn_detection section. + """ + + with Given("I define LDAP server configuration with invalid base_dn"): + entries = { + "ldap_servers": [ + { + "openldap1": { + "host": "openldap1", + "port": "389", + "enable_tls": "no", + "bind_dn": "cn={user_name},ou=users,dc=company,dc=com", + "user_dn_detection": { + "base_dn": "ou=user,dc=company,dc=com", + "search_filter": "(&(objectClass=inetOrgPerson)(uid={user_name}))" + } + } + } + ] + } + + check_config(entries=entries, valid=False) + +@TestScenario +@Tags("config") +@Requirements( + RQ_SRS_014_LDAP_RoleMapping_Configuration_Server_UserDNDetection_BaseDN("1.0") +) +def config_empty_base_dn(self): + """Check when empty `base_dn` is specified in the user_dn_detection section. + """ + with Given("I define LDAP server configuration with invalid base_dn"): + entries = { + "ldap_servers": [ + { + "openldap1": { + "host": "openldap1", + "port": "389", + "enable_tls": "no", + "bind_dn": "cn={user_name},ou=users,dc=company,dc=com", + "user_dn_detection": { + "base_dn": "", + "search_filter": "(&(objectClass=inetOrgPerson)(uid={user_name}))" + } + } + } + ] + } + + check_config(entries=entries, valid=False) + +@TestScenario +@Tags("config") +@Requirements( + RQ_SRS_014_LDAP_RoleMapping_Configuration_Server_UserDNDetection_BaseDN("1.0") +) +def config_missing_base_dn(self): + """Check when missing `base_dn` is specified in the user_dn_detection section. 
+ """ + with Given("I define LDAP server configuration with invalid base_dn"): + entries = { + "ldap_servers": [ + { + "openldap1": { + "host": "openldap1", + "port": "389", + "enable_tls": "no", + "bind_dn": "cn={user_name},ou=users,dc=company,dc=com", + "user_dn_detection": { + "search_filter": "(&(objectClass=inetOrgPerson)(uid={user_name}))" + } + } + } + ] + } + + check_config(entries=entries, valid=False) + +@TestScenario +@Tags("config") +@Requirements( + # FIXME +) +def config_invalid_search_filter(self): + """Check when invalid `search_filter` is specified in the user_dn_detection section. + """ + with Given("I define LDAP server configuration with invalid search_filter"): + entries = { + "ldap_servers": [ + { + "openldap1": { + "host": "openldap1", + "port": "389", + "enable_tls": "no", + "bind_dn": "cn={user_name},ou=users,dc=company,dc=com", + "user_dn_detection": { + "base_dn": "ou=users,dc=company,dc=com", + "search_filter": "(&(objectClass=inetOrgPersons)(uid={user_name}))" + } + } + } + ] + } + + check_config(entries=entries, valid=False) + +@TestScenario +@Tags("config") +@Requirements( + RQ_SRS_014_LDAP_RoleMapping_Configuration_Server_UserDNDetection_SearchFilter("1.0") +) +def config_missing_search_filter(self): + """Check when missing `search_filter` is specified in the user_dn_detection section. + """ + with Given("I define LDAP server configuration with invalid search_filter"): + entries = { + "ldap_servers": [ + { + "openldap1": { + "host": "openldap1", + "port": "389", + "enable_tls": "no", + "bind_dn": "cn={user_name},ou=users,dc=company,dc=com", + "user_dn_detection": { + "base_dn": "ou=users,dc=company,dc=com", + } + } + } + ] + } + + check_config(entries=entries, valid=False) + +@TestScenario +@Tags("config") +@Requirements( + RQ_SRS_014_LDAP_RoleMapping_Configuration_Server_UserDNDetection_SearchFilter("1.0") +) +def config_empty_search_filter(self): + """Check when empty `search_filter` is specified in the user_dn_detection section. + """ + with Given("I define LDAP server configuration with invalid search_filter"): + entries = { + "ldap_servers": [ + { + "openldap1": { + "host": "openldap1", + "port": "389", + "enable_tls": "no", + "bind_dn": "cn={user_name},ou=users,dc=company,dc=com", + "user_dn_detection": { + "base_dn": "ou=users,dc=company,dc=com", + "search_filter": "" + } + } + } + ] + } + + check_config(entries=entries, valid=False) + +@TestScenario +@Tags("config") +@Requirements( + RQ_SRS_014_LDAP_RoleMapping_Configuration_Server_UserDNDetection_BaseDN("1.0"), + RQ_SRS_014_LDAP_RoleMapping_Configuration_Server_UserDNDetection_SearchFilter("1.0") +) +def config_valid(self): + """Check valid config with valid user_dn_detection section. + """ + with Given("I define LDAP server configuration"): + entries = { + "ldap_servers": [ + { + "openldap1": { + "host": "openldap1", + "port": "389", + "enable_tls": "no", + "bind_dn": "cn={user_name},ou=users,dc=company,dc=com", + "user_dn_detection": { + "base_dn": "ou=users,dc=company,dc=com", + "search_filter": "(&(objectClass=inetOrgPerson)(uid={user_name}))" + } + } + } + ] + } + + check_config(entries=entries, valid=True) + +@TestScenario +@Tags("config") +@Requirements( + RQ_SRS_014_LDAP_RoleMapping_Configuration_Server_UserDNDetection_BaseDN("1.0"), + RQ_SRS_014_LDAP_RoleMapping_Configuration_Server_UserDNDetection_SearchFilter("1.0") +) +def config_valid_tls_connection(self): + """Check valid config with valid user_dn_detection section when + using LDAP that is configured to use TLS connection. 
+ """ + with Given("I define LDAP server configuration"): + entries = { + "ldap_servers": [ + { + "openldap2": { + "host": "openldap2", + "port": "636", + "enable_tls": "yes", + "bind_dn": "cn={user_name},ou=users,dc=company,dc=com", + "tls_require_cert": "never", + "user_dn_detection": { + "base_dn": "ou=users,dc=company,dc=com", + "search_filter": "(&(objectClass=inetOrgPerson)(uid={user_name}))" + } + } + } + ] + } + + check_config(entries=entries, valid=True, ldap_server="openldap2", user="user2", password="user2") + +@TestOutline(Scenario) +@Requirements( + RQ_SRS_014_LDAP_RoleMapping_Configuration_Server_UserDNDetection_Scope("1.0") +) +@Examples("scope base_dn", [ + ("base", "cn=user1,ou=users,dc=company,dc=com"), + ("one_level","ou=users,dc=company,dc=com"), + ("children","ou=users,dc=company,dc=com"), + ("subtree","ou=users,dc=company,dc=com") # default value +]) +def check_valid_scope_values(self, scope, base_dn): + """Check configuration with valid scope values. + """ + with Given("I define LDAP server configuration"): + entries = { + "ldap_servers": [ + { + "openldap1": { + "host": "openldap1", + "port": "389", + "enable_tls": "no", + "bind_dn": "cn={user_name},ou=users,dc=company,dc=com", + "user_dn_detection": { + "base_dn": base_dn, + "search_filter": "(&(objectClass=inetOrgPerson)(uid={user_name}))", + "scope": scope + } + } + } + ] + } + + check_config(entries=entries, valid=True) + +@TestSuite +def mapping(self): + """Run all role mapping tests with both + openldap1 and openldap2 configured to use + user DN detection. + """ + users = [ + {"server": "openldap1", "username": "user1", "password": "user1", "login": True, + "dn": "cn=user1,ou=users,dc=company,dc=com"}, + ] + + entries = { + "ldap_servers": [ + { + "openldap1": { + "host": "openldap1", + "port": "389", + "enable_tls": "no", + "bind_dn": "cn={user_name},ou=users,dc=company,dc=com", + "user_dn_detection": { + "base_dn": "ou=users,dc=company,dc=com", + "search_filter": "(&(objectClass=inetOrgPerson)(uid={user_name}))" + } + }, + "openldap2": { + "host": "openldap2", + "port": "636", + "enable_tls": "yes", + "bind_dn": "cn={user_name},ou=users,dc=company,dc=com", + "tls_require_cert": "never", + "user_dn_detection": { + "base_dn": "ou=users,dc=company,dc=com", + "search_filter": "(&(objectClass=inetOrgPerson)(uid={user_name}))" + } + } + }, + ] + } + + with Given("I add LDAP servers configuration"): + config = create_xml_config_content(entries=entries, config_file="ldap_servers.xml") + add_ldap_servers_configuration(servers=None, config=config) + + for scenario in loads(importlib.import_module("tests.mapping", package=None), Scenario): + scenario(ldap_server="openldap1", ldap_user=users[0]) + +@TestOutline +def setup_different_bind_dn_and_user_dn(self, uid, map_by, user_dn_detection): + """Check that roles get mapped properly when bind_dn and user_dn are different + by creating LDAP users that have switched uid parameter values. 
+ """ + with Given("I define LDAP server configuration"): + entries = { + "ldap_servers": [ + { + "openldap1": { + "host": "openldap1", + "port": "389", + "enable_tls": "no", + "bind_dn": "cn={user_name},ou=users,dc=company,dc=com", + } + } + ] + } + + if user_dn_detection: + with And("I enable user dn detection"): + entries["ldap_servers"][0]["openldap1"]["user_dn_detection"] = { + "base_dn": "ou=users,dc=company,dc=com", + "search_filter": "(&(objectClass=inetOrgPerson)(uid={user_name}))", + "scope": "subtree" + } + + with And("I define role mappings"): + role_mappings = [ + { + "base_dn": "ou=groups,dc=company,dc=com", + "attribute": "cn", + "search_filter": f"(&(objectClass=groupOfUniqueNames)(uniquemember={{{map_by}}}))", + "prefix":"" + } + ] + + with Given("I add LDAP users"): + first_user = add_ldap_users(users=[ + {"cn": f"first_user", "userpassword": "user", "uid": "second_user"} + ])[0] + + second_user = add_ldap_users(users=[ + {"cn": f"second_user", "userpassword": "user", "uid": "first_user"} + ])[0] + + with Given("I add LDAP groups"): + groups = add_ldap_groups(groups=({"cn": f"role0_{uid}"}, {"cn": f"role1_{uid}"})) + + with And("I add LDAP user to each LDAP group"): + with By("adding first group to first user"): + add_user_to_group_in_ldap(user=first_user, group=groups[0]) + with And("adding second group to second user"): + add_user_to_group_in_ldap(user=second_user, group=groups[1]) + + with And("I add RBAC roles"): + roles = add_rbac_roles(roles=(f"role0_{uid}", f"role1_{uid}")) + + with Given("I add LDAP server configuration"): + config = create_xml_config_content(entries=entries, config_file="ldap_servers.xml") + add_ldap_servers_configuration(servers=None, config=config) + + with And("I add LDAP external user directory configuration"): + add_ldap_external_user_directory(server=self.context.ldap_node.name, + role_mappings=role_mappings, restart=True) + +@TestScenario +def map_roles_by_user_dn_when_base_dn_and_user_dn_are_different(self): + """Check the case when we map roles using user_dn then + the first user has uid of second user and second user + has uid of first user and configuring user DN detection to + determine user_dn based on the uid value so that user_dn + for the first user will be bind_dn of the second user and + vice versa. + """ + uid = getuid() + + setup_different_bind_dn_and_user_dn(uid=uid, map_by="user_dn", user_dn_detection=True) + + with When(f"I login as first LDAP user"): + r = self.context.node.query(f"SHOW GRANTS", settings=[ + ("user", "first_user"), ("password", "user")]) + + with Then("I expect the first user to have mapped LDAP roles from second user"): + assert f"GRANT role1_{uid} TO first_user" in r.output, error() + + with When(f"I login as second LDAP user"): + r = self.context.node.query(f"SHOW GRANTS", settings=[ + ("user", "second_user"), ("password", "user")]) + + with Then("I expect the second user to have mapped LDAP roles from first user"): + assert f"GRANT role0_{uid} TO second_user" in r.output, error() + +@TestScenario +def map_roles_by_bind_dn_when_base_dn_and_user_dn_are_different(self): + """Check the case when we map roles by bind_dn when bind_dn and user_dn + are different. 
+ """ + uid = getuid() + + setup_different_bind_dn_and_user_dn(uid=uid, map_by="bind_dn", user_dn_detection=True) + + with When(f"I login as first LDAP user"): + r = self.context.node.query(f"SHOW GRANTS", settings=[ + ("user", "first_user"), ("password", "user")]) + + with Then("I expect the first user to have no mapped LDAP roles"): + assert f"GRANT role0_{uid} TO first_user" == r.output, error() + + with When(f"I login as second LDAP user"): + r = self.context.node.query(f"SHOW GRANTS", settings=[ + ("user", "second_user"), ("password", "user")]) + + with Then("I expect the second user to have no mapped LDAP roles"): + assert f"GRANT role1_{uid} TO second_user" in r.output, error() + +@TestFeature +@Name("user dn detection") +@Requirements( + RQ_SRS_014_LDAP_RoleMapping_Configuration_Server_UserDNDetection("1.0") +) +def feature(self): + """Check LDAP user DN detection. + """ + self.context.node = self.context.cluster.node("clickhouse1") + self.context.ldap_node = self.context.cluster.node("openldap1") + + with Given("I fix LDAP access permissions"): + fix_ldap_permissions(node=self.context.cluster.node("openldap1")) + fix_ldap_permissions(node=self.context.cluster.node("openldap2")) + + for scenario in ordered(loads(current_module(), Scenario)): + scenario() + + Suite(run=mapping) From e694af921d9306b6f2486744815ef8bfa486917c Mon Sep 17 00:00:00 2001 From: adevyatova Date: Sun, 25 Apr 2021 20:04:42 +0000 Subject: [PATCH 004/204] add settings desc --- .../settings/merge-tree-settings.md | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index 77b68715ba9..f36d46c4d3d 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -115,6 +115,39 @@ Default value: 604800 (1 week). Similar to [replicated_deduplication_window](#replicated-deduplication-window), `replicated_deduplication_window_seconds` specifies how long to store hash sums of blocks for insert deduplication. Hash sums older than `replicated_deduplication_window_seconds` are removed from Zookeeper, even if they are less than ` replicated_deduplication_window`. +## replicated_fetches_http_connection_timeout + +HTTP connection timeout (in seconds) for part fetch requests. Inherited from default profile `http_connection_timeout` if not set explicitly. + +Possible values: + +- Any positive integer. +- 0 - Disabled. + +Default value: 0. + +## replicated_fetches_http_send_timeout + +HTTP send timeout (in seconds) for part fetch requests. Inherited from default profile `http_send_timeout` if not set explicitly. + +Possible values: + +- Any positive integer. +- 0 - Disabled. + +Default value: 0. + +## replicated_fetches_http_receive_timeout + +HTTP receive timeout (in seconds) for fetch part requests. Inherited from default profile `http_receive_timeout` if not set explicitly. + +Possible values: + +- Any positive integer. +- 0 - Disabled. + +Default value: 0. + ## old_parts_lifetime {#old-parts-lifetime} The time (in seconds) of storing inactive parts to protect against data loss during spontaneous server reboots. 
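
As an illustration of the `replicated_fetches_http_*` settings documented above: they are MergeTree-level settings, so they can be set per table (or inherited from the profile when left at `0`). The sketch below uses hypothetical database/table names and assumed timeout values.

```sql
-- Hypothetical table, used only to illustrate the fetch-timeout settings described above.
CREATE TABLE example.fetch_timeouts_demo
(
    key UInt64,
    value String
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/fetch_timeouts_demo', '{replica}')
ORDER BY key
SETTINGS
    replicated_fetches_http_connection_timeout = 10,  -- seconds; 0 would inherit http_connection_timeout
    replicated_fetches_http_send_timeout = 30,
    replicated_fetches_http_receive_timeout = 30;

-- MergeTree settings can usually also be adjusted on an existing table:
ALTER TABLE example.fetch_timeouts_demo
    MODIFY SETTING replicated_fetches_http_receive_timeout = 60;
```
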
From 47a4ae930f59fa888ec86f187ad22e6ace350cb9 Mon Sep 17 00:00:00 2001 From: Evgeniia Sudarikova Date: Mon, 3 May 2021 23:44:46 +0300 Subject: [PATCH 005/204] edited --- .../parametric-functions.md | 2 +- .../functions/ip-address-functions.md | 4 +- .../functions/type-conversion-functions.md | 48 +++++++++++--- .../parametric-functions.md | 2 +- .../functions/ip-address-functions.md | 8 +-- .../functions/type-conversion-functions.md | 66 ++++++++++++++----- 6 files changed, 97 insertions(+), 33 deletions(-) diff --git a/docs/en/sql-reference/aggregate-functions/parametric-functions.md b/docs/en/sql-reference/aggregate-functions/parametric-functions.md index b9d504241db..83644ef1272 100644 --- a/docs/en/sql-reference/aggregate-functions/parametric-functions.md +++ b/docs/en/sql-reference/aggregate-functions/parametric-functions.md @@ -253,7 +253,7 @@ windowFunnel(window, [mode, [mode, ... ]])(timestamp, cond1, cond2, ..., condN) **Parameters** -- `window` — Length of the sliding window, it is the time interval between first condition and last condition. The unit of `window` depends on the `timestamp` itself and varies. Determined using the expression `timestamp of cond1 <= timestamp of cond2 <= ... <= timestamp of condN <= timestamp of cond1 + window`. +- `window` — Length of the sliding window, it is the time interval between the first condition and last condition. The unit of `window` depends on the `timestamp` itself and varies. Determined using the expression `timestamp of cond1 <= timestamp of cond2 <= ... <= timestamp of condN <= timestamp of cond1 + window`. - `mode` — It is an optional argument. One or more modes can be set. - `'strict'` — If same condition holds for sequence of events then such non-unique events would be skipped. - `'strict_order'` — Don't allow interventions of other events. E.g. in the case of `A->B->D->C`, it stops finding `A->B->C` at the `D` and the max event level is 2. diff --git a/docs/en/sql-reference/functions/ip-address-functions.md b/docs/en/sql-reference/functions/ip-address-functions.md index 0b5dd7160b8..d37ef2e8f1a 100644 --- a/docs/en/sql-reference/functions/ip-address-functions.md +++ b/docs/en/sql-reference/functions/ip-address-functions.md @@ -422,7 +422,7 @@ Type: [UInt8](../../sql-reference/data-types/int-uint.md). Query: ``` sql -SELECT isIPAddressInRange('127.0.0.1', '127.0.0.0/8') +SELECT isIPAddressInRange('127.0.0.1', '127.0.0.0/8'); ``` Result: @@ -436,7 +436,7 @@ Result: Query: ``` sql -SELECT isIPAddressInRange('127.0.0.1', 'ffff::/16') +SELECT isIPAddressInRange('127.0.0.1', 'ffff::/16'); ``` Result: diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index d8d13d81d97..0d9053c993d 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -373,7 +373,7 @@ This function accepts a number or date or date with time, and returns a FixedStr ## reinterpretAsUUID {#reinterpretasuuid} -This function accepts 16 bytes string, and returns UUID containing bytes representing the corresponding value in network byte order (big-endian). If the string isn't long enough, the functions work as if the string is padded with the necessary number of null bytes to the end. If the string longer than 16 bytes, the extra bytes at the end are ignored. +Accepts 16 bytes string and returns UUID containing bytes representing the corresponding value in network byte order (big-endian). 
If the string isn't long enough, the functions works as if the string is padded with the necessary number of null bytes to the end. If the string longer than 16 bytes, the extra bytes at the end are ignored. **Syntax** @@ -429,7 +429,24 @@ Result: ## reinterpret(x, T) {#type_conversion_function-reinterpret} -Use the same source in-memory bytes sequence for `x` value and reinterpret it to destination type +Uses the same source in-memory bytes sequence for `x` value and reinterprets it to destination type. + +**Syntax** + +``` sql +reinterpret(x, type) +``` + +**Arguments** + +- `x` — Any type. +- `type` — Destination type. [String](../../sql-reference/data-types/string.md). + +**Returned value** + +- Destination type value. + +**Examples** Query: ```sql @@ -448,21 +465,36 @@ Result: ## CAST(x, T) {#type_conversion_function-cast} -Converts input value `x` to the `T` data type. Unlike to `reinterpret` function use external representation of `x` value. +Converts input value `x` to the `T` data type. Unlike to `reinterpret` function, type conversion is performed in a natural way. The syntax `CAST(x AS t)` is also supported. Note, that if value `x` does not fit the bounds of type T, the function overflows. For example, CAST(-1, 'UInt8') returns 255. +**Syntax** + +``` sql +CAST(x, T) +``` + +**Arguments** + +- `x` — Any type. +- `T` — Destination type. [String](../../sql-reference/data-types/string.md). + +**Returned value** + +- Destination type value. + **Examples** Query: ```sql SELECT - cast(toInt8(-1), 'UInt8') AS cast_int_to_uint, - cast(toInt8(1), 'Float32') AS cast_int_to_float, - cast('1', 'UInt32') AS cast_string_to_int + CAST(toInt8(-1), 'UInt8') AS cast_int_to_uint, + CAST(toInt8(1), 'Float32') AS cast_int_to_float, + CAST('1', 'UInt32') AS cast_string_to_int ``` Result: @@ -492,7 +524,7 @@ Result: └─────────────────────┴─────────────────────┴────────────┴─────────────────────┴───────────────────────────┘ ``` -Conversion to FixedString(N) only works for arguments of type String or FixedString(N). +Conversion to FixedString(N) only works for arguments of type [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). Type conversion to [Nullable](../../sql-reference/data-types/nullable.md) and back is supported. @@ -1038,7 +1070,7 @@ Result: ## parseDateTime64BestEffort {#parsedatetime64besteffort} -Same as [parseDateTimeBestEffort](#parsedatetimebesteffort) function but also parse milliseconds and microseconds and return `DateTime64(3)` or `DateTime64(6)` data types. +Same as [parseDateTimeBestEffort](#parsedatetimebesteffort) function but also parse milliseconds and microseconds and returns `DateTime64(3)` or `DateTime64(6)` data types. **Syntax** diff --git a/docs/ru/sql-reference/aggregate-functions/parametric-functions.md b/docs/ru/sql-reference/aggregate-functions/parametric-functions.md index e5162b63b88..90754aa999b 100644 --- a/docs/ru/sql-reference/aggregate-functions/parametric-functions.md +++ b/docs/ru/sql-reference/aggregate-functions/parametric-functions.md @@ -253,7 +253,7 @@ windowFunnel(window, [mode, [mode, ... ]])(timestamp, cond1, cond2, ..., condN) **Параметры** -- `window` — ширина скользящего окна по времени. Единица измерения зависит от `timestamp` и может варьироваться. Должно соблюдаться условие `timestamp события cond2 <= timestamp события cond1 + window`. +- `window` — ширина скользящего окна по времени. Это время между первым и последним условием. 
Единица измерения зависит от `timestamp` и может варьироваться. Должно соблюдаться условие `timestamp события cond1 <= timestamp события cond2 <= ... <= timestamp события condN <= timestamp события cond1 + window`. - `mode` — необязательный параметр. Может быть установленно несколько значений одновременно. - `'strict'` — не учитывать подряд идущие повторяющиеся события. - `'strict_order'` — запрещает посторонние события в искомой последовательности. Например, при поиске цепочки `A->B->C` в `A->B->D->C` поиск будет остановлен на `D` и функция вернет 2. diff --git a/docs/ru/sql-reference/functions/ip-address-functions.md b/docs/ru/sql-reference/functions/ip-address-functions.md index d7f6d2f7618..10ded819fef 100644 --- a/docs/ru/sql-reference/functions/ip-address-functions.md +++ b/docs/ru/sql-reference/functions/ip-address-functions.md @@ -397,7 +397,7 @@ SELECT addr, isIPv6String(addr) FROM ( SELECT ['::', '1111::ffff', '::ffff:127.0 ## isIPAddressInRange {#isipaddressinrange} -Проверяет попадает ли IP адрес в интервал, заданный в [CIDR](https://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing) нотации. +Проверяет, попадает ли IP адрес в интервал, заданный в нотации [CIDR](https://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing). **Syntax** @@ -409,7 +409,7 @@ isIPAddressInRange(address, prefix) **Аргументы** - `address` — IPv4 или IPv6 адрес. [String](../../sql-reference/data-types/string.md). -- `prefix` — IPv4 или IPv6 подсеть, заданная в CIDR нотации. [String](../../sql-reference/data-types/string.md). +- `prefix` — IPv4 или IPv6 подсеть, заданная в нотации CIDR. [String](../../sql-reference/data-types/string.md). **Возвращаемое значение** @@ -422,7 +422,7 @@ isIPAddressInRange(address, prefix) Запрос: ``` sql -SELECT isIPAddressInRange('127.0.0.1', '127.0.0.0/8') +SELECT isIPAddressInRange('127.0.0.1', '127.0.0.0/8'); ``` Результат: @@ -436,7 +436,7 @@ SELECT isIPAddressInRange('127.0.0.1', '127.0.0.0/8') Запрос: ``` sql -SELECT isIPAddressInRange('127.0.0.1', 'ffff::/16') +SELECT isIPAddressInRange('127.0.0.1', 'ffff::/16'); ``` Результат: diff --git a/docs/ru/sql-reference/functions/type-conversion-functions.md b/docs/ru/sql-reference/functions/type-conversion-functions.md index fc1dd15f8e3..5f79ac635ba 100644 --- a/docs/ru/sql-reference/functions/type-conversion-functions.md +++ b/docs/ru/sql-reference/functions/type-conversion-functions.md @@ -369,7 +369,7 @@ SELECT toFixedString('foo\0bar', 8) AS s, toStringCutToZero(s) AS s_cut; ## reinterpretAsUUID {#reinterpretasuuid} -Функция принимает шестнадцатибайтную строку и интерпретирует ее байты в network order (big-endian). Если строка имеет недостаточную длину, то функция работает так, как будто строка дополнена необходимым количетсвом нулевых байт с конца. Если строка длиннее, чем шестнадцать байт, то игнорируются лишние байты с конца. +Функция принимает шестнадцатибайтную строку и интерпретирует ее байты в network order (big-endian). Если строка имеет недостаточную длину, то функция работает так, как будто строка дополнена необходимым количеством нулевых байтов с конца. Если строка длиннее, чем шестнадцать байтов, то игнорируются лишние байты с конца. 
**Синтаксис** @@ -425,9 +425,27 @@ SELECT uuid = uuid2; ## reinterpret(x, T) {#type_conversion_function-reinterpret} -Использует туже самую исходную последовательность байт в памяти для значения `x` и переинтерпретирует ее как конечный тип данных +Использует ту же самую исходную последовательность байтов в памяти для значения `x` и интерпретирует ее как конечный тип данных `T`. + +**Синтаксис** + +``` sql +reinterpret(x, type) +``` + +**Аргументы** + +- `x` — любой тип данных. +- `type` — конечный тип данных. [String](../../sql-reference/data-types/string.md). + +**Возвращаемое значение** + +- Конечный тип данных. + +**Примеры** Запрос: + ```sql SELECT reinterpret(toInt8(-1), 'UInt8') as int_to_uint, reinterpret(toInt8(1), 'Float32') as int_to_float, @@ -450,15 +468,30 @@ SELECT reinterpret(toInt8(-1), 'UInt8') as int_to_uint, Обратите внимание, что если значение `x` не может быть преобразовано к типу `T`, возникает переполнение. Например, `CAST(-1, 'UInt8')` возвращает 255. +**Синтаксис** + +``` sql +CAST(x, T) +``` + +**Аргументы** + +- `x` — любой тип данных. +- `T` — конечный тип данных. [String](../../sql-reference/data-types/string.md). + +**Returned value** + +- Конечный тип данных. + **Примеры** Запрос: ```sql SELECT - cast(toInt8(-1), 'UInt8') AS cast_int_to_uint, - cast(toInt8(1), 'Float32') AS cast_int_to_float, - cast('1', 'UInt32') AS cast_string_to_int + CAST(toInt8(-1), 'UInt8') AS cast_int_to_uint, + CAST(toInt8(1), 'Float32') AS cast_int_to_float, + CAST('1', 'UInt32') AS cast_string_to_int ``` Результат: @@ -488,7 +521,7 @@ SELECT └─────────────────────┴─────────────────────┴────────────┴─────────────────────┴───────────────────────────┘ ``` -Преобразование в FixedString(N) работает только для аргументов типа String или FixedString(N). +Преобразование в FixedString(N) работает только для аргументов типа [String](../../sql-reference/data-types/string.md) или [FixedString](../../sql-reference/data-types/fixedstring.md). Поддержано преобразование к типу [Nullable](../../sql-reference/functions/type-conversion-functions.md) и обратно. @@ -860,7 +893,7 @@ AS parseDateTimeBestEffortUS; ## parseDateTimeBestEffortOrZero {#parsedatetimebesteffortorzero} ## parseDateTime32BestEffortOrZero {#parsedatetime32besteffortorzero} -Работает также как [parseDateTimeBestEffort](#parsedatetimebesteffort), но возвращает нулевую дату или нулевую дату и время когда получает формат даты который не может быть обработан. +Работает аналогично функции [parseDateTimeBestEffort](#parsedatetimebesteffort), но возвращает нулевую дату или нулевую дату и время, когда получает формат даты, который не может быть обработан. ## parseDateTimeBestEffortUSOrNull {#parsedatetimebesteffortusornull} @@ -1036,19 +1069,19 @@ SELECT parseDateTimeBestEffortUSOrZero('02.2021') AS parseDateTimeBestEffortUSOr ## parseDateTime64BestEffort {#parsedatetime64besteffort} -Работает также как функция [parseDateTimeBestEffort](#parsedatetimebesteffort) но также понимамет милисекунды и микросекунды и возвращает `DateTime64(3)` или `DateTime64(6)` типы данных в зависимости от заданной точности. +Работает аналогично функции [parseDateTimeBestEffort](#parsedatetimebesteffort), но также принимает миллисекунды и микросекунды и возвращает типы данных `DateTime64(3)` или `DateTime64(6)` в зависимости от заданной точности. -**Syntax** +**Синтаксис** ``` sql parseDateTime64BestEffort(time_string [, precision [, time_zone]]) ``` -**Parameters** +**Параметры** -- `time_string` — String containing a date or date with time to convert. 
[String](../../sql-reference/data-types/string.md). -- `precision` — `3` for milliseconds, `6` for microseconds. Default `3`. Optional [UInt8](../../sql-reference/data-types/int-uint.md). -- `time_zone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../../sql-reference/data-types/string.md). +- `time_string` — строка, содержащая дату или дату со временем, которые нужно преобразовать. [String](../../sql-reference/data-types/string.md). +- `precision` — `3` для миллисекунд, `6` для микросекунд. По умолчанию `3`. Необязательный. [UInt8](../../sql-reference/data-types/int-uint.md). +- `time_zone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone). Разбирает значение `time_string` в зависимости от часового пояса. Необязательный. [String](../../sql-reference/data-types/string.md). **Примеры** @@ -1078,12 +1111,11 @@ FORMAT PrettyCompactMonoBlcok ## parseDateTime64BestEffortOrNull {#parsedatetime32besteffortornull} -Работает также как функция [parseDateTime64BestEffort](#parsedatetime64besteffort) но возвращает `NULL` когда встречает формат даты который не может обработать. +Работает аналогично функции [parseDateTime64BestEffort](#parsedatetime64besteffort), но возвращает `NULL`, когда встречает формат даты, который не может обработать. ## parseDateTime64BestEffortOrZero {#parsedatetime64besteffortorzero} -Работает также как функция [parseDateTime64BestEffort](#parsedatetimebesteffort) но возвращает "нулевую" дату и время когда встречает формат даты который не может обработать. - +Работает аналогично функции [parseDateTime64BestEffort](#parsedatetimebesteffort), но возвращает "нулевую" дату и время, когда встречает формат даты, который не может обработать. ## toLowCardinality {#tolowcardinality} @@ -1130,7 +1162,7 @@ SELECT toLowCardinality('1'); ## toUnixTimestamp64Nano {#tounixtimestamp64nano} Преобразует значение `DateTime64` в значение `Int64` с фиксированной точностью менее одной секунды. -Входное значение округляется соответствующим образом вверх или вниз в зависимости от его точности. Обратите внимание, что возвращаемое значение - это временная метка в UTC, а не в часовом поясе `DateTime64`. +Входное значение округляется соответствующим образом вверх или вниз в зависимости от его точности. Обратите внимание, что возвращаемое значение — это временная метка в UTC, а не в часовом поясе `DateTime64`. 
**Синтаксис** From 458cd6961ce5d566a202e932f8b1c60d37dd3d49 Mon Sep 17 00:00:00 2001 From: Evgeniia Sudarikova Date: Tue, 4 May 2021 13:19:45 +0300 Subject: [PATCH 006/204] minor changes --- .../parametric-functions.md | 2 +- .../functions/type-conversion-functions.md | 18 +++++++++++------- .../parametric-functions.md | 2 +- .../functions/type-conversion-functions.md | 12 ++++++++---- 4 files changed, 21 insertions(+), 13 deletions(-) diff --git a/docs/en/sql-reference/aggregate-functions/parametric-functions.md b/docs/en/sql-reference/aggregate-functions/parametric-functions.md index 83644ef1272..2a221bbb6eb 100644 --- a/docs/en/sql-reference/aggregate-functions/parametric-functions.md +++ b/docs/en/sql-reference/aggregate-functions/parametric-functions.md @@ -312,7 +312,7 @@ FROM GROUP BY user_id ) GROUP BY level -ORDER BY level ASC +ORDER BY level ASC; ``` Result: diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index 0d9053c993d..87dbfa1728f 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -469,7 +469,7 @@ Converts input value `x` to the `T` data type. Unlike to `reinterpret` function, The syntax `CAST(x AS t)` is also supported. -Note, that if value `x` does not fit the bounds of type T, the function overflows. For example, CAST(-1, 'UInt8') returns 255. +Note, that if value `x` does not fit the bounds of type `T`, the function overflows. For example, `CAST(-1, 'UInt8')` returns `255`. **Syntax** @@ -494,7 +494,7 @@ Query: SELECT CAST(toInt8(-1), 'UInt8') AS cast_int_to_uint, CAST(toInt8(1), 'Float32') AS cast_int_to_float, - CAST('1', 'UInt32') AS cast_string_to_int + CAST('1', 'UInt32') AS cast_string_to_int; ``` Result: @@ -1070,7 +1070,7 @@ Result: ## parseDateTime64BestEffort {#parsedatetime64besteffort} -Same as [parseDateTimeBestEffort](#parsedatetimebesteffort) function but also parse milliseconds and microseconds and returns `DateTime64(3)` or `DateTime64(6)` data types. +Same as [parseDateTimeBestEffort](#parsedatetimebesteffort) function but also parse milliseconds and microseconds and returns [DateTime](../../sql-reference/functions/type-conversion-functions.md#data_type-datetime) data type. **Syntax** @@ -1081,9 +1081,13 @@ parseDateTime64BestEffort(time_string [, precision [, time_zone]]) **Parameters** - `time_string` — String containing a date or date with time to convert. [String](../../sql-reference/data-types/string.md). -- `precision` — `3` for milliseconds, `6` for microseconds. Default `3`. Optional [UInt8](../../sql-reference/data-types/int-uint.md). +- `precision` — `3` for milliseconds, `6` for microseconds. Default `3`. Optional. [UInt8](../../sql-reference/data-types/int-uint.md). - `time_zone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../../sql-reference/data-types/string.md). +**Returned value** + +- `time_string` converted to the [DateTime](../../sql-reference/data-types/datetime.md) data type. 
+ **Examples** Query: @@ -1096,7 +1100,7 @@ UNION ALL SELECT parseDateTime64BestEffort('2021-01-01 01:01:00.12346',6) AS a, toTypeName(a) AS t UNION ALL SELECT parseDateTime64BestEffort('2021-01-01 01:01:00.12346',3,'Europe/Moscow') AS a, toTypeName(a) AS t -FORMAT PrettyCompactMonoBlcok +FORMAT PrettyCompactMonoBlcok; ``` Result: @@ -1168,7 +1172,7 @@ Input value is scaled up or down appropriately depending on it precision. Please **Syntax** -``` sql +```sql toUnixTimestamp64Milli(value) ``` @@ -1184,7 +1188,7 @@ toUnixTimestamp64Milli(value) Query: -``` sql +```sql WITH toDateTime64('2019-09-16 19:20:12.345678910', 6) AS dt64 SELECT toUnixTimestamp64Milli(dt64); ``` diff --git a/docs/ru/sql-reference/aggregate-functions/parametric-functions.md b/docs/ru/sql-reference/aggregate-functions/parametric-functions.md index 90754aa999b..508c8de2a58 100644 --- a/docs/ru/sql-reference/aggregate-functions/parametric-functions.md +++ b/docs/ru/sql-reference/aggregate-functions/parametric-functions.md @@ -311,7 +311,7 @@ FROM GROUP BY user_id ) GROUP BY level -ORDER BY level ASC +ORDER BY level ASC; ``` ## retention {#retention} diff --git a/docs/ru/sql-reference/functions/type-conversion-functions.md b/docs/ru/sql-reference/functions/type-conversion-functions.md index 5f79ac635ba..46032f29551 100644 --- a/docs/ru/sql-reference/functions/type-conversion-functions.md +++ b/docs/ru/sql-reference/functions/type-conversion-functions.md @@ -1069,7 +1069,7 @@ SELECT parseDateTimeBestEffortUSOrZero('02.2021') AS parseDateTimeBestEffortUSOr ## parseDateTime64BestEffort {#parsedatetime64besteffort} -Работает аналогично функции [parseDateTimeBestEffort](#parsedatetimebesteffort), но также принимает миллисекунды и микросекунды и возвращает типы данных `DateTime64(3)` или `DateTime64(6)` в зависимости от заданной точности. +Работает аналогично функции [parseDateTimeBestEffort](#parsedatetimebesteffort), но также принимает миллисекунды и микросекунды. Возвращает тип данных [DateTime](../../sql-reference/functions/type-conversion-functions.md#data_type-datetime). **Синтаксис** @@ -1083,6 +1083,10 @@ parseDateTime64BestEffort(time_string [, precision [, time_zone]]) - `precision` — `3` для миллисекунд, `6` для микросекунд. По умолчанию `3`. Необязательный. [UInt8](../../sql-reference/data-types/int-uint.md). - `time_zone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone). Разбирает значение `time_string` в зависимости от часового пояса. Необязательный. [String](../../sql-reference/data-types/string.md). +**Возвращаемое значение** + +- `time_string`, преобразованная в тип данных [DateTime](../../sql-reference/data-types/datetime.md). 
+ **Примеры** Запрос: @@ -1095,7 +1099,7 @@ UNION ALL SELECT parseDateTime64BestEffort('2021-01-01 01:01:00.12346',6) AS a, toTypeName(a) AS t UNION ALL SELECT parseDateTime64BestEffort('2021-01-01 01:01:00.12346',3,'Europe/Moscow') AS a, toTypeName(a) AS t -FORMAT PrettyCompactMonoBlcok +FORMAT PrettyCompactMonoBlcok; ``` Результат: @@ -1166,7 +1170,7 @@ SELECT toLowCardinality('1'); **Синтаксис** -``` sql +```sql toUnixTimestamp64Milli(value) ``` @@ -1182,7 +1186,7 @@ toUnixTimestamp64Milli(value) Запрос: -``` sql +```sql WITH toDateTime64('2019-09-16 19:20:12.345678910', 6) AS dt64 SELECT toUnixTimestamp64Milli(dt64); ``` From 4aad69dc90b56c3f1fc62849f6be2658feec91fb Mon Sep 17 00:00:00 2001 From: kssenii Date: Sat, 1 May 2021 18:00:43 +0000 Subject: [PATCH 007/204] Better commnets --- src/Storages/RabbitMQ/RabbitMQHandler.h | 9 +++ src/Storages/RabbitMQ/RabbitMQSettings.h | 4 +- src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 86 +++++++++++------------ src/Storages/RabbitMQ/StorageRabbitMQ.h | 7 +- 4 files changed, 57 insertions(+), 49 deletions(-) diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.h b/src/Storages/RabbitMQ/RabbitMQHandler.h index 51cfbdc1144..d312a6c75c6 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.h +++ b/src/Storages/RabbitMQ/RabbitMQHandler.h @@ -17,6 +17,15 @@ namespace Loop static const UInt8 STOP = 2; } + +class RabbitMQChannel : public AMQP::TcpChannel +{ +public: + RabbitMQChannel(AMQP::TcpConnection * connection) : TcpChannel(connection) {} + ~RabbitMQChannel() override { close(); } +}; + + class RabbitMQHandler : public AMQP::LibUvHandler { diff --git a/src/Storages/RabbitMQ/RabbitMQSettings.h b/src/Storages/RabbitMQ/RabbitMQSettings.h index c6725903898..c44648ebd40 100644 --- a/src/Storages/RabbitMQ/RabbitMQSettings.h +++ b/src/Storages/RabbitMQ/RabbitMQSettings.h @@ -20,11 +20,11 @@ namespace DB M(UInt64, rabbitmq_num_queues, 1, "The number of queues per consumer.", 0) \ M(String, rabbitmq_queue_base, "", "Base for queue names to be able to reopen non-empty queues in case of failure.", 0) \ M(String, rabbitmq_deadletter_exchange, "", "Exchange name to be passed as a dead-letter-exchange name.", 0) \ - M(Bool, rabbitmq_persistent, false, "If set, delivery mode will be set to 2 (makes messages 'persistent', durable).", 0) \ + M(Bool, rabbitmq_persistent, false, "For insert query messages will be made 'persistent', durable.", 0) \ M(UInt64, rabbitmq_skip_broken_messages, 0, "Skip at least this number of broken messages from RabbitMQ per block", 0) \ M(UInt64, rabbitmq_max_block_size, 0, "Number of row collected before flushing data from RabbitMQ.", 0) \ M(Milliseconds, rabbitmq_flush_interval_ms, 0, "Timeout for flushing data from RabbitMQ.", 0) \ - M(String, rabbitmq_vhost, "/", "RabbitMQ vhost.", 0) \ + M(String, rabbitmq_vhost, "/", "RabbitMQ vhost.", 0) #define LIST_OF_RABBITMQ_SETTINGS(M) \ RABBITMQ_RELATED_SETTINGS(M) \ diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 525a08784be..edfd16c55c2 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -259,46 +259,48 @@ size_t StorageRabbitMQ::getMaxBlockSize() const void StorageRabbitMQ::initRabbitMQ() { - setup_channel = std::make_shared(connection.get()); + RabbitMQChannel rabbit_channel(connection.get()); - initExchange(); - bindExchange(); + /// Main exchange -> Bridge exchange -> ( Sharding exchange ) -> Queues -> Consumers + + initExchange(rabbit_channel); + bindExchange(rabbit_channel); for 
(const auto i : ext::range(0, num_queues)) - bindQueue(i + 1); + bindQueue(i + 1, rabbit_channel); LOG_TRACE(log, "RabbitMQ setup completed"); - rabbit_is_ready = true; - setup_channel->close(); } -void StorageRabbitMQ::initExchange() +void StorageRabbitMQ::initExchange(RabbitMQChannel & rabbit_channel) { - /* Binding scheme is the following: client's exchange -> key bindings by routing key list -> bridge exchange (fanout) -> - * -> sharding exchange (only if needed) -> queues - */ - setup_channel->declareExchange(exchange_name, exchange_type, AMQP::durable) + /// Exchange hierarchy: + /// 1. Main exchange (defined with table settings - rabbitmq_exchange_name, rabbitmq_exchange_type). + /// 2. Bridge exchange (fanout). Used to easily disconnect main exchange and to simplify queue bindings. + /// 3. Sharding (or hash) exchange. Used in case of multiple queues. + /// 4. Consumer exchange. Just an alias for bridge_exchange or sharding exchange to know to what exchange + /// queues will be bound. + + /// All exchanges are declared with options: + /// 1. `durable` (survive RabbitMQ server restart) + /// 2. `autodelete` (auto delete in case of queue bindings are dropped). + + rabbit_channel.declareExchange(exchange_name, exchange_type, AMQP::durable) .onError([&](const char * message) { - /* This error can be a result of attempt to declare exchange if it was already declared but - * 1) with different exchange type. In this case can - * - manually delete previously declared exchange and create a new one. - * - throw an error that the exchange with this name but another type is already declared and ask client to delete it himself - * if it is not needed anymore or use another exchange name. - * 2) with different exchange settings. This can only happen if client himself declared exchange with the same name and - * specified its own settings, which differ from this implementation. - */ + /// This error can be a result of attempt to declare exchange if it was already declared but + /// 1) with different exchange type. + /// 2) with different exchange settings. throw Exception("Unable to declare exchange. Make sure specified exchange is not already declared. Error: " + std::string(message), ErrorCodes::CANNOT_DECLARE_RABBITMQ_EXCHANGE); }); - /// Bridge exchange is needed to easily disconnect consumer queues and also simplifies queue bindings - setup_channel->declareExchange(bridge_exchange, AMQP::fanout, AMQP::durable + AMQP::autodelete) + rabbit_channel.declareExchange(bridge_exchange, AMQP::fanout, AMQP::durable | AMQP::autodelete) .onError([&](const char * message) { - /// This error is not supposed to happen as this exchange name is always unique to type and its settings + /// This error is not supposed to happen as this exchange name is always unique to type and its settings. throw Exception( ErrorCodes::CANNOT_DECLARE_RABBITMQ_EXCHANGE, "Unable to declare bridge exchange ({}). Reason: {}", bridge_exchange, std::string(message)); }); @@ -309,26 +311,26 @@ void StorageRabbitMQ::initExchange() return; } - /* Change hash property because by default it will be routing key, which has to be an integer, but with support for any exchange - * type - routing keys might be of any type - */ AMQP::Table binding_arguments; + + /// Default routing key property in case of hash exchange is a routing key, which is required to be an integer. + /// Support for arbitrary exchange type (i.e. arbitary pattern of routing keys) requires to eliminate this dependency. 
+ /// This settings changes hash propery to message_id. binding_arguments["hash-property"] = "message_id"; - /// Declare exchange for sharding. - setup_channel->declareExchange(sharding_exchange, AMQP::consistent_hash, AMQP::durable + AMQP::autodelete, binding_arguments) + /// Declare hash exchange for sharding. + rabbit_channel.declareExchange(sharding_exchange, AMQP::consistent_hash, AMQP::durable | AMQP::autodelete, binding_arguments) .onError([&](const char * message) { - /* This error can be a result of same reasons as above for exchange_name, i.e. it will mean that sharding exchange name appeared - * to be the same as some other exchange (which purpose is not for sharding). So probably actual error reason: queue_base parameter - * is bad. - */ + /// This error can be a result of same reasons as above for exchange_name, i.e. it will mean that sharding exchange name appeared + /// to be the same as some other exchange (which purpose is not for sharding). So probably actual error reason: queue_base parameter + /// is bad. throw Exception( ErrorCodes::CANNOT_DECLARE_RABBITMQ_EXCHANGE, "Unable to declare sharding exchange ({}). Reason: {}", sharding_exchange, std::string(message)); }); - setup_channel->bindExchange(bridge_exchange, sharding_exchange, routing_keys[0]) + rabbit_channel.bindExchange(bridge_exchange, sharding_exchange, routing_keys[0]) .onError([&](const char * message) { throw Exception( @@ -343,7 +345,7 @@ void StorageRabbitMQ::initExchange() } -void StorageRabbitMQ::bindExchange() +void StorageRabbitMQ::bindExchange(RabbitMQChannel & rabbit_channel) { std::atomic binding_created = false; size_t bound_keys = 0; @@ -358,7 +360,7 @@ void StorageRabbitMQ::bindExchange() bind_headers[matching[0]] = matching[1]; } - setup_channel->bindExchange(exchange_name, bridge_exchange, routing_keys[0], bind_headers) + rabbit_channel.bindExchange(exchange_name, bridge_exchange, routing_keys[0], bind_headers) .onSuccess([&]() { binding_created = true; }) .onError([&](const char * message) { @@ -370,7 +372,7 @@ void StorageRabbitMQ::bindExchange() } else if (exchange_type == AMQP::ExchangeType::fanout || exchange_type == AMQP::ExchangeType::consistent_hash) { - setup_channel->bindExchange(exchange_name, bridge_exchange, routing_keys[0]) + rabbit_channel.bindExchange(exchange_name, bridge_exchange, routing_keys[0]) .onSuccess([&]() { binding_created = true; }) .onError([&](const char * message) { @@ -384,7 +386,7 @@ void StorageRabbitMQ::bindExchange() { for (const auto & routing_key : routing_keys) { - setup_channel->bindExchange(exchange_name, bridge_exchange, routing_key) + rabbit_channel.bindExchange(exchange_name, bridge_exchange, routing_key) .onSuccess([&]() { ++bound_keys; @@ -408,7 +410,7 @@ void StorageRabbitMQ::bindExchange() } -void StorageRabbitMQ::bindQueue(size_t queue_id) +void StorageRabbitMQ::bindQueue(size_t queue_id, RabbitMQChannel & rabbit_channel) { std::atomic binding_created = false; @@ -424,7 +426,7 @@ void StorageRabbitMQ::bindQueue(size_t queue_id) * done between client's exchange and local bridge exchange. 
Binding key must be a string integer in case of hash exchange, for * fanout exchange it can be arbitrary */ - setup_channel->bindQueue(consumer_exchange, queue_name, std::to_string(queue_id)) + rabbit_channel.bindQueue(consumer_exchange, queue_name, std::to_string(queue_id)) .onSuccess([&] { binding_created = true; }) .onError([&](const char * message) { @@ -460,7 +462,7 @@ void StorageRabbitMQ::bindQueue(size_t queue_id) * specific queue when its name is specified in queue_base setting */ const String queue_name = !hash_exchange ? queue_base : std::to_string(queue_id) + "_" + queue_base; - setup_channel->declareQueue(queue_name, AMQP::durable, queue_settings).onSuccess(success_callback).onError(error_callback); + rabbit_channel.declareQueue(queue_name, AMQP::durable | AMQP::autodelete, queue_settings).onSuccess(success_callback).onError(error_callback); while (!binding_created) { @@ -537,8 +539,8 @@ void StorageRabbitMQ::unbindExchange() event_handler->updateLoopState(Loop::STOP); looping_task->deactivate(); - setup_channel = std::make_shared(connection.get()); - setup_channel->removeExchange(bridge_exchange) + RabbitMQChannel rabbit_channel(connection.get()); + rabbit_channel.removeExchange(bridge_exchange) .onSuccess([&]() { exchange_removed.store(true); @@ -552,8 +554,6 @@ void StorageRabbitMQ::unbindExchange() { event_handler->iterateLoop(); } - - setup_channel->close(); }); } diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h index eeda6b9fdca..524aac32fe5 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.h +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h @@ -113,7 +113,6 @@ private: std::atomic producer_id = 1; /// counter for producer buffer, needed for channel id std::atomic wait_confirm = true; /// needed to break waiting for confirmations for producer std::atomic exchange_removed = false, rabbit_is_ready = false; - ChannelPtr setup_channel; std::vector queues; std::once_flag flag; /// remove exchange only once @@ -141,9 +140,9 @@ private: void deactivateTask(BackgroundSchedulePool::TaskHolder & task, bool wait, bool stop_loop); void initRabbitMQ(); - void initExchange(); - void bindExchange(); - void bindQueue(size_t queue_id); + void initExchange(RabbitMQChannel & rabbit_channel); + void bindExchange(RabbitMQChannel & rabbit_channel); + void bindQueue(size_t queue_id, RabbitMQChannel & rabbit_channel); bool restoreConnection(bool reconnecting); bool streamToViews(); From 13ba5287e3c37a45740052ed10f66f2657f8c080 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 4 May 2021 16:26:47 +0000 Subject: [PATCH 008/204] Add cleanup in case of drop table --- src/Storages/RabbitMQ/RabbitMQHandler.cpp | 12 +++++ src/Storages/RabbitMQ/RabbitMQHandler.h | 8 +++ .../RabbitMQ/ReadBufferFromRabbitMQConsumer.h | 1 + src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 50 +++++++++++++++++-- src/Storages/RabbitMQ/StorageRabbitMQ.h | 10 ++++ .../integration/test_storage_rabbitmq/test.py | 38 ++++++++++++++ 6 files changed, 116 insertions(+), 3 deletions(-) diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.cpp b/src/Storages/RabbitMQ/RabbitMQHandler.cpp index d08b4806db8..9b5beb0c3f2 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.cpp +++ b/src/Storages/RabbitMQ/RabbitMQHandler.cpp @@ -53,4 +53,16 @@ void RabbitMQHandler::iterateLoop() uv_run(loop, UV_RUN_NOWAIT); } +int RabbitMQHandler::startBlockingLoop() +{ + /// Return non-zero value, if uv_stop was called while there are still active events. 
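+    /// (With UV_RUN_DEFAULT, uv_run() blocks until uv_stop() is called or no active handles/requests remain.)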
+ int ret = uv_run(loop, UV_RUN_DEFAULT); + return ret; +} + +void RabbitMQHandler::stopLoop() +{ + uv_stop(loop); +} + } diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.h b/src/Storages/RabbitMQ/RabbitMQHandler.h index d312a6c75c6..27027e7ab42 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.h +++ b/src/Storages/RabbitMQ/RabbitMQHandler.h @@ -35,9 +35,17 @@ public: void onError(AMQP::TcpConnection * connection, const char * message) override; void onReady(AMQP::TcpConnection * connection) override; + /// Loop for background thread worker. void startLoop(); + + /// Loop to wait for small tasks in a non-blocking mode. void iterateLoop(); + /// Loop to wait for small tasks in a blocking mode. + int startBlockingLoop(); + + void stopLoop(); + bool connectionRunning() { return connection_running.load(); } bool loopRunning() { return loop_running.load(); } diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h index 23e1d3f0649..e4c76470ed1 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h @@ -56,6 +56,7 @@ public: ChannelPtr & getChannel() { return consumer_channel; } void setupChannel(); bool needChannelUpdate(); + void closeChannel() { consumer_channel->close(); } void updateQueues(std::vector & queues_) { queues = queues_; } size_t queuesCount() { return queues.size(); } diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index edfd16c55c2..842a569e633 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -462,7 +462,9 @@ void StorageRabbitMQ::bindQueue(size_t queue_id, RabbitMQChannel & rabbit_channe * specific queue when its name is specified in queue_base setting */ const String queue_name = !hash_exchange ? queue_base : std::to_string(queue_id) + "_" + queue_base; - rabbit_channel.declareQueue(queue_name, AMQP::durable | AMQP::autodelete, queue_settings).onSuccess(success_callback).onError(error_callback); + + /// AMQP::autodelete setting is not allowd, because in case of server restart there will be no consumers and deleting queues should not take place. + rabbit_channel.declareQueue(queue_name, AMQP::durable, queue_settings).onSuccess(success_callback).onError(error_callback); while (!binding_created) { @@ -644,12 +646,27 @@ void StorageRabbitMQ::shutdown() stream_cancelled = true; wait_confirm = false; - deactivateTask(streaming_task, true, false); - deactivateTask(looping_task, true, true); + /// In case it has not yet been able to setup connection; deactivateTask(connection_task, true, false); + /// The order of deactivating tasks is important: wait for streamingToViews() func to finish and + /// then wait for background event loop to finish. + deactivateTask(streaming_task, true, false); + deactivateTask(looping_task, true, true); + + if (drop_table) + { + for (auto & buffer : buffers) + buffer->closeChannel(); + cleanupRabbitMQ(); + } + + /// It is important to close connection here - before removing consumer buffers, because + /// it will finish and clean callbacks, which might use those buffers data. connection->close(); + /// Connection is not closed immediately - it requires the loop to shutdown it properly and to + /// finish all callbacks. 
size_t cnt_retries = 0; while (!connection->closed() && cnt_retries++ != RETRIES_MAX) event_handler->iterateLoop(); @@ -663,6 +680,33 @@ void StorageRabbitMQ::shutdown() } +/// The only thing publishers are supposed to be aware of is _exchanges_ and queues are a responsibility of a consumer. +/// Therefore, if a table is droppped, a clean up is needed. +void StorageRabbitMQ::cleanupRabbitMQ() const +{ + RabbitMQChannel rabbit_channel(connection.get()); + for (const auto & queue : queues) + { + /// AMQP::ifunused is needed, because it is possible to share queues between multiple tables and dropping + /// on of them should not affect others. + /// AMQP::ifempty is not used on purpose. + + rabbit_channel.removeQueue(queue, AMQP::ifunused) + .onSuccess([&](uint32_t num_messages) + { + LOG_TRACE(log, "Successfully deleted queue {}, messages contained {}", queue, num_messages); + event_handler->stopLoop(); + }) + .onError([&](const char * message) + { + LOG_ERROR(log, "Failed to delete queue {}. Error message: {}", queue, message); + event_handler->stopLoop(); + }); + } + event_handler->startBlockingLoop(); +} + + void StorageRabbitMQ::pushReadBuffer(ConsumerBufferPtr buffer) { std::lock_guard lock(buffers_mutex); diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h index 524aac32fe5..6df8bd95276 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.h +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h @@ -33,6 +33,13 @@ public: void startup() override; void shutdown() override; + /// This is a bad way to let storage know in shutdown() that table is going to be dropped. There are some actions which need + /// to be done only when table is dropped (not when detached). Also connection must be closed only in shutdown, but those + /// actions require an open connection. Therefore there needs to be a way inside shutdown() method to know whether it is called + /// because of drop query. And drop() method is not suitable at all, because it will not only require to reopen connection, but also + /// it can be called considerable time after table is dropped (for example, in case of Atomic database), which is not appropriate for the case. 
+ void checkTableCanBeDropped() const override { drop_table = true; } + /// Always return virtual columns in addition to required columns Pipe read( const Names & column_names, @@ -123,6 +130,7 @@ private: std::atomic stream_cancelled{false}; size_t read_attempts = 0; + mutable bool drop_table = false; ConsumerBufferPtr createReadBuffer(); @@ -140,6 +148,8 @@ private: void deactivateTask(BackgroundSchedulePool::TaskHolder & task, bool wait, bool stop_loop); void initRabbitMQ(); + void cleanupRabbitMQ() const; + void initExchange(RabbitMQChannel & rabbit_channel); void bindExchange(RabbitMQChannel & rabbit_channel); void bindQueue(size_t queue_id, RabbitMQChannel & rabbit_channel); diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index cab7685d96c..3e7096a4c55 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -1994,6 +1994,44 @@ def test_rabbitmq_vhost(rabbitmq_cluster): break +@pytest.mark.timeout(120) +def test_rabbitmq_drop_table_properly(rabbitmq_cluster): + instance.query('CREATE DATABASE test_database') + instance.query(''' + CREATE TABLE test_database.rabbitmq_drop (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_exchange_name = 'drop', + rabbitmq_format = 'JSONEachRow', + rabbitmq_queue_base = 'rabbit_queue' + ''') + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + + channel.basic_publish(exchange='drop', routing_key='', body=json.dumps({'key': 1, 'value': 2})) + while True: + result = instance.query('SELECT * FROM test_database.rabbitmq_drop ORDER BY key', ignore_error=True) + if result == "1\t2\n": + break + + exists = channel.queue_declare(queue='rabbit_queue', passive=True) + assert(exists) + + instance.query("DROP TABLE test_database.rabbitmq_drop") + time.sleep(30) + instance.query("DROP DATABASE test_database") + + try: + exists = channel.queue_declare(callback, queue='rabbit_queue', passive=True) + except Exception as e: + exists = False + + assert(not exists) + + if __name__ == '__main__': cluster.start() input("Cluster created, press any key to destroy...") From a3feaa48f4c9431c0bd5d5709d52084e1aa1562e Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 4 May 2021 18:57:49 +0000 Subject: [PATCH 009/204] Allow user to define specific queue settings --- src/Storages/RabbitMQ/RabbitMQSettings.h | 3 +- src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 62 ++++++++++++----- src/Storages/RabbitMQ/StorageRabbitMQ.h | 3 +- .../integration/test_storage_rabbitmq/test.py | 66 +++++++++++++------ 4 files changed, 98 insertions(+), 36 deletions(-) diff --git a/src/Storages/RabbitMQ/RabbitMQSettings.h b/src/Storages/RabbitMQ/RabbitMQSettings.h index c44648ebd40..16d47bda81e 100644 --- a/src/Storages/RabbitMQ/RabbitMQSettings.h +++ b/src/Storages/RabbitMQ/RabbitMQSettings.h @@ -24,7 +24,8 @@ namespace DB M(UInt64, rabbitmq_skip_broken_messages, 0, "Skip at least this number of broken messages from RabbitMQ per block", 0) \ M(UInt64, rabbitmq_max_block_size, 0, "Number of row collected before flushing data from RabbitMQ.", 0) \ M(Milliseconds, rabbitmq_flush_interval_ms, 0, "Timeout for flushing data from RabbitMQ.", 0) \ - M(String, rabbitmq_vhost, "/", "RabbitMQ vhost.", 0) + M(String, rabbitmq_vhost, "/", "RabbitMQ vhost.", 0) \ + 
M(String, rabbitmq_queue_settings_list, "", "A list of rabbitmq queue settings", 0) \ #define LIST_OF_RABBITMQ_SETTINGS(M) \ RABBITMQ_RELATED_SETTINGS(M) \ diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 842a569e633..2aab01bed88 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -79,12 +79,13 @@ StorageRabbitMQ::StorageRabbitMQ( , exchange_name(getContext()->getMacros()->expand(rabbitmq_settings->rabbitmq_exchange_name.value)) , format_name(getContext()->getMacros()->expand(rabbitmq_settings->rabbitmq_format.value)) , exchange_type(defineExchangeType(getContext()->getMacros()->expand(rabbitmq_settings->rabbitmq_exchange_type.value))) - , routing_keys(parseRoutingKeys(getContext()->getMacros()->expand(rabbitmq_settings->rabbitmq_routing_key_list.value))) + , routing_keys(parseSettings(getContext()->getMacros()->expand(rabbitmq_settings->rabbitmq_routing_key_list.value))) , row_delimiter(rabbitmq_settings->rabbitmq_row_delimiter.value) , schema_name(getContext()->getMacros()->expand(rabbitmq_settings->rabbitmq_schema.value)) , num_consumers(rabbitmq_settings->rabbitmq_num_consumers.value) , num_queues(rabbitmq_settings->rabbitmq_num_queues.value) , queue_base(getContext()->getMacros()->expand(rabbitmq_settings->rabbitmq_queue_base.value)) + , queue_settings_list(parseSettings(getContext()->getMacros()->expand(rabbitmq_settings->rabbitmq_queue_settings_list.value))) , deadletter_exchange(getContext()->getMacros()->expand(rabbitmq_settings->rabbitmq_deadletter_exchange.value)) , persistent(rabbitmq_settings->rabbitmq_persistent.value) , hash_exchange(num_consumers > 1 || num_queues > 1) @@ -147,10 +148,12 @@ StorageRabbitMQ::StorageRabbitMQ( } -Names StorageRabbitMQ::parseRoutingKeys(String routing_key_list) +Names StorageRabbitMQ::parseSettings(String settings_list) { Names result; - boost::split(result, routing_key_list, [](char c){ return c == ','; }); + if (settings_list.empty()) + return result; + boost::split(result, settings_list, [](char c){ return c == ','; }); for (String & key : result) boost::trim(key); @@ -250,11 +253,11 @@ void StorageRabbitMQ::deactivateTask(BackgroundSchedulePool::TaskHolder & task, size_t StorageRabbitMQ::getMaxBlockSize() const - { +{ return rabbitmq_settings->rabbitmq_max_block_size.changed ? rabbitmq_settings->rabbitmq_max_block_size.value : (getContext()->getSettingsRef().max_insert_block_size.value / num_consumers); - } +} void StorageRabbitMQ::initRabbitMQ() @@ -451,19 +454,50 @@ void StorageRabbitMQ::bindQueue(size_t queue_id, RabbitMQChannel & rabbit_channe AMQP::Table queue_settings; - queue_settings["x-max-length"] = queue_size; + /// Check user-defined settings. 
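+    /// Expected format is a comma-separated list of key=value pairs, e.g. (hypothetical values):
+    /// rabbitmq_queue_settings_list = 'x-max-length=1000,x-dead-letter-exchange=dlx'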
+ if (!queue_settings_list.empty()) + { + for (const auto & setting : queue_settings_list) + { + Strings setting_values; + splitInto<'='>(setting_values, setting); + assert(setting_values.size() == 2); + String key = setting_values[0], value = setting_values[1]; - if (!deadletter_exchange.empty()) + std::unordered_set integer_settings = {"x-max-length", "x-max-length-bytes", "x-message-ttl", "x-expires", "x-priority", "x-max-priority"}; + std::unordered_set string_settings = {"x-overflow", "x-dead-letter-exchange", "x-queue-type"}; + + if (integer_settings.find(key) != integer_settings.end()) + queue_settings[key] = parse(value); + else if (string_settings.find(key) != string_settings.end()) + queue_settings[key] = value; + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported queue setting: {}", value); + } + } + + /// Impose default settings if there are no user-defined settings. + if (!queue_settings.contains("x-max-length")) + { + queue_settings["x-max-length"] = queue_size; + } + if (!queue_settings.contains("x-dead-letter-exchange") && !deadletter_exchange.empty()) + { queue_settings["x-dead-letter-exchange"] = deadletter_exchange; - else + } + else if (!queue_settings.contains("x-overflow")) + { + /// Define x-overflow only if there is not x-dead-letter-exchange, because it will overwrite the expected behaviour. queue_settings["x-overflow"] = "reject-publish"; + } - /* The first option not just simplifies queue_name, but also implements the possibility to be able to resume reading from one - * specific queue when its name is specified in queue_base setting - */ + /// If queue_base - a single name, then it can be used as one specific queue, from which to read. + /// Otherwise it is used as a generator (unique for current table) of queue names, because it allows to + /// maximize performance - via setting `rabbitmq_num_queues`. const String queue_name = !hash_exchange ? queue_base : std::to_string(queue_id) + "_" + queue_base; - /// AMQP::autodelete setting is not allowd, because in case of server restart there will be no consumers and deleting queues should not take place. + /// AMQP::autodelete setting is not allowd, because in case of server restart there will be no consumers + /// and deleting queues should not take place. 
rabbit_channel.declareQueue(queue_name, AMQP::durable, queue_settings).onSuccess(success_callback).onError(error_callback); while (!binding_created) @@ -1017,7 +1051,6 @@ void registerStorageRabbitMQ(StorageFactory & factory) CHECK_RABBITMQ_STORAGE_ARGUMENT(1, rabbitmq_host_port) CHECK_RABBITMQ_STORAGE_ARGUMENT(2, rabbitmq_exchange_name) CHECK_RABBITMQ_STORAGE_ARGUMENT(3, rabbitmq_format) - CHECK_RABBITMQ_STORAGE_ARGUMENT(4, rabbitmq_exchange_type) CHECK_RABBITMQ_STORAGE_ARGUMENT(5, rabbitmq_routing_key_list) CHECK_RABBITMQ_STORAGE_ARGUMENT(6, rabbitmq_row_delimiter) @@ -1027,12 +1060,11 @@ void registerStorageRabbitMQ(StorageFactory & factory) CHECK_RABBITMQ_STORAGE_ARGUMENT(10, rabbitmq_queue_base) CHECK_RABBITMQ_STORAGE_ARGUMENT(11, rabbitmq_deadletter_exchange) CHECK_RABBITMQ_STORAGE_ARGUMENT(12, rabbitmq_persistent) - CHECK_RABBITMQ_STORAGE_ARGUMENT(13, rabbitmq_skip_broken_messages) CHECK_RABBITMQ_STORAGE_ARGUMENT(14, rabbitmq_max_block_size) CHECK_RABBITMQ_STORAGE_ARGUMENT(15, rabbitmq_flush_interval_ms) - CHECK_RABBITMQ_STORAGE_ARGUMENT(16, rabbitmq_vhost) + CHECK_RABBITMQ_STORAGE_ARGUMENT(16, rabbitmq_queue_settings_list) #undef CHECK_RABBITMQ_STORAGE_ARGUMENT diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h index 6df8bd95276..fcf300b30e5 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.h +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h @@ -91,6 +91,7 @@ private: size_t num_consumers; size_t num_queues; String queue_base; + Names queue_settings_list; const String deadletter_exchange; const bool persistent; @@ -139,7 +140,7 @@ private: void loopingFunc(); void connectionFunc(); - static Names parseRoutingKeys(String routing_key_list); + static Names parseSettings(String routing_key_list); static AMQP::ExchangeType defineExchangeType(String exchange_type_); static String getTableBasedName(String name, const StorageID & table_id); diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index 3e7096a4c55..eb3e21bdccb 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -26,24 +26,13 @@ rabbitmq_id = '' # Helpers def check_rabbitmq_is_available(): - p = subprocess.Popen(('docker', - 'exec', - '-i', - rabbitmq_id, - 'rabbitmqctl', - 'await_startup'), - stdout=subprocess.PIPE) + p = subprocess.Popen(('docker', 'exec', '-i', rabbitmq_id, 'rabbitmqctl', 'await_startup'), stdout=subprocess.PIPE) p.communicate() return p.returncode == 0 def enable_consistent_hash_plugin(): - p = subprocess.Popen(('docker', - 'exec', - '-i', - rabbitmq_id, - "rabbitmq-plugins", "enable", "rabbitmq_consistent_hash_exchange"), - stdout=subprocess.PIPE) + p = subprocess.Popen(('docker', 'exec', '-i', rabbitmq_id, "rabbitmq-plugins", "enable", "rabbitmq_consistent_hash_exchange"), stdout=subprocess.PIPE) p.communicate() return p.returncode == 0 @@ -1835,7 +1824,7 @@ def test_rabbitmq_commit_on_block_write(rabbitmq_cluster): cancel.set() instance.query(''' - DROP TABLE test.rabbitmq; + DETACH TABLE test.rabbitmq; ''') while int(instance.query("SELECT count() FROM system.tables WHERE database='test' AND name='rabbitmq'")) == 1: @@ -1996,9 +1985,8 @@ def test_rabbitmq_vhost(rabbitmq_cluster): @pytest.mark.timeout(120) def test_rabbitmq_drop_table_properly(rabbitmq_cluster): - instance.query('CREATE DATABASE test_database') instance.query(''' - CREATE TABLE test_database.rabbitmq_drop (key UInt64, value UInt64) + CREATE TABLE test.rabbitmq_drop (key 
UInt64, value UInt64) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', rabbitmq_exchange_name = 'drop', @@ -2013,16 +2001,15 @@ def test_rabbitmq_drop_table_properly(rabbitmq_cluster): channel.basic_publish(exchange='drop', routing_key='', body=json.dumps({'key': 1, 'value': 2})) while True: - result = instance.query('SELECT * FROM test_database.rabbitmq_drop ORDER BY key', ignore_error=True) + result = instance.query('SELECT * FROM test.rabbitmq_drop ORDER BY key', ignore_error=True) if result == "1\t2\n": break exists = channel.queue_declare(queue='rabbit_queue', passive=True) assert(exists) - instance.query("DROP TABLE test_database.rabbitmq_drop") + instance.query("DROP TABLE test.rabbitmq_drop") time.sleep(30) - instance.query("DROP DATABASE test_database") try: exists = channel.queue_declare(callback, queue='rabbit_queue', passive=True) @@ -2032,6 +2019,47 @@ def test_rabbitmq_drop_table_properly(rabbitmq_cluster): assert(not exists) +@pytest.mark.timeout(120) +def test_rabbitmq_queue_settings(rabbitmq_cluster): + instance.query(''' + CREATE TABLE test.rabbitmq_settings (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_exchange_name = 'rabbit_exchange', + rabbitmq_format = 'JSONEachRow', + rabbitmq_queue_base = 'rabbit_queue', + rabbitmq_queue_settings_list = 'x-max-length=10,x-overflow=reject-publish' + ''') + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + + for i in range(50): + channel.basic_publish(exchange='rabbit_exchange', routing_key='', body=json.dumps({'key': 1, 'value': 2})) + connection.close() + + instance.query(''' + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.consumer; + CREATE TABLE test.view (key UInt64, value UInt64) + ENGINE = MergeTree ORDER BY key; + CREATE MATERIALIZED VIEW test.consumer TO test.view AS + SELECT * FROM test.rabbitmq_settings; + ''') + + time.sleep(5) + + result = instance.query('SELECT count() FROM test.rabbitmq_settings', ignore_error=True) + while int(result) != 10: + time.sleep(0.5) + result = instance.query('SELECT count() FROM test.view', ignore_error=True) + + # queue size is 10, but 50 messages were sent, they will be dropped (setting x-overflow = reject-publish) and only 10 will remain. 
+ assert(int(result) == 10) + + if __name__ == '__main__': cluster.start() input("Cluster created, press any key to destroy...") From 2112521304e33f2f343306f3ee59e794be520eb6 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 4 May 2021 19:54:16 +0000 Subject: [PATCH 010/204] Allow all RabbitMQ setup to be done only by user --- src/Storages/RabbitMQ/RabbitMQSettings.h | 1 + src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 11 ++- src/Storages/RabbitMQ/StorageRabbitMQ.h | 8 +++ .../integration/test_storage_rabbitmq/test.py | 71 +++++++++++++++---- 4 files changed, 77 insertions(+), 14 deletions(-) diff --git a/src/Storages/RabbitMQ/RabbitMQSettings.h b/src/Storages/RabbitMQ/RabbitMQSettings.h index 16d47bda81e..185ee2a6eb2 100644 --- a/src/Storages/RabbitMQ/RabbitMQSettings.h +++ b/src/Storages/RabbitMQ/RabbitMQSettings.h @@ -26,6 +26,7 @@ namespace DB M(Milliseconds, rabbitmq_flush_interval_ms, 0, "Timeout for flushing data from RabbitMQ.", 0) \ M(String, rabbitmq_vhost, "/", "RabbitMQ vhost.", 0) \ M(String, rabbitmq_queue_settings_list, "", "A list of rabbitmq queue settings", 0) \ + M(Bool, rabbitmq_queue_consume, false, "Use user-defined queues and do not make any RabbitMQ setup: declaring exchanges, queues, bindings", 0) \ #define LIST_OF_RABBITMQ_SETTINGS(M) \ RABBITMQ_RELATED_SETTINGS(M) \ diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 2aab01bed88..c1e164c9868 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -88,6 +88,7 @@ StorageRabbitMQ::StorageRabbitMQ( , queue_settings_list(parseSettings(getContext()->getMacros()->expand(rabbitmq_settings->rabbitmq_queue_settings_list.value))) , deadletter_exchange(getContext()->getMacros()->expand(rabbitmq_settings->rabbitmq_deadletter_exchange.value)) , persistent(rabbitmq_settings->rabbitmq_persistent.value) + , use_user_setup(rabbitmq_settings->rabbitmq_queue_consume.value) , hash_exchange(num_consumers > 1 || num_queues > 1) , log(&Poco::Logger::get("StorageRabbitMQ (" + table_id_.table_name + ")")) , address(getContext()->getMacros()->expand(rabbitmq_settings->rabbitmq_host_port.value)) @@ -262,6 +263,13 @@ size_t StorageRabbitMQ::getMaxBlockSize() const void StorageRabbitMQ::initRabbitMQ() { + if (use_user_setup) + { + queues.emplace_back(queue_base); + rabbit_is_ready = true; + return; + } + RabbitMQChannel rabbit_channel(connection.get()); /// Main exchange -> Bridge exchange -> ( Sharding exchange ) -> Queues -> Consumers @@ -1064,7 +1072,8 @@ void registerStorageRabbitMQ(StorageFactory & factory) CHECK_RABBITMQ_STORAGE_ARGUMENT(14, rabbitmq_max_block_size) CHECK_RABBITMQ_STORAGE_ARGUMENT(15, rabbitmq_flush_interval_ms) CHECK_RABBITMQ_STORAGE_ARGUMENT(16, rabbitmq_vhost) - CHECK_RABBITMQ_STORAGE_ARGUMENT(16, rabbitmq_queue_settings_list) + CHECK_RABBITMQ_STORAGE_ARGUMENT(17, rabbitmq_queue_settings_list) + CHECK_RABBITMQ_STORAGE_ARGUMENT(18, rabbitmq_queue_consume) #undef CHECK_RABBITMQ_STORAGE_ARGUMENT diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h index fcf300b30e5..664959668b3 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.h +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h @@ -93,8 +93,15 @@ private: String queue_base; Names queue_settings_list; const String deadletter_exchange; + + /// For insert query. Mark messages as durable. const bool persistent; + /// A table setting. 
It is possible not to perform any RabbitMQ setup, which is supposed to be consumer-side setup: + /// declaring exchanges, queues, bindings. Instead everything needed from RabbitMQ table is to connect to a specific queue. + /// This solution disables all optimizations and is not really optimal, but allows user to fully control all RabbitMQ setup. + bool use_user_setup; + bool hash_exchange; Poco::Logger * log; String address; @@ -116,6 +123,7 @@ private: /// maximum number of messages in RabbitMQ queue (x-max-length). Also used /// to setup size of inner buffer for received messages uint32_t queue_size; + String sharding_exchange, bridge_exchange, consumer_exchange; size_t consumer_id = 0; /// counter for consumer buffer, needed for channel id std::atomic producer_id = 1; /// counter for producer buffer, needed for channel id diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index eb3e21bdccb..41e6421691d 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -1823,23 +1823,12 @@ def test_rabbitmq_commit_on_block_write(rabbitmq_cluster): cancel.set() - instance.query(''' - DETACH TABLE test.rabbitmq; - ''') + instance.query('DETACH TABLE test.rabbitmq;') while int(instance.query("SELECT count() FROM system.tables WHERE database='test' AND name='rabbitmq'")) == 1: time.sleep(1) - instance.query(''' - CREATE TABLE test.rabbitmq (key UInt64, value UInt64) - ENGINE = RabbitMQ - SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', - rabbitmq_exchange_name = 'block', - rabbitmq_format = 'JSONEachRow', - rabbitmq_max_block_size = 100, - rabbitmq_queue_base = 'block', - rabbitmq_row_delimiter = '\\n'; - ''') + instance.query('ATTACH TABLE test.rabbitmq;') while int(instance.query('SELECT uniqExact(key) FROM test.view')) < i[0]: time.sleep(1) @@ -2060,6 +2049,62 @@ def test_rabbitmq_queue_settings(rabbitmq_cluster): assert(int(result) == 10) +@pytest.mark.timeout(120) +def test_rabbitmq_queue_consume(rabbitmq_cluster): + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + channel.queue_declare(queue='rabbit_queue', durable=True) + #channel.basic_publish(exchange='', routing_key='rabbit_queue', body=json.dumps({'key': 1, 'value': 2})) + + i = [0] + messages_num = 1000 + def produce(): + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + messages = [] + for _ in range(messages_num): + message = json.dumps({'key': i[0], 'value': i[0]}) + channel.basic_publish(exchange='', routing_key='rabbit_queue', body=message) + i[0] += 1 + + threads = [] + threads_num = 10 + for _ in range(threads_num): + threads.append(threading.Thread(target=produce)) + for thread in threads: + time.sleep(random.uniform(0, 1)) + thread.start() + + instance.query(''' + CREATE TABLE test.rabbitmq_queue (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_exchange_name = 'drop', + rabbitmq_format = 'JSONEachRow', + rabbitmq_queue_base = 'rabbit_queue', + rabbitmq_queue_consume = 1; + + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.consumer; + CREATE TABLE test.view (key UInt64, value UInt64) + ENGINE = MergeTree ORDER BY key; + CREATE MATERIALIZED VIEW test.consumer TO test.view AS + SELECT * FROM test.rabbitmq_queue; + ''') + + result = '' + while 
True: + result = instance.query('SELECT count() FROM test.view') + if int(result) == messages_num * threads_num: + break + time.sleep(1) + + for thread in threads: + thread.join() + + if __name__ == '__main__': cluster.start() input("Cluster created, press any key to destroy...") From 973ee4e9c4b693309e88299fcc5d439b38c869cb Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 4 May 2021 19:57:45 +0000 Subject: [PATCH 011/204] Remove strange code, some fixes --- src/Storages/RabbitMQ/RabbitMQHandler.cpp | 2 + src/Storages/RabbitMQ/RabbitMQHandler.h | 2 + src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 46 +++++++++---------- .../integration/test_storage_rabbitmq/test.py | 5 +- 4 files changed, 29 insertions(+), 26 deletions(-) diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.cpp b/src/Storages/RabbitMQ/RabbitMQHandler.cpp index 9b5beb0c3f2..07348d9c068 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.cpp +++ b/src/Storages/RabbitMQ/RabbitMQHandler.cpp @@ -53,6 +53,8 @@ void RabbitMQHandler::iterateLoop() uv_run(loop, UV_RUN_NOWAIT); } +/// Do not need synchronization as in iterateLoop(), because this method is used only for +/// initial RabbitMQ setup - at this point there is no background loop thread. int RabbitMQHandler::startBlockingLoop() { /// Return non-zero value, if uv_stop was called while there are still active events. diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.h b/src/Storages/RabbitMQ/RabbitMQHandler.h index 27027e7ab42..bd7b4526e87 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.h +++ b/src/Storages/RabbitMQ/RabbitMQHandler.h @@ -39,9 +39,11 @@ public: void startLoop(); /// Loop to wait for small tasks in a non-blocking mode. + /// Adds synchronization with main background loop. void iterateLoop(); /// Loop to wait for small tasks in a blocking mode. + /// No synchronization is done with the main loop thread. 
int startBlockingLoop(); void stopLoop(); diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index c1e164c9868..a12569ddf40 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -76,22 +76,22 @@ StorageRabbitMQ::StorageRabbitMQ( : IStorage(table_id_) , WithContext(context_->getGlobalContext()) , rabbitmq_settings(std::move(rabbitmq_settings_)) - , exchange_name(getContext()->getMacros()->expand(rabbitmq_settings->rabbitmq_exchange_name.value)) - , format_name(getContext()->getMacros()->expand(rabbitmq_settings->rabbitmq_format.value)) - , exchange_type(defineExchangeType(getContext()->getMacros()->expand(rabbitmq_settings->rabbitmq_exchange_type.value))) - , routing_keys(parseSettings(getContext()->getMacros()->expand(rabbitmq_settings->rabbitmq_routing_key_list.value))) + , exchange_name(rabbitmq_settings->rabbitmq_exchange_name.value) + , format_name(rabbitmq_settings->rabbitmq_format.value) + , exchange_type(defineExchangeType(rabbitmq_settings->rabbitmq_exchange_type.value)) + , routing_keys(parseSettings(rabbitmq_settings->rabbitmq_routing_key_list.value)) , row_delimiter(rabbitmq_settings->rabbitmq_row_delimiter.value) - , schema_name(getContext()->getMacros()->expand(rabbitmq_settings->rabbitmq_schema.value)) + , schema_name(rabbitmq_settings->rabbitmq_schema.value) , num_consumers(rabbitmq_settings->rabbitmq_num_consumers.value) , num_queues(rabbitmq_settings->rabbitmq_num_queues.value) - , queue_base(getContext()->getMacros()->expand(rabbitmq_settings->rabbitmq_queue_base.value)) - , queue_settings_list(parseSettings(getContext()->getMacros()->expand(rabbitmq_settings->rabbitmq_queue_settings_list.value))) - , deadletter_exchange(getContext()->getMacros()->expand(rabbitmq_settings->rabbitmq_deadletter_exchange.value)) + , queue_base(rabbitmq_settings->rabbitmq_queue_base.value) + , queue_settings_list(parseSettings(rabbitmq_settings->rabbitmq_queue_settings_list.value)) + , deadletter_exchange(rabbitmq_settings->rabbitmq_deadletter_exchange.value) , persistent(rabbitmq_settings->rabbitmq_persistent.value) , use_user_setup(rabbitmq_settings->rabbitmq_queue_consume.value) , hash_exchange(num_consumers > 1 || num_queues > 1) , log(&Poco::Logger::get("StorageRabbitMQ (" + table_id_.table_name + ")")) - , address(getContext()->getMacros()->expand(rabbitmq_settings->rabbitmq_host_port.value)) + , address(rabbitmq_settings->rabbitmq_host_port.value) , parsed_address(parseAddress(address, 5672)) , login_password(std::make_pair( getContext()->getConfigRef().getString("rabbitmq.username"), @@ -358,7 +358,6 @@ void StorageRabbitMQ::initExchange(RabbitMQChannel & rabbit_channel) void StorageRabbitMQ::bindExchange(RabbitMQChannel & rabbit_channel) { - std::atomic binding_created = false; size_t bound_keys = 0; if (exchange_type == AMQP::ExchangeType::headers) @@ -372,7 +371,7 @@ void StorageRabbitMQ::bindExchange(RabbitMQChannel & rabbit_channel) } rabbit_channel.bindExchange(exchange_name, bridge_exchange, routing_keys[0], bind_headers) - .onSuccess([&]() { binding_created = true; }) + .onSuccess([&]() { event_handler->stopLoop(); }) .onError([&](const char * message) { throw Exception( @@ -384,7 +383,7 @@ void StorageRabbitMQ::bindExchange(RabbitMQChannel & rabbit_channel) else if (exchange_type == AMQP::ExchangeType::fanout || exchange_type == AMQP::ExchangeType::consistent_hash) { rabbit_channel.bindExchange(exchange_name, bridge_exchange, routing_keys[0]) - .onSuccess([&]() { 
binding_created = true; }) + .onSuccess([&]() { event_handler->stopLoop(); }) .onError([&](const char * message) { throw Exception( @@ -402,7 +401,7 @@ void StorageRabbitMQ::bindExchange(RabbitMQChannel & rabbit_channel) { ++bound_keys; if (bound_keys == routing_keys.size()) - binding_created = true; + event_handler->stopLoop(); }) .onError([&](const char * message) { @@ -414,17 +413,12 @@ void StorageRabbitMQ::bindExchange(RabbitMQChannel & rabbit_channel) } } - while (!binding_created) - { - event_handler->iterateLoop(); - } + event_handler->startBlockingLoop(); } void StorageRabbitMQ::bindQueue(size_t queue_id, RabbitMQChannel & rabbit_channel) { - std::atomic binding_created = false; - auto success_callback = [&](const std::string & queue_name, int msgcount, int /* consumercount */) { queues.emplace_back(queue_name); @@ -438,7 +432,7 @@ void StorageRabbitMQ::bindQueue(size_t queue_id, RabbitMQChannel & rabbit_channe * fanout exchange it can be arbitrary */ rabbit_channel.bindQueue(consumer_exchange, queue_name, std::to_string(queue_id)) - .onSuccess([&] { binding_created = true; }) + .onSuccess([&] { event_handler->stopLoop(); }) .onError([&](const char * message) { throw Exception( @@ -507,11 +501,7 @@ void StorageRabbitMQ::bindQueue(size_t queue_id, RabbitMQChannel & rabbit_channe /// AMQP::autodelete setting is not allowd, because in case of server restart there will be no consumers /// and deleting queues should not take place. rabbit_channel.declareQueue(queue_name, AMQP::durable, queue_settings).onSuccess(success_callback).onError(error_callback); - - while (!binding_created) - { - event_handler->iterateLoop(); - } + event_handler->startBlockingLoop(); } @@ -726,6 +716,9 @@ void StorageRabbitMQ::shutdown() /// Therefore, if a table is droppped, a clean up is needed. void StorageRabbitMQ::cleanupRabbitMQ() const { + if (use_user_setup) + return; + RabbitMQChannel rabbit_channel(connection.get()); for (const auto & queue : queues) { @@ -746,6 +739,9 @@ void StorageRabbitMQ::cleanupRabbitMQ() const }); } event_handler->startBlockingLoop(); + + /// Also there is no need to cleanup exchanges as they were created with AMQP::autodelete option. Once queues + /// are removed, exchanges will also be cleaned. } diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index 41e6421691d..f6274db3d4c 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -2045,6 +2045,8 @@ def test_rabbitmq_queue_settings(rabbitmq_cluster): time.sleep(0.5) result = instance.query('SELECT count() FROM test.view', ignore_error=True) + instance.query('DROP TABLE test.rabbitmq_settings') + # queue size is 10, but 50 messages were sent, they will be dropped (setting x-overflow = reject-publish) and only 10 will remain. 
assert(int(result) == 10) @@ -2056,7 +2058,6 @@ def test_rabbitmq_queue_consume(rabbitmq_cluster): connection = pika.BlockingConnection(parameters) channel = connection.channel() channel.queue_declare(queue='rabbit_queue', durable=True) - #channel.basic_publish(exchange='', routing_key='rabbit_queue', body=json.dumps({'key': 1, 'value': 2})) i = [0] messages_num = 1000 @@ -2104,6 +2105,8 @@ def test_rabbitmq_queue_consume(rabbitmq_cluster): for thread in threads: thread.join() + instance.query('DROP TABLE test.rabbitmq_queue') + if __name__ == '__main__': cluster.start() From 0b6fb7660b3ea76044d2780ba8e85b845249e30f Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 5 May 2021 07:52:21 +0000 Subject: [PATCH 012/204] Fix checks --- src/Storages/RabbitMQ/RabbitMQHandler.cpp | 6 ++---- src/Storages/RabbitMQ/RabbitMQHandler.h | 2 +- src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 18 +++++++++--------- src/Storages/RabbitMQ/StorageRabbitMQ.h | 2 +- .../integration/test_storage_rabbitmq/test.py | 1 - 5 files changed, 13 insertions(+), 16 deletions(-) diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.cpp b/src/Storages/RabbitMQ/RabbitMQHandler.cpp index 07348d9c068..c994ab22494 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.cpp +++ b/src/Storages/RabbitMQ/RabbitMQHandler.cpp @@ -55,11 +55,9 @@ void RabbitMQHandler::iterateLoop() /// Do not need synchronization as in iterateLoop(), because this method is used only for /// initial RabbitMQ setup - at this point there is no background loop thread. -int RabbitMQHandler::startBlockingLoop() +void RabbitMQHandler::startBlockingLoop() { - /// Return non-zero value, if uv_stop was called while there are still active events. - int ret = uv_run(loop, UV_RUN_DEFAULT); - return ret; + uv_run(loop, UV_RUN_DEFAULT); } void RabbitMQHandler::stopLoop() diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.h b/src/Storages/RabbitMQ/RabbitMQHandler.h index bd7b4526e87..f1c7afffc5a 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.h +++ b/src/Storages/RabbitMQ/RabbitMQHandler.h @@ -44,7 +44,7 @@ public: /// Loop to wait for small tasks in a blocking mode. /// No synchronization is done with the main loop thread. - int startBlockingLoop(); + void startBlockingLoop(); void stopLoop(); diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index a12569ddf40..130274e2161 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -195,7 +195,7 @@ String StorageRabbitMQ::getTableBasedName(String name, const StorageID & table_i std::shared_ptr StorageRabbitMQ::addSettings(ContextPtr local_context) const { auto modified_context = Context::createCopy(local_context); - modified_context->setSetting("input_format_skip_unknown_fields", true); + modified_context->setSetting("input_format_skip_unknown_fields", 1); modified_context->setSetting("input_format_allow_errors_ratio", 0.); modified_context->setSetting("input_format_allow_errors_num", rabbitmq_settings->rabbitmq_skip_broken_messages.value); @@ -325,8 +325,8 @@ void StorageRabbitMQ::initExchange(RabbitMQChannel & rabbit_channel) AMQP::Table binding_arguments; /// Default routing key property in case of hash exchange is a routing key, which is required to be an integer. - /// Support for arbitrary exchange type (i.e. arbitary pattern of routing keys) requires to eliminate this dependency. - /// This settings changes hash propery to message_id. + /// Support for arbitrary exchange type (i.e. 
arbitrary pattern of routing keys) requires to eliminate this dependency. + /// This settings changes hash property to message_id. binding_arguments["hash-property"] = "message_id"; /// Declare hash exchange for sharding. @@ -470,7 +470,7 @@ void StorageRabbitMQ::bindQueue(size_t queue_id, RabbitMQChannel & rabbit_channe std::unordered_set string_settings = {"x-overflow", "x-dead-letter-exchange", "x-queue-type"}; if (integer_settings.find(key) != integer_settings.end()) - queue_settings[key] = parse(value); + queue_settings[key] = parse(value); else if (string_settings.find(key) != string_settings.end()) queue_settings[key] = value; else @@ -498,7 +498,7 @@ void StorageRabbitMQ::bindQueue(size_t queue_id, RabbitMQChannel & rabbit_channe /// maximize performance - via setting `rabbitmq_num_queues`. const String queue_name = !hash_exchange ? queue_base : std::to_string(queue_id) + "_" + queue_base; - /// AMQP::autodelete setting is not allowd, because in case of server restart there will be no consumers + /// AMQP::autodelete setting is not allowed, because in case of server restart there will be no consumers /// and deleting queues should not take place. rabbit_channel.declareQueue(queue_name, AMQP::durable, queue_settings).onSuccess(success_callback).onError(error_callback); event_handler->startBlockingLoop(); @@ -713,7 +713,7 @@ void StorageRabbitMQ::shutdown() /// The only thing publishers are supposed to be aware of is _exchanges_ and queues are a responsibility of a consumer. -/// Therefore, if a table is droppped, a clean up is needed. +/// Therefore, if a table is dropped, a clean up is needed. void StorageRabbitMQ::cleanupRabbitMQ() const { if (use_user_setup) @@ -1038,7 +1038,7 @@ void registerStorageRabbitMQ(StorageFactory & factory) // Check arguments and settings #define CHECK_RABBITMQ_STORAGE_ARGUMENT(ARG_NUM, ARG_NAME) \ /* One of the three required arguments is not specified */ \ - if (args_count < (ARG_NUM) && (ARG_NUM) <= 3 && !rabbitmq_settings->ARG_NAME.changed) \ + if (args_count < (ARG_NUM) && (ARG_NUM) <= 2 && !rabbitmq_settings->ARG_NAME.changed) \ { \ throw Exception("Required parameter '" #ARG_NAME "' for storage RabbitMQ not specified", \ ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); \ @@ -1053,8 +1053,8 @@ void registerStorageRabbitMQ(StorageFactory & factory) } CHECK_RABBITMQ_STORAGE_ARGUMENT(1, rabbitmq_host_port) - CHECK_RABBITMQ_STORAGE_ARGUMENT(2, rabbitmq_exchange_name) - CHECK_RABBITMQ_STORAGE_ARGUMENT(3, rabbitmq_format) + CHECK_RABBITMQ_STORAGE_ARGUMENT(2, rabbitmq_format) + CHECK_RABBITMQ_STORAGE_ARGUMENT(3, rabbitmq_exchange_name) CHECK_RABBITMQ_STORAGE_ARGUMENT(4, rabbitmq_exchange_type) CHECK_RABBITMQ_STORAGE_ARGUMENT(5, rabbitmq_routing_key_list) CHECK_RABBITMQ_STORAGE_ARGUMENT(6, rabbitmq_row_delimiter) diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h index 664959668b3..0e47a8e1150 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.h +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h @@ -148,7 +148,7 @@ private: void loopingFunc(); void connectionFunc(); - static Names parseSettings(String routing_key_list); + static Names parseSettings(String settings_list); static AMQP::ExchangeType defineExchangeType(String exchange_type_); static String getTableBasedName(String name, const StorageID & table_id); diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index f6274db3d4c..008f5313e22 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ 
b/tests/integration/test_storage_rabbitmq/test.py @@ -2082,7 +2082,6 @@ def test_rabbitmq_queue_consume(rabbitmq_cluster): CREATE TABLE test.rabbitmq_queue (key UInt64, value UInt64) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', - rabbitmq_exchange_name = 'drop', rabbitmq_format = 'JSONEachRow', rabbitmq_queue_base = 'rabbit_queue', rabbitmq_queue_consume = 1; From 8703b7863dd043c91520411fb1993a22444c7ada Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 5 May 2021 23:56:14 +0300 Subject: [PATCH 013/204] Fix distributed_group_by_no_merge=2 with GROUP BY Before this patch the following query: SELECT assumeNotNull(argMax(dummy, 1)) FROM remote('127.1', system.one) SETTINGS distributed_group_by_no_merge = 2 Leads to: Code: 10. DB::Exception: Received from localhost:9000. DB::Exception: Not found column argMax(dummy, 1) in block: while executing 'INPUT : 0 -> argMax(dummy, 1) UInt8 : 0'. Since it tries to execute function one more time, but shards will not send this column when the query processed with distributed_group_by_no_merge=2 (i.e. up to WithMergeableStateAfterAggregation). v0: no exception v2: execut window functions v3: throw exception, since executing window function in this case will lead to messy output --- src/Interpreters/InterpreterSelectQuery.cpp | 7 ++++++- .../00184_shard_distributed_group_by_no_merge.reference | 2 ++ .../00184_shard_distributed_group_by_no_merge.sql | 3 +++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 16c9731a427..08077bf8e73 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -1249,7 +1249,12 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, const BlockInpu // 4) preliminary distinct. // Some of these were already executed at the shards (first_stage), // see the counterpart code and comments there. 
- if (expressions.need_aggregate) + if (from_aggregation_stage) + { + if (query_analyzer->hasWindow()) + throw Exception("Window functions does not support processing from WithMergeableStateAfterAggregation", ErrorCodes::NOT_IMPLEMENTED); + } + else if (expressions.need_aggregate) { executeExpression(query_plan, expressions.before_window, "Before window functions"); diff --git a/tests/queries/0_stateless/00184_shard_distributed_group_by_no_merge.reference b/tests/queries/0_stateless/00184_shard_distributed_group_by_no_merge.reference index 453c7fb5af0..b667c57a14c 100644 --- a/tests/queries/0_stateless/00184_shard_distributed_group_by_no_merge.reference +++ b/tests/queries/0_stateless/00184_shard_distributed_group_by_no_merge.reference @@ -39,3 +39,5 @@ GROUP BY w/ ALIAS 1 ORDER BY w/ ALIAS 0 +func(aggregate function) GROUP BY +0 diff --git a/tests/queries/0_stateless/00184_shard_distributed_group_by_no_merge.sql b/tests/queries/0_stateless/00184_shard_distributed_group_by_no_merge.sql index 9912e083777..cce10312e8f 100644 --- a/tests/queries/0_stateless/00184_shard_distributed_group_by_no_merge.sql +++ b/tests/queries/0_stateless/00184_shard_distributed_group_by_no_merge.sql @@ -39,4 +39,7 @@ SELECT n FROM remote('127.0.0.{2,3}', currentDatabase(), data_00184) GROUP BY nu SELECT 'ORDER BY w/ ALIAS'; SELECT n FROM remote('127.0.0.{2,3}', currentDatabase(), data_00184) ORDER BY number AS n LIMIT 1 SETTINGS distributed_group_by_no_merge=2; +SELECT 'func(aggregate function) GROUP BY'; +SELECT assumeNotNull(argMax(dummy, 1)) FROM remote('127.1', system.one) SETTINGS distributed_group_by_no_merge=2; + drop table data_00184; From e5cff95cc86abc61a8fe4eb3864cd804d11c5b26 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 5 May 2021 23:56:14 +0300 Subject: [PATCH 014/204] Avoid deadlock due to Log usage in 01568_window_functions_distributed --- .../0_stateless/01568_window_functions_distributed.reference | 2 +- .../queries/0_stateless/01568_window_functions_distributed.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01568_window_functions_distributed.reference b/tests/queries/0_stateless/01568_window_functions_distributed.reference index 29d3e5ea885..2de3bd99ea2 100644 --- a/tests/queries/0_stateless/01568_window_functions_distributed.reference +++ b/tests/queries/0_stateless/01568_window_functions_distributed.reference @@ -10,7 +10,7 @@ select max(identity(dummy + 1)) over () from remote('127.0.0.{1,2}', system, one 1 1 drop table if exists t_01568; -create table t_01568 engine Log as select intDiv(number, 3) p, number from numbers(9); +create table t_01568 engine Memory as select intDiv(number, 3) p, number from numbers(9); select sum(number) over w, max(number) over w from t_01568 window w as (partition by p); 3 2 3 2 diff --git a/tests/queries/0_stateless/01568_window_functions_distributed.sql b/tests/queries/0_stateless/01568_window_functions_distributed.sql index 7d9d1ea5c92..7c1a91a9c35 100644 --- a/tests/queries/0_stateless/01568_window_functions_distributed.sql +++ b/tests/queries/0_stateless/01568_window_functions_distributed.sql @@ -9,7 +9,7 @@ select max(identity(dummy + 1)) over () from remote('127.0.0.{1,2}', system, one drop table if exists t_01568; -create table t_01568 engine Log as select intDiv(number, 3) p, number from numbers(9); +create table t_01568 engine Memory as select intDiv(number, 3) p, number from numbers(9); select sum(number) over w, max(number) over w from t_01568 window w as (partition by p); From 
6ac50ab9c5d570b9806d87392e2fa4caa6dba597 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 5 May 2021 23:56:15 +0300 Subject: [PATCH 015/204] Extend 01568_window_functions_distributed for distributed_group_by_no_merge --- .../01568_window_functions_distributed.reference | 8 ++++++++ .../0_stateless/01568_window_functions_distributed.sql | 2 ++ 2 files changed, 10 insertions(+) diff --git a/tests/queries/0_stateless/01568_window_functions_distributed.reference b/tests/queries/0_stateless/01568_window_functions_distributed.reference index 2de3bd99ea2..7d5a95046f7 100644 --- a/tests/queries/0_stateless/01568_window_functions_distributed.reference +++ b/tests/queries/0_stateless/01568_window_functions_distributed.reference @@ -49,4 +49,12 @@ select groupArray(groupArray(number)) over (rows unbounded preceding) from remot [[0,3,6,0,3,6]] [[0,3,6,0,3,6],[1,4,7,1,4,7]] [[0,3,6,0,3,6],[1,4,7,1,4,7],[2,5,8,2,5,8]] +select groupArray(groupArray(number)) over (rows unbounded preceding) from remote('127.0.0.{1,2}', '', t_01568) group by mod(number, 3) settings distributed_group_by_no_merge=1; +[[0,3,6]] +[[0,3,6],[1,4,7]] +[[0,3,6],[1,4,7],[2,5,8]] +[[0,3,6]] +[[0,3,6],[1,4,7]] +[[0,3,6],[1,4,7],[2,5,8]] +select groupArray(groupArray(number)) over (rows unbounded preceding) from remote('127.0.0.{1,2}', '', t_01568) group by mod(number, 3) settings distributed_group_by_no_merge=2; -- { serverError 48 } drop table t_01568; diff --git a/tests/queries/0_stateless/01568_window_functions_distributed.sql b/tests/queries/0_stateless/01568_window_functions_distributed.sql index 7c1a91a9c35..bc82e1ed6ac 100644 --- a/tests/queries/0_stateless/01568_window_functions_distributed.sql +++ b/tests/queries/0_stateless/01568_window_functions_distributed.sql @@ -19,5 +19,7 @@ select distinct sum(number) over w, max(number) over w from remote('127.0.0.{1,2 -- window functions + aggregation w/shards select groupArray(groupArray(number)) over (rows unbounded preceding) from remote('127.0.0.{1,2}', '', t_01568) group by mod(number, 3); +select groupArray(groupArray(number)) over (rows unbounded preceding) from remote('127.0.0.{1,2}', '', t_01568) group by mod(number, 3) settings distributed_group_by_no_merge=1; +select groupArray(groupArray(number)) over (rows unbounded preceding) from remote('127.0.0.{1,2}', '', t_01568) group by mod(number, 3) settings distributed_group_by_no_merge=2; -- { serverError 48 } drop table t_01568; From eefd67fce592dc727761e661b817b985d4374184 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 6 May 2021 00:26:14 +0300 Subject: [PATCH 016/204] Disable optimize_distributed_group_by_sharding_key with window functions --- src/Interpreters/InterpreterSelectQuery.cpp | 1 + src/Storages/SelectQueryInfo.h | 3 +++ src/Storages/StorageDistributed.cpp | 10 +++++++--- ...ptimize_distributed_group_by_sharding_key.reference | 3 +++ ...1244_optimize_distributed_group_by_sharding_key.sql | 4 ++++ 5 files changed, 18 insertions(+), 3 deletions(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 08077bf8e73..be872a232c3 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -578,6 +578,7 @@ Block InterpreterSelectQuery::getSampleBlockImpl() OpenTelemetrySpanHolder span(__PRETTY_FUNCTION__); query_info.query = query_ptr; + query_info.has_window = query_analyzer->hasWindow(); if (storage && !options.only_analyze) from_stage = storage->getQueryProcessingStage(context, options.to_stage, 
query_info); diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index b4ac07c612a..7d33172b703 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -139,6 +139,9 @@ struct SelectQueryInfo /// Example: x IN (1, 2, 3) PreparedSets sets; + /// Cached value of ExpressionAnalysisResult::has_window + bool has_window = false; + ClusterPtr getCluster() const { return !optimized_cluster ? cluster : optimized_cluster; } }; diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index a402c3e0218..718a15888ea 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -285,9 +285,9 @@ void replaceConstantExpressions( /// - QueryProcessingStage::Complete /// - QueryProcessingStage::WithMergeableStateAfterAggregation /// - none (in this case regular WithMergeableState should be used) -std::optional getOptimizedQueryProcessingStage(const ASTPtr & query_ptr, bool extremes, const Block & sharding_key_block) +std::optional getOptimizedQueryProcessingStage(const SelectQueryInfo & query_info, bool extremes, const Block & sharding_key_block) { - const auto & select = query_ptr->as(); + const auto & select = query_info.query->as(); auto sharding_block_has = [&](const auto & exprs, size_t limit = SIZE_MAX) -> bool { @@ -314,6 +314,10 @@ std::optional getOptimizedQueryProcessingStage(const if (select.group_by_with_totals || select.group_by_with_rollup || select.group_by_with_cube) return {}; + // Window functions are not supported. + if (query_info.has_window) + return {}; + // TODO: extremes support can be implemented if (extremes) return {}; @@ -510,7 +514,7 @@ QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage( (settings.allow_nondeterministic_optimize_skip_unused_shards || sharding_key_is_deterministic)) { Block sharding_key_block = sharding_key_expr->getSampleBlock(); - auto stage = getOptimizedQueryProcessingStage(query_info.query, settings.extremes, sharding_key_block); + auto stage = getOptimizedQueryProcessingStage(query_info, settings.extremes, sharding_key_block); if (stage) { LOG_DEBUG(log, "Force processing stage to {}", QueryProcessingStage::toString(*stage)); diff --git a/tests/queries/0_stateless/01244_optimize_distributed_group_by_sharding_key.reference b/tests/queries/0_stateless/01244_optimize_distributed_group_by_sharding_key.reference index d1697bd2310..acaf6531101 100644 --- a/tests/queries/0_stateless/01244_optimize_distributed_group_by_sharding_key.reference +++ b/tests/queries/0_stateless/01244_optimize_distributed_group_by_sharding_key.reference @@ -123,3 +123,6 @@ GROUP BY sharding_key, ... 
GROUP BY ..., sharding_key 0 0 1 0 +window functions +0 0 +1 0 diff --git a/tests/queries/0_stateless/01244_optimize_distributed_group_by_sharding_key.sql b/tests/queries/0_stateless/01244_optimize_distributed_group_by_sharding_key.sql index 2f77155cc54..6b6300a4871 100644 --- a/tests/queries/0_stateless/01244_optimize_distributed_group_by_sharding_key.sql +++ b/tests/queries/0_stateless/01244_optimize_distributed_group_by_sharding_key.sql @@ -106,5 +106,9 @@ select * from dist_01247 group by key, value; select 'GROUP BY ..., sharding_key'; select * from dist_01247 group by value, key; +-- window functions +select 'window functions'; +select key, sum(sum(value)) over (rows unbounded preceding) from dist_01247 group by key settings allow_experimental_window_functions=1; + drop table dist_01247; drop table data_01247; From 308b964ca4f472e3f748f4dbdd58b4f4218119c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=B8=87=E5=BA=B7?= Date: Mon, 10 May 2021 20:30:52 +0800 Subject: [PATCH 017/204] enable DateTime64 to be version column --- src/DataTypes/DataTypeDateTime64.h | 2 ++ src/DataTypes/IDataType.h | 2 +- src/Storages/MergeTree/MergeTreeData.cpp | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/DataTypes/DataTypeDateTime64.h b/src/DataTypes/DataTypeDateTime64.h index f51e0f5d047..ac0f49613a2 100644 --- a/src/DataTypes/DataTypeDateTime64.h +++ b/src/DataTypes/DataTypeDateTime64.h @@ -35,6 +35,8 @@ public: bool canBePromoted() const override { return false; } + bool canBeUsedAsVersion() const override { return true; } + protected: SerializationPtr doGetDefaultSerialization() const override; }; diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h index 5a676819b77..fdf4973fce9 100644 --- a/src/DataTypes/IDataType.h +++ b/src/DataTypes/IDataType.h @@ -177,7 +177,7 @@ public: */ virtual bool canBeComparedWithCollation() const { return false; } - /** If the type is totally comparable (Ints, Date, DateTime, not nullable, not floats) + /** If the type is totally comparable (Ints, Date, DateTime, DateTime64, not nullable, not floats) * and "simple" enough (not String, FixedString) to be used as version number * (to select rows with maximum version). 
*/ diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 854b64181cc..22c8b1e5978 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -630,7 +630,7 @@ void MergeTreeData::MergingParams::check(const StorageInMemoryMetadata & metadat throw Exception("The column " + version_column + " cannot be used as a version column for storage " + storage + " because it is of type " + column.type->getName() + - " (must be of an integer type or of type Date or DateTime)", ErrorCodes::BAD_TYPE_OF_FIELD); + " (must be of an integer type or of type Date/DateTime/DateTime64)", ErrorCodes::BAD_TYPE_OF_FIELD); miss_column = false; break; } From eef899ae63f05903c636badd9bb3729d17a53d22 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 10 May 2021 21:03:37 +0300 Subject: [PATCH 018/204] better code, add test --- src/Storages/MergeTree/MergeTreeData.cpp | 2 +- .../MergeTree/MergeTreeDataMergerMutator.cpp | 8 +- .../MergeTree/ReplicatedMergeTreeQueue.cpp | 67 ++-------- .../MergeTree/ReplicatedMergeTreeQueue.h | 11 +- src/Storages/StorageMergeTree.cpp | 2 +- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- .../01154_move_partition.reference | 1 + .../0_stateless/01154_move_partition.sh | 117 ++++++++++++++++++ 8 files changed, 138 insertions(+), 72 deletions(-) create mode 100644 tests/queries/0_stateless/01154_move_partition.reference create mode 100755 tests/queries/0_stateless/01154_move_partition.sh diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 5d1400138b4..80d50e199cd 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -2275,7 +2275,7 @@ MergeTreeData::DataPartsVector MergeTreeData::removePartsInRangeFromWorkingSet(c if (part->info.partition_id != drop_range.partition_id) throw Exception("Unexpected partition_id of part " + part->name + ". This is a bug.", ErrorCodes::LOGICAL_ERROR); - if (part->info.min_block < drop_range.min_block) + if (part->info.min_block < drop_range.min_block) /// Always false, because drop_range.min_block == 0 { if (drop_range.min_block <= part->info.max_block) { diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index dfebd88abe9..3eb9ef1b481 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -250,7 +250,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge( prev_part = nullptr; } - /// Check predicate only for first part in each partition. + /// Check predicate only for the first part in each range. if (!prev_part) { /* Parts can be merged with themselves for TTL needs for example. 
@@ -267,8 +267,8 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge( if (!can_merge_callback(*prev_part, part, nullptr)) { /// Starting new interval in the same partition - if (!parts_ranges.back().empty()) - parts_ranges.emplace_back(); + assert(!parts_ranges.back().empty()); + parts_ranges.emplace_back(); /// Now we have no previous part, but it affects only logging prev_part = nullptr; @@ -292,7 +292,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge( if (prev_part && part->info.partition_id == (*prev_part)->info.partition_id && part->info.min_block <= (*prev_part)->info.max_block) { - LOG_ERROR(log, "Part {} intersects previous part {}", part->name, (*prev_part)->name); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Part {} intersects previous part {}", part->name, (*prev_part)->name); } prev_part = ∂ diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index ad41bbe1a08..7a0f60ff327 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -926,12 +926,12 @@ void ReplicatedMergeTreeQueue::removePartProducingOpsInRange( { auto type = (*it)->type; - if (((type == LogEntry::GET_PART || - type == LogEntry::ATTACH_PART || - type == LogEntry::MERGE_PARTS || - type == LogEntry::MUTATE_PART) - && part_info.contains(MergeTreePartInfo::fromPartName((*it)->new_part_name, format_version))) - || checkReplaceRangeCanBeRemoved(part_info, *it, current)) + bool is_simple_producing_op = type == LogEntry::GET_PART || + type == LogEntry::ATTACH_PART || + type == LogEntry::MERGE_PARTS || + type == LogEntry::MUTATE_PART; + bool simple_op_covered = is_simple_producing_op && part_info.contains(MergeTreePartInfo::fromPartName((*it)->new_part_name, format_version)); + if (simple_op_covered || checkReplaceRangeCanBeRemoved(part_info, *it, current)) { if ((*it)->currently_executing) to_wait.push_back(*it); @@ -964,50 +964,6 @@ void ReplicatedMergeTreeQueue::removePartProducingOpsInRange( } -size_t ReplicatedMergeTreeQueue::getConflictsCountForRange( - const MergeTreePartInfo & range, const LogEntry & entry, - String * out_description, std::lock_guard & /* queue_lock */) const -{ - std::vector> conflicts; - - for (const auto & future_part_elem : future_parts) - { - /// Do not check itself log entry - if (future_part_elem.second->znode_name == entry.znode_name) - continue; - - if (!range.isDisjoint(MergeTreePartInfo::fromPartName(future_part_elem.first, format_version))) - { - conflicts.emplace_back(future_part_elem.first, future_part_elem.second); - continue; - } - } - - if (out_description) - { - WriteBufferFromOwnString ss; - ss << "Can't execute command for range " << range.getPartName() << " (entry " << entry.znode_name << "). 
"; - ss << "There are " << conflicts.size() << " currently executing entries blocking it: "; - for (const auto & conflict : conflicts) - ss << conflict.second->typeToString() << " part " << conflict.first << ", "; - - *out_description = ss.str(); - } - - return conflicts.size(); -} - - -void ReplicatedMergeTreeQueue::checkThereAreNoConflictsInRange(const MergeTreePartInfo & range, const LogEntry & entry) -{ - String conflicts_description; - std::lock_guard lock(state_mutex); - - if (0 != getConflictsCountForRange(range, entry, &conflicts_description, lock)) - throw Exception(conflicts_description, ErrorCodes::UNFINISHED); -} - - bool ReplicatedMergeTreeQueue::isNotCoveredByFuturePartsImpl(const String & log_entry_name, const String & new_part_name, String & out_reason, std::lock_guard & /* queue_lock */) const { @@ -1625,8 +1581,11 @@ bool ReplicatedMergeTreeQueue::tryFinalizeMutations(zkutil::ZooKeeperPtr zookeep void ReplicatedMergeTreeQueue::disableMergesInBlockRange(const String & part_name) { - std::lock_guard lock(state_mutex); - virtual_parts.add(part_name); + { + std::lock_guard lock(state_mutex); + virtual_parts.add(part_name); + } + std::this_thread::sleep_for(std::chrono::milliseconds(500)); //FIXME } @@ -1912,9 +1871,7 @@ bool ReplicatedMergeTreeMergePredicate::canMergeTwoParts( if (left->info.partition_id != right->info.partition_id) { - if (out_reason) - *out_reason = "Parts " + left->name + " and " + right->name + " belong to different partitions"; - return false; + throw Exception(ErrorCodes::LOGICAL_ERROR, "Parts {} and {} belong to different partitions", left->name, right->name); } for (const MergeTreeData::DataPartPtr & part : {left, right}) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h index 00ef3ee7292..5c59e700db9 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h @@ -237,11 +237,6 @@ private: std::optional min_unprocessed_insert_time_changed, std::optional max_processed_insert_time_changed) const; - /// Returns list of currently executing parts blocking execution a command modifying specified range - size_t getConflictsCountForRange( - const MergeTreePartInfo & range, const LogEntry & entry, String * out_description, - std::lock_guard & state_lock) const; - /// Marks the element of the queue as running. class CurrentlyExecuting { @@ -322,10 +317,6 @@ public: */ void removePartProducingOpsInRange(zkutil::ZooKeeperPtr zookeeper, const MergeTreePartInfo & part_info, const ReplicatedMergeTreeLogEntryData & current); - /** Throws and exception if there are currently executing entries in the range . - */ - void checkThereAreNoConflictsInRange(const MergeTreePartInfo & range, const LogEntry & entry); - /** In the case where there are not enough parts to perform the merge in part_name * - move actions with merged parts to the end of the queue * (in order to download a already merged part from another replica). @@ -477,7 +468,7 @@ public: /// Can we assign a merge this part and some other part? /// For example a merge of a part and itself is needed for TTL. - /// This predicate is checked for the first part of each partitition. + /// This predicate is checked for the first part of each range. bool canMergeSinglePart(const MergeTreeData::DataPartPtr & part, String * out_reason) const; /// Return nonempty optional of desired mutation version and alter version. 
diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index b3165febd7c..f34e93e212f 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -685,7 +685,7 @@ std::shared_ptr StorageMergeTree::se auto can_merge = [this, &lock] (const DataPartPtr & left, const DataPartPtr & right, String *) -> bool { - /// This predicate is checked for the first part of each partition. + /// This predicate is checked for the first part of each range. /// (left = nullptr, right = "first part of partition") if (!left) return !currently_merging_mutating_parts.count(right); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 3b4a1ec4e16..278dd1da36d 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -6321,7 +6321,7 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta MergeTreePartInfo drop_range_dest; drop_range_dest.partition_id = drop_range.partition_id; drop_range_dest.max_block = drop_range.max_block; - drop_range_dest.min_block = drop_range.max_block; + drop_range_dest.min_block = drop_range.max_block; //FIXME typo? drop_range_dest.level = drop_range.level; drop_range_dest.mutation = drop_range.mutation; diff --git a/tests/queries/0_stateless/01154_move_partition.reference b/tests/queries/0_stateless/01154_move_partition.reference new file mode 100644 index 00000000000..c6d9204ed02 --- /dev/null +++ b/tests/queries/0_stateless/01154_move_partition.reference @@ -0,0 +1 @@ +Replication did not hang diff --git a/tests/queries/0_stateless/01154_move_partition.sh b/tests/queries/0_stateless/01154_move_partition.sh new file mode 100755 index 00000000000..3aef2047b69 --- /dev/null +++ b/tests/queries/0_stateless/01154_move_partition.sh @@ -0,0 +1,117 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +declare -A engines +engines[0]="MergeTree" +engines[1]="ReplicatedMergeTree('/test/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/src', toString(randConstant()))" +engines[2]="ReplicatedMergeTree('/test/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/src_' || toString(randConstant()), 'single_replica')" + +for ((i=0; i<16; i++)) do + $CLICKHOUSE_CLIENT -q "CREATE TABLE dst_$i (p UInt64, k UInt64, v UInt64) + ENGINE=ReplicatedMergeTree('/test/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/dst', '$i') + PARTITION BY p % 10 ORDER BY k" 2>&1| grep -Pv "Retrying createReplica|created by another server at the same moment, will retry" & + engine=${engines[$((i % ${#engines[@]}))]} + $CLICKHOUSE_CLIENT -q "CREATE TABLE src_$i (p UInt64, k UInt64, v UInt64) ENGINE=$engine + PARTITION BY p % 10 ORDER BY k" 2>&1| grep -Pv "Retrying createReplica|created by another server at the same moment, will retry" & +done +wait + +function create_drop_thread() +{ + while true; do + REPLICA=$(($RANDOM % 16)) + $CLICKHOUSE_CLIENT -q "DROP TABLE src_$REPLICA;" + arr=("$@") + engine=${arr[$RANDOM % ${#arr[@]}]} + $CLICKHOUSE_CLIENT -q "CREATE TABLE src_$REPLICA (p UInt64, k UInt64, v UInt64) ENGINE=$engine PARTITION BY p % 10 ORDER BY k" + sleep 0.$RANDOM; + done +} + +function insert_thread() +{ + while true; do + REPLICA=$(($RANDOM % 16)) + LIMIT=$(($RANDOM % 100)) + $CLICKHOUSE_CLIENT -q "INSERT INTO $1_$REPLICA SELECT * FROM generateRandom('p UInt64, k UInt64, v UInt64') LIMIT $LIMIT" 2>/dev/null + done +} + +function move_partition_src_dst_thread() +{ + while true; do + FROM_REPLICA=$(($RANDOM % 16)) + TO_REPLICA=$(($RANDOM % 16)) + PARTITION=$(($RANDOM % 10)) + $CLICKHOUSE_CLIENT -q "ALTER TABLE src_$FROM_REPLICA MOVE PARTITION $PARTITION TO TABLE dst_$TO_REPLICA" 2>/dev/null + sleep 0.$RANDOM; + done +} + +function replace_partition_src_src_thread() +{ + while true; do + FROM_REPLICA=$(($RANDOM % 16)) + TO_REPLICA=$(($RANDOM % 16)) + PARTITION=$(($RANDOM % 10)) + $CLICKHOUSE_CLIENT -q "ALTER TABLE src_$TO_REPLICA REPLACE PARTITION $PARTITION FROM src_$FROM_REPLICA" 2>/dev/null + sleep 0.$RANDOM; + done +} + +function drop_partition_thread() +{ + while true; do + REPLICA=$(($RANDOM % 16)) + PARTITION=$(($RANDOM % 10)) + $CLICKHOUSE_CLIENT -q "ALTER TABLE dst_$TO_REPLICA DROP PARTITION $PARTITION" 2>/dev/null + sleep 0.$RANDOM; + done +} + +function optimize_thread() +{ + while true; do + REPLICA=$(($RANDOM % 16)) + TABLE="src_" + if (( RANDOM % 2 )); then + TABLE="dst_" + fi + $CLICKHOUSE_CLIENT -q "OPTIMIZE TABLE ${TABLE}_$REPLICA" 2>/dev/null + sleep 0.$RANDOM; + done +} + +export -f create_drop_thread; +export -f insert_thread; +export -f move_partition_src_dst_thread; +export -f replace_partition_src_src_thread; +export -f drop_partition_thread; +export -f optimize_thread; + +TIMEOUT=300 + +timeout $TIMEOUT bash -c "create_drop_thread ${engines[@]}" & +timeout $TIMEOUT bash -c 'insert_thread src' & +timeout $TIMEOUT bash -c 'insert_thread src' & +timeout $TIMEOUT bash -c 'insert_thread dst' & +timeout $TIMEOUT bash -c move_partition_src_dst_thread & +timeout $TIMEOUT bash -c replace_partition_src_src_thread & +timeout $TIMEOUT bash -c drop_partition_thread & +timeout $TIMEOUT bash -c optimize_thread & +wait + +for ((i=0; i<16; i++)) do + $CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA dst_$i" + $CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA src_$i" 2>/dev/null # table may not exist +done +echo "Replication did not hang" + +for ((i=0; i<16; i++)) do + $CLICKHOUSE_CLIENT -q "DROP TABLE dst_$i" & + $CLICKHOUSE_CLIENT -q "DROP 
TABLE IF EXISTS src_$i" & +done +wait From 547e1f5a23637eb426c4e206f80e128d3eb99544 Mon Sep 17 00:00:00 2001 From: tavplubix Date: Mon, 10 May 2021 22:31:13 +0300 Subject: [PATCH 019/204] Update 01154_move_partition.sh --- tests/queries/0_stateless/01154_move_partition.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01154_move_partition.sh b/tests/queries/0_stateless/01154_move_partition.sh index 3aef2047b69..377af71ee04 100755 --- a/tests/queries/0_stateless/01154_move_partition.sh +++ b/tests/queries/0_stateless/01154_move_partition.sh @@ -92,7 +92,7 @@ export -f replace_partition_src_src_thread; export -f drop_partition_thread; export -f optimize_thread; -TIMEOUT=300 +TIMEOUT=100 timeout $TIMEOUT bash -c "create_drop_thread ${engines[@]}" & timeout $TIMEOUT bash -c 'insert_thread src' & From 04f00b2b42465092f0d710dc3717103ba45382b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=B8=87=E5=BA=B7?= Date: Tue, 11 May 2021 20:00:34 +0800 Subject: [PATCH 020/204] add tests --- ...67_support_datetime64_version_column.reference | 4 ++++ .../01867_support_datetime64_version_column.sql | 15 +++++++++++++++ 2 files changed, 19 insertions(+) create mode 100644 tests/queries/0_stateless/01867_support_datetime64_version_column.reference create mode 100644 tests/queries/0_stateless/01867_support_datetime64_version_column.sql diff --git a/tests/queries/0_stateless/01867_support_datetime64_version_column.reference b/tests/queries/0_stateless/01867_support_datetime64_version_column.reference new file mode 100644 index 00000000000..f449cef23d3 --- /dev/null +++ b/tests/queries/0_stateless/01867_support_datetime64_version_column.reference @@ -0,0 +1,4 @@ +1 1970-01-01 08:25:46.300800003 a1 +2 1970-01-01 08:25:46.300800004 b1 +3 1970-01-01 08:25:46.300800005 c1 +2 1970-01-01 08:25:46.300800005 a1 diff --git a/tests/queries/0_stateless/01867_support_datetime64_version_column.sql b/tests/queries/0_stateless/01867_support_datetime64_version_column.sql new file mode 100644 index 00000000000..d497cf1b2f5 --- /dev/null +++ b/tests/queries/0_stateless/01867_support_datetime64_version_column.sql @@ -0,0 +1,15 @@ +create table replacing( `A` Int64, `D` DateTime64(9), `S` String) ENGINE = ReplacingMergeTree(D) ORDER BY A; + +insert into replacing values (1,'1970-01-01 08:25:46.300800000','a'); +insert into replacing values (2,'1970-01-01 08:25:46.300800002','b'); +insert into replacing values (1,'1970-01-01 08:25:46.300800003','a1'); +insert into replacing values (1,'1970-01-01 08:25:46.300800002','a2'); +insert into replacing values (2,'1970-01-01 08:25:46.300800004','b1'); +insert into replacing values (3,'1970-01-01 08:25:46.300800005','c1'); +insert into replacing values (2,'1970-01-01 08:25:46.300800005','a1'); + +OPTIMIZE TABLE replacing; + +select * from replacing; + +drop table replacing; \ No newline at end of file From 3bb480bd376ee8bdf9090fc8ea9e26d46a6aeb5c Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Tue, 11 May 2021 15:29:37 +0300 Subject: [PATCH 021/204] Update 01867_support_datetime64_version_column.sql --- .../0_stateless/01867_support_datetime64_version_column.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01867_support_datetime64_version_column.sql b/tests/queries/0_stateless/01867_support_datetime64_version_column.sql index d497cf1b2f5..9e7d487ea74 100644 --- a/tests/queries/0_stateless/01867_support_datetime64_version_column.sql +++ 
b/tests/queries/0_stateless/01867_support_datetime64_version_column.sql @@ -1,3 +1,4 @@ +drop table if exists replacing; create table replacing( `A` Int64, `D` DateTime64(9), `S` String) ENGINE = ReplacingMergeTree(D) ORDER BY A; insert into replacing values (1,'1970-01-01 08:25:46.300800000','a'); @@ -12,4 +13,4 @@ OPTIMIZE TABLE replacing; select * from replacing; -drop table replacing; \ No newline at end of file +drop table replacing; From f6c9327fd55c9136d4a8d04277bbfbccefb7a0bd Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Tue, 11 May 2021 15:29:48 +0300 Subject: [PATCH 022/204] Update 01867_support_datetime64_version_column.sql --- .../0_stateless/01867_support_datetime64_version_column.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01867_support_datetime64_version_column.sql b/tests/queries/0_stateless/01867_support_datetime64_version_column.sql index 9e7d487ea74..f4427be635a 100644 --- a/tests/queries/0_stateless/01867_support_datetime64_version_column.sql +++ b/tests/queries/0_stateless/01867_support_datetime64_version_column.sql @@ -9,7 +9,7 @@ insert into replacing values (2,'1970-01-01 08:25:46.300800004','b1'); insert into replacing values (3,'1970-01-01 08:25:46.300800005','c1'); insert into replacing values (2,'1970-01-01 08:25:46.300800005','a1'); -OPTIMIZE TABLE replacing; +OPTIMIZE TABLE replacing FINAL; select * from replacing; From 430f5582343fc90c427b971654e8940925d21ad4 Mon Sep 17 00:00:00 2001 From: tavplubix Date: Tue, 11 May 2021 15:31:13 +0300 Subject: [PATCH 023/204] Update 01154_move_partition.sh --- .../0_stateless/01154_move_partition.sh | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/tests/queries/0_stateless/01154_move_partition.sh b/tests/queries/0_stateless/01154_move_partition.sh index 377af71ee04..e5134b1f773 100755 --- a/tests/queries/0_stateless/01154_move_partition.sh +++ b/tests/queries/0_stateless/01154_move_partition.sh @@ -19,17 +19,17 @@ for ((i=0; i<16; i++)) do done wait -function create_drop_thread() -{ - while true; do - REPLICA=$(($RANDOM % 16)) - $CLICKHOUSE_CLIENT -q "DROP TABLE src_$REPLICA;" - arr=("$@") - engine=${arr[$RANDOM % ${#arr[@]}]} - $CLICKHOUSE_CLIENT -q "CREATE TABLE src_$REPLICA (p UInt64, k UInt64, v UInt64) ENGINE=$engine PARTITION BY p % 10 ORDER BY k" - sleep 0.$RANDOM; - done -} +#function create_drop_thread() +#{ +# while true; do +# REPLICA=$(($RANDOM % 16)) +# $CLICKHOUSE_CLIENT -q "DROP TABLE src_$REPLICA;" +# arr=("$@") +# engine=${arr[$RANDOM % ${#arr[@]}]} +# $CLICKHOUSE_CLIENT -q "CREATE TABLE src_$REPLICA (p UInt64, k UInt64, v UInt64) ENGINE=$engine PARTITION BY p % 10 ORDER BY k" +# sleep 0.$RANDOM; +# done +#} function insert_thread() { @@ -85,7 +85,7 @@ function optimize_thread() done } -export -f create_drop_thread; +#export -f create_drop_thread; export -f insert_thread; export -f move_partition_src_dst_thread; export -f replace_partition_src_src_thread; @@ -94,7 +94,7 @@ export -f optimize_thread; TIMEOUT=100 -timeout $TIMEOUT bash -c "create_drop_thread ${engines[@]}" & +#timeout $TIMEOUT bash -c "create_drop_thread ${engines[@]}" & timeout $TIMEOUT bash -c 'insert_thread src' & timeout $TIMEOUT bash -c 'insert_thread src' & timeout $TIMEOUT bash -c 'insert_thread dst' & @@ -106,7 +106,7 @@ wait for ((i=0; i<16; i++)) do $CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA dst_$i" - $CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA src_$i" 2>/dev/null # table may not exist + $CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA 
src_$i" 2>/dev/null done echo "Replication did not hang" From e8524d6e69eeb831e319c9d21e86de58ddcad56c Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Wed, 12 May 2021 01:19:51 +0300 Subject: [PATCH 024/204] Update 01867_support_datetime64_version_column.reference --- .../01867_support_datetime64_version_column.reference | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01867_support_datetime64_version_column.reference b/tests/queries/0_stateless/01867_support_datetime64_version_column.reference index f449cef23d3..3a6dbdc870c 100644 --- a/tests/queries/0_stateless/01867_support_datetime64_version_column.reference +++ b/tests/queries/0_stateless/01867_support_datetime64_version_column.reference @@ -1,4 +1,3 @@ 1 1970-01-01 08:25:46.300800003 a1 -2 1970-01-01 08:25:46.300800004 b1 -3 1970-01-01 08:25:46.300800005 c1 2 1970-01-01 08:25:46.300800005 a1 +3 1970-01-01 08:25:46.300800005 c1 From ffb4e1af8f9180b00d99c8a63d13278830377321 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 12 May 2021 13:39:07 +0300 Subject: [PATCH 025/204] Some copy-paste --- programs/CMakeLists.txt | 26 +- programs/config_tools.h.in | 1 + programs/keeper/CMakeLists.txt | 14 + programs/keeper/Keeper.cpp | 421 ++++++++++++++++++++++++++ programs/keeper/Keeper.h | 64 ++++ programs/keeper/clickhouse-keeper.cpp | 6 + programs/main.cpp | 6 + 7 files changed, 534 insertions(+), 4 deletions(-) create mode 100644 programs/keeper/CMakeLists.txt create mode 100644 programs/keeper/Keeper.cpp create mode 100644 programs/keeper/Keeper.h create mode 100644 programs/keeper/clickhouse-keeper.cpp diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt index 09199e83026..500e4794335 100644 --- a/programs/CMakeLists.txt +++ b/programs/CMakeLists.txt @@ -47,6 +47,9 @@ option (ENABLE_CLICKHOUSE_LIBRARY_BRIDGE "HTTP-server working like a proxy to Li option (ENABLE_CLICKHOUSE_GIT_IMPORT "A tool to analyze Git repositories" ${ENABLE_CLICKHOUSE_ALL}) + +option (ENABLE_CLICKHOUSE_KEEPER "ClickHouse alternative to ZooKeeper" ${ENABLE_CLICKHOUSE_ALL}) + if (CLICKHOUSE_SPLIT_BINARY) option(ENABLE_CLICKHOUSE_INSTALL "Install ClickHouse without .deb/.rpm/.tgz packages (having the binary only)" OFF) else () @@ -134,6 +137,12 @@ else() message(STATUS "ClickHouse git-import: OFF") endif() +if (ENABLE_CLICKHOUSE_KEEPER) + message(STATUS "ClickHouse keeper mode: ON") +else() + message(STATUS "ClickHouse keeper mode: OFF") +endif() + if(NOT (MAKE_STATIC_LIBRARIES OR SPLIT_SHARED_LIBRARIES)) set(CLICKHOUSE_ONE_SHARED ON) endif() @@ -202,6 +211,7 @@ add_subdirectory (obfuscator) add_subdirectory (install) add_subdirectory (git-import) add_subdirectory (bash-completion) +add_subdirectory (keeper) if (ENABLE_CLICKHOUSE_ODBC_BRIDGE) add_subdirectory (odbc-bridge) @@ -212,15 +222,15 @@ if (ENABLE_CLICKHOUSE_LIBRARY_BRIDGE) endif () if (CLICKHOUSE_ONE_SHARED) - add_library(clickhouse-lib SHARED ${CLICKHOUSE_SERVER_SOURCES} ${CLICKHOUSE_CLIENT_SOURCES} ${CLICKHOUSE_LOCAL_SOURCES} ${CLICKHOUSE_BENCHMARK_SOURCES} ${CLICKHOUSE_COPIER_SOURCES} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_SOURCES} ${CLICKHOUSE_COMPRESSOR_SOURCES} ${CLICKHOUSE_FORMAT_SOURCES} ${CLICKHOUSE_OBFUSCATOR_SOURCES} ${CLICKHOUSE_GIT_IMPORT_SOURCES} ${CLICKHOUSE_ODBC_BRIDGE_SOURCES}) - target_link_libraries(clickhouse-lib ${CLICKHOUSE_SERVER_LINK} ${CLICKHOUSE_CLIENT_LINK} ${CLICKHOUSE_LOCAL_LINK} ${CLICKHOUSE_BENCHMARK_LINK} ${CLICKHOUSE_COPIER_LINK} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_LINK} ${CLICKHOUSE_COMPRESSOR_LINK} ${CLICKHOUSE_FORMAT_LINK} 
${CLICKHOUSE_OBFUSCATOR_LINK} ${CLICKHOUSE_GIT_IMPORT_LINK} ${CLICKHOUSE_ODBC_BRIDGE_LINK}) - target_include_directories(clickhouse-lib ${CLICKHOUSE_SERVER_INCLUDE} ${CLICKHOUSE_CLIENT_INCLUDE} ${CLICKHOUSE_LOCAL_INCLUDE} ${CLICKHOUSE_BENCHMARK_INCLUDE} ${CLICKHOUSE_COPIER_INCLUDE} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_INCLUDE} ${CLICKHOUSE_COMPRESSOR_INCLUDE} ${CLICKHOUSE_FORMAT_INCLUDE} ${CLICKHOUSE_OBFUSCATOR_INCLUDE} ${CLICKHOUSE_GIT_IMPORT_INCLUDE} ${CLICKHOUSE_ODBC_BRIDGE_INCLUDE}) + add_library(clickhouse-lib SHARED ${CLICKHOUSE_SERVER_SOURCES} ${CLICKHOUSE_CLIENT_SOURCES} ${CLICKHOUSE_LOCAL_SOURCES} ${CLICKHOUSE_BENCHMARK_SOURCES} ${CLICKHOUSE_COPIER_SOURCES} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_SOURCES} ${CLICKHOUSE_COMPRESSOR_SOURCES} ${CLICKHOUSE_FORMAT_SOURCES} ${CLICKHOUSE_OBFUSCATOR_SOURCES} ${CLICKHOUSE_GIT_IMPORT_SOURCES} ${CLICKHOUSE_ODBC_BRIDGE_SOURCES} ${CLICKHOUSE_KEEPER_SOURCES}) + target_link_libraries(clickhouse-lib ${CLICKHOUSE_SERVER_LINK} ${CLICKHOUSE_CLIENT_LINK} ${CLICKHOUSE_LOCAL_LINK} ${CLICKHOUSE_BENCHMARK_LINK} ${CLICKHOUSE_COPIER_LINK} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_LINK} ${CLICKHOUSE_COMPRESSOR_LINK} ${CLICKHOUSE_FORMAT_LINK} ${CLICKHOUSE_OBFUSCATOR_LINK} ${CLICKHOUSE_GIT_IMPORT_LINK} ${CLICKHOUSE_ODBC_BRIDGE_LINK} ${CLICKHOUSE_KEEPER_LINK}) + target_include_directories(clickhouse-lib ${CLICKHOUSE_SERVER_INCLUDE} ${CLICKHOUSE_CLIENT_INCLUDE} ${CLICKHOUSE_LOCAL_INCLUDE} ${CLICKHOUSE_BENCHMARK_INCLUDE} ${CLICKHOUSE_COPIER_INCLUDE} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_INCLUDE} ${CLICKHOUSE_COMPRESSOR_INCLUDE} ${CLICKHOUSE_FORMAT_INCLUDE} ${CLICKHOUSE_OBFUSCATOR_INCLUDE} ${CLICKHOUSE_GIT_IMPORT_INCLUDE} ${CLICKHOUSE_ODBC_BRIDGE_INCLUDE} ${CLICKHOUSE_KEEPER_INCLUDE}) set_target_properties(clickhouse-lib PROPERTIES SOVERSION ${VERSION_MAJOR}.${VERSION_MINOR} VERSION ${VERSION_SO} OUTPUT_NAME clickhouse DEBUG_POSTFIX "") install (TARGETS clickhouse-lib LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT clickhouse) endif() if (CLICKHOUSE_SPLIT_BINARY) - set (CLICKHOUSE_ALL_TARGETS clickhouse-server clickhouse-client clickhouse-local clickhouse-benchmark clickhouse-extract-from-config clickhouse-compressor clickhouse-format clickhouse-obfuscator clickhouse-git-import clickhouse-copier) + set (CLICKHOUSE_ALL_TARGETS clickhouse-server clickhouse-client clickhouse-local clickhouse-benchmark clickhouse-extract-from-config clickhouse-compressor clickhouse-format clickhouse-obfuscator clickhouse-git-import clickhouse-copier clickhouse-keeper) if (ENABLE_CLICKHOUSE_ODBC_BRIDGE) list (APPEND CLICKHOUSE_ALL_TARGETS clickhouse-odbc-bridge) @@ -277,6 +287,9 @@ else () if (ENABLE_CLICKHOUSE_GIT_IMPORT) clickhouse_target_link_split_lib(clickhouse git-import) endif () + if (ENABLE_CLICKHOUSE_KEEPER) + clickhouse_target_link_split_lib(clickhouse keeper) + endif() if (ENABLE_CLICKHOUSE_INSTALL) clickhouse_target_link_split_lib(clickhouse install) endif () @@ -332,6 +345,11 @@ else () install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-git-import" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) list(APPEND CLICKHOUSE_BUNDLE clickhouse-git-import) endif () + if (ENABLE_CLICKHOUSE_KEEPER) + add_custom_target (clickhouse-keeper ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-keeper DEPENDS clickhouse) + install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-keeper" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) + list(APPEND CLICKHOUSE_BUNDLE clickhouse-keeper) + endif () install (TARGETS clickhouse RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} 
COMPONENT clickhouse) diff --git a/programs/config_tools.h.in b/programs/config_tools.h.in index abe9ef8c562..50ba0c16a83 100644 --- a/programs/config_tools.h.in +++ b/programs/config_tools.h.in @@ -16,3 +16,4 @@ #cmakedefine01 ENABLE_CLICKHOUSE_INSTALL #cmakedefine01 ENABLE_CLICKHOUSE_ODBC_BRIDGE #cmakedefine01 ENABLE_CLICKHOUSE_LIBRARY_BRIDGE +#cmakedefine01 ENABLE_CLICKHOUSE_KEEPER diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt new file mode 100644 index 00000000000..8f40fe54667 --- /dev/null +++ b/programs/keeper/CMakeLists.txt @@ -0,0 +1,14 @@ +set(CLICKHOUSE_KEEPER_SOURCES + Keeper.cpp +) + +set (CLICKHOUSE_KEEPER_LINK + PRIVATE + clickhouse_common_config + clickhouse_common_io + clickhouse_common_zookeeper + daemon + dbms +) + +clickhouse_program_add(keeper) diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp new file mode 100644 index 00000000000..78c9e7d50b9 --- /dev/null +++ b/programs/keeper/Keeper.cpp @@ -0,0 +1,421 @@ +#include "Keeper.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if !defined(ARCADIA_BUILD) +# include "config_core.h" +# include "Common/config_version.h" +#endif + +#if USE_SSL +# include +# include +#endif + +#if USE_NURAFT +# include +#endif + +int mainEntryClickHouseKeeper(int argc, char ** argv) +{ + DB::Keeper app; + + try + { + return app.run(argc, argv); + } + catch (...) + { + std::cerr << DB::getCurrentExceptionMessage(true) << "\n"; + auto code = DB::getCurrentExceptionCode(); + return code ? code : 1; + } +} + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NO_ELEMENTS_IN_CONFIG; + extern const int SUPPORT_IS_DISABLED; + extern const int ARGUMENT_OUT_OF_BOUND; + extern const int EXCESSIVE_ELEMENT_IN_CONFIG; + extern const int INVALID_CONFIG_PARAMETER; + extern const int SYSTEM_ERROR; + extern const int FAILED_TO_GETPWUID; + extern const int MISMATCHING_USERS_FOR_PROCESS_AND_DATA; + extern const int NETWORK_ERROR; + extern const int CORRUPTED_DATA; +} + +namespace +{ + +int waitServersToFinish(std::vector & servers, size_t seconds_to_wait) +{ + const int sleep_max_ms = 1000 * seconds_to_wait; + const int sleep_one_ms = 100; + int sleep_current_ms = 0; + int current_connections = 0; + for (;;) + { + current_connections = 0; + + for (auto & server : servers) + { + server.stop(); + current_connections += server.currentConnections(); + } + + if (!current_connections) + break; + + sleep_current_ms += sleep_one_ms; + if (sleep_current_ms < sleep_max_ms) + std::this_thread::sleep_for(std::chrono::milliseconds(sleep_one_ms)); + else + break; + } + return current_connections; +} + +Poco::Net::SocketAddress makeSocketAddress(const std::string & host, UInt16 port, Poco::Logger * log) +{ + Poco::Net::SocketAddress socket_address; + try + { + socket_address = Poco::Net::SocketAddress(host, port); + } + catch (const Poco::Net::DNSException & e) + { + const auto code = e.code(); + if (code == EAI_FAMILY +#if defined(EAI_ADDRFAMILY) + || code == EAI_ADDRFAMILY +#endif + ) + { + LOG_ERROR(log, "Cannot resolve listen_host ({}), error {}: {}. " + "If it is an IPv6 address and your host has disabled IPv6, then consider to " + "specify IPv4 address to listen in element of configuration " + "file. 
Example: 0.0.0.0", + host, e.code(), e.message()); + } + + throw; + } + return socket_address; +} + +std::string getCanonicalPath(std::string && path) +{ + Poco::trimInPlace(path); + if (path.empty()) + throw Exception("path configuration parameter is empty", ErrorCodes::INVALID_CONFIG_PARAMETER); + if (path.back() != '/') + path += '/'; + return std::move(path); +} + +[[noreturn]] void forceShutdown() +{ +#if defined(THREAD_SANITIZER) && defined(OS_LINUX) + /// Thread sanitizer tries to do something on exit that we don't need if we want to exit immediately, + /// while connection handling threads are still run. + (void)syscall(SYS_exit_group, 0); + __builtin_unreachable(); +#else + _exit(0); +#endif +} + +} + + +Poco::Net::SocketAddress Keeper::socketBindListen(Poco::Net::ServerSocket & socket, const std::string & host, UInt16 port, [[maybe_unused]] bool secure) const +{ + auto address = makeSocketAddress(host, port, &logger()); +#if !defined(POCO_CLICKHOUSE_PATCH) || POCO_VERSION < 0x01090100 + if (secure) + /// Bug in old (<1.9.1) poco, listen() after bind() with reusePort param will fail because have no implementation in SecureServerSocketImpl + /// https://github.com/pocoproject/poco/pull/2257 + socket.bind(address, /* reuseAddress = */ true); + else +#endif +#if POCO_VERSION < 0x01080000 + socket.bind(address, /* reuseAddress = */ true); +#else + socket.bind(address, /* reuseAddress = */ true, /* reusePort = */ config().getBool("listen_reuse_port", false)); +#endif + + socket.listen(/* backlog = */ config().getUInt("listen_backlog", 64)); + + return address; +} + +void Keeper::createServer(const std::string & listen_host, const char * port_name, bool listen_try, CreateServerFunc && func) const +{ + /// For testing purposes, user may omit tcp_port or http_port or https_port in configuration file. + if (!config().has(port_name)) + return; + + auto port = config().getInt(port_name); + try + { + func(port); + } + catch (const Poco::Exception &) + { + std::string message = "Listen [" + listen_host + "]:" + std::to_string(port) + " failed: " + getCurrentExceptionMessage(false); + + if (listen_try) + { + LOG_WARNING(&logger(), "{}. If it is an IPv6 or IPv4 address and your host has disabled IPv6 or IPv4, then consider to " + "specify not disabled IPv4 or IPv6 address to listen in element of configuration " + "file. Example for disabled IPv6: 0.0.0.0 ." + " Example for disabled IPv4: ::", + message); + } + else + { + throw Exception{message, ErrorCodes::NETWORK_ERROR}; + } + } +} + +void Keeper::uninitialize() +{ + logger().information("shutting down"); + BaseDaemon::uninitialize(); +} + +int Keeper::run() +{ + if (config().hasOption("help")) + { + Poco::Util::HelpFormatter help_formatter(Keeper::options()); + auto header_str = fmt::format("{} [OPTION] [-- [ARG]...]\n" + "positional arguments can be used to rewrite config.xml properties, for example, --http_port=8010", + commandName()); + help_formatter.setHeader(header_str); + help_formatter.format(std::cout); + return 0; + } + if (config().hasOption("version")) + { + std::cout << DBMS_NAME << " server version " << VERSION_STRING << VERSION_OFFICIAL << "." 
<< std::endl; + return 0; + } + + + return Application::run(); // NOLINT +} + +void Keeper::initialize(Poco::Util::Application & self) +{ + BaseDaemon::initialize(self); + logger().information("starting up"); + + LOG_INFO(&logger(), "OS Name = {}, OS Version = {}, OS Architecture = {}", + Poco::Environment::osName(), + Poco::Environment::osVersion(), + Poco::Environment::osArchitecture()); +} + +std::string Keeper::getDefaultCorePath() const +{ + return getCanonicalPath(config().getString("path", KEEPER_DEFAULT_PATH)) + "cores"; +} + +void Keeper::defineOptions(Poco::Util::OptionSet & options) +{ + options.addOption( + Poco::Util::Option("help", "h", "show help and exit") + .required(false) + .repeatable(false) + .binding("help")); + options.addOption( + Poco::Util::Option("version", "V", "show version and exit") + .required(false) + .repeatable(false) + .binding("version")); + BaseDaemon::defineOptions(options); +} + +int Keeper::main(const std::vector & /*args*/) +{ + Poco::Logger * log = &logger(); + + UseSSL use_ssl; + + MainThreadStatus::getInstance(); + +#if !defined(NDEBUG) || !defined(__OPTIMIZE__) + LOG_WARNING(log, "Server was built in debug mode. It will work slowly."); +#endif + +#if defined(SANITIZER) + LOG_WARNING(log, "Server was built with sanitizer. It will work slowly."); +#endif + + auto shared_context = Context::createShared(); + global_context = Context::createGlobal(shared_context.get()); + + global_context->makeGlobalContext(); + global_context->setApplicationType(Context::ApplicationType::SERVER); + const Settings & settings = global_context->getSettingsRef(); + + GlobalThreadPool::initialize(config().getUInt("max_thread_pool_size", 500)); + + static ServerErrorHandler error_handler; + Poco::ErrorHandler::set(&error_handler); + + /// Initialize DateLUT early, to not interfere with running time of first query. + LOG_DEBUG(log, "Initializing DateLUT."); + DateLUT::instance(); + LOG_TRACE(log, "Initialized DateLUT with time zone '{}'.", DateLUT::instance().getTimeZone()); + + Poco::ThreadPool server_pool(3, config().getUInt("max_connections", 1024)); + + Poco::Timespan keep_alive_timeout(config().getUInt("keep_alive_timeout", 10), 0); + + std::vector listen_hosts = DB::getMultipleValuesFromConfig(config(), "", "listen_host"); + + bool listen_try = config().getBool("listen_try", false); + if (listen_hosts.empty()) + { + listen_hosts.emplace_back("::1"); + listen_hosts.emplace_back("127.0.0.1"); + listen_try = true; + } + + auto servers = std::make_shared>(); + + if (config().has("keeper_server")) + { +#if USE_NURAFT + /// Initialize test keeper RAFT. Do nothing if no nu_keeper_server in config. 
+ global_context->initializeKeeperStorageDispatcher(); + for (const auto & listen_host : listen_hosts) + { + /// TCP Keeper + const char * port_name = "keeper_server.tcp_port"; + createServer(listen_host, port_name, listen_try, [&](UInt16 port) + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(socket, listen_host, port); + socket.setReceiveTimeout(settings.receive_timeout); + socket.setSendTimeout(settings.send_timeout); + servers->emplace_back( + port_name, + std::make_unique( + new KeeperTCPHandlerFactory(*this, false), server_pool, socket, new Poco::Net::TCPServerParams)); + + LOG_INFO(log, "Listening for connections to Keeper (tcp): {}", address.toString()); + }); + + const char * secure_port_name = "keeper_server.tcp_port_secure"; + createServer(listen_host, secure_port_name, listen_try, [&](UInt16 port) + { +#if USE_SSL + Poco::Net::SecureServerSocket socket; + auto address = socketBindListen(socket, listen_host, port, /* secure = */ true); + socket.setReceiveTimeout(settings.receive_timeout); + socket.setSendTimeout(settings.send_timeout); + servers->emplace_back( + secure_port_name, + std::make_unique( + new KeeperTCPHandlerFactory(*this, true), server_pool, socket, new Poco::Net::TCPServerParams)); + LOG_INFO(log, "Listening for connections to Keeper with secure protocol (tcp_secure): {}", address.toString()); +#else + UNUSED(port); + throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.", + ErrorCodes::SUPPORT_IS_DISABLED}; +#endif + }); + } +#else + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "ClickHouse server built without NuRaft library. Cannot use internal coordination."); +#endif + + } + for (auto & server : *servers) + server.start(); + + SCOPE_EXIT({ + /** Ask to cancel background jobs all table engines, + * and also query_log. + * It is important to do early, not in destructor of Context, because + * table engines could use Context on destroy. + */ + LOG_INFO(log, "Shutting down."); + + global_context->shutdown(); + + LOG_DEBUG(log, "Waiting for current connections to servers for tables to finish."); + int current_connections = 0; + for (auto & server : *servers) + { + server.stop(); + current_connections += server.currentConnections(); + } + + if (current_connections) + LOG_INFO(log, "Closed all listening sockets. Waiting for {} outstanding connections.", current_connections); + else + LOG_INFO(log, "Closed all listening sockets."); + + if (current_connections > 0) + current_connections = waitServersToFinish(*servers, config().getInt("shutdown_wait_unfinished", 5)); + + if (current_connections) + LOG_INFO(log, "Closed connections to servers for tables. But {} remain. Probably some tables of other users cannot finish their connections after context shutdown.", current_connections); + else + LOG_INFO(log, "Closed connections to servers for tables."); + + global_context->shutdownKeeperStorageDispatcher(); + + /// Wait server pool to avoid use-after-free of destroyed context in the handlers + server_pool.joinAll(); + + /** Explicitly destroy Context. It is more convenient than in destructor of Server, because logger is still available. + * At this moment, no one could own shared part of Context. 
+ */ + global_context.reset(); + shared_context.reset(); + + LOG_DEBUG(log, "Destroyed global context."); + + if (current_connections) + { + LOG_INFO(log, "Will shutdown forcefully."); + forceShutdown(); + } + }); + + + buildLoggers(config(), logger()); + + LOG_INFO(log, "Ready for connections."); + + waitForTerminationRequest(); + + return Application::EXIT_OK; +} + +} diff --git a/programs/keeper/Keeper.h b/programs/keeper/Keeper.h new file mode 100644 index 00000000000..0fe3465f789 --- /dev/null +++ b/programs/keeper/Keeper.h @@ -0,0 +1,64 @@ +#pragma once + +#include +#include + +namespace Poco +{ + namespace Net + { + class ServerSocket; + } +} + +namespace DB +{ + +class Keeper : public BaseDaemon, public IServer +{ +public: + using ServerApplication::run; + + Poco::Util::LayeredConfiguration & config() const override + { + return BaseDaemon::config(); + } + + Poco::Logger & logger() const override + { + return BaseDaemon::logger(); + } + + ContextPtr context() const override + { + return global_context; + } + + bool isCancelled() const override + { + return BaseDaemon::isCancelled(); + } + + void defineOptions(Poco::Util::OptionSet & _options) override; + +protected: + int run() override; + + void initialize(Application & self) override; + + void uninitialize() override; + + int main(const std::vector & args) override; + + std::string getDefaultCorePath() const override; + +private: + ContextPtr global_context; + + Poco::Net::SocketAddress socketBindListen(Poco::Net::ServerSocket & socket, const std::string & host, UInt16 port, [[maybe_unused]] bool secure = false) const; + + using CreateServerFunc = std::function; + void createServer(const std::string & listen_host, const char * port_name, bool listen_try, CreateServerFunc && func) const; +}; + +} diff --git a/programs/keeper/clickhouse-keeper.cpp b/programs/keeper/clickhouse-keeper.cpp new file mode 100644 index 00000000000..baa673f79ee --- /dev/null +++ b/programs/keeper/clickhouse-keeper.cpp @@ -0,0 +1,6 @@ +int mainEntryClickHouseKeeper(int argc, char ** argv); + +int main(int argc_, char ** argv_) +{ + return mainEntryClickHouseKeeper(argc_, argv_); +} diff --git a/programs/main.cpp b/programs/main.cpp index cbb22b7a87b..ccdf4d50fb4 100644 --- a/programs/main.cpp +++ b/programs/main.cpp @@ -55,6 +55,9 @@ int mainEntryClickHouseObfuscator(int argc, char ** argv); #if ENABLE_CLICKHOUSE_GIT_IMPORT int mainEntryClickHouseGitImport(int argc, char ** argv); #endif +#if ENABLE_CLICKHOUSE_KEEPER +int mainEntryClickHouseKeeper(int argc, char ** argv); +#endif #if ENABLE_CLICKHOUSE_INSTALL int mainEntryClickHouseInstall(int argc, char ** argv); int mainEntryClickHouseStart(int argc, char ** argv); @@ -112,6 +115,9 @@ std::pair clickhouse_applications[] = #if ENABLE_CLICKHOUSE_GIT_IMPORT {"git-import", mainEntryClickHouseGitImport}, #endif +#if ENABLE_CLICKHOUSE_KEEPER + {"keeper", mainEntryClickHouseKeeper}, +#endif #if ENABLE_CLICKHOUSE_INSTALL {"install", mainEntryClickHouseInstall}, {"start", mainEntryClickHouseStart}, From 41eadf9127cb8dda4153c8d28e87f6fba6a394ad Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 12 May 2021 16:04:34 +0300 Subject: [PATCH 026/204] Something working --- base/daemon/BaseDaemon.cpp | 7 +- base/daemon/BaseDaemon.h | 2 + programs/keeper/CMakeLists.txt | 2 + programs/keeper/Keeper.cpp | 115 +++++++++++++------------- programs/keeper/Keeper.h | 4 +- programs/server/CMakeLists.txt | 2 +- src/Common/Config/ConfigProcessor.cpp | 13 ++- src/Core/Defines.h | 2 + 8 files changed, 85 insertions(+), 62 
deletions(-) diff --git a/base/daemon/BaseDaemon.cpp b/base/daemon/BaseDaemon.cpp index 83384038b7c..01e700ebba3 100644 --- a/base/daemon/BaseDaemon.cpp +++ b/base/daemon/BaseDaemon.cpp @@ -468,7 +468,7 @@ void BaseDaemon::reloadConfiguration() * instead of using files specified in config.xml. * (It's convenient to log in console when you start server without any command line parameters.) */ - config_path = config().getString("config-file", "config.xml"); + config_path = config().getString("config-file", getDefaultConfigFileName()); DB::ConfigProcessor config_processor(config_path, false, true); config_processor.setConfigPath(Poco::Path(config_path).makeParent().toString()); loaded_config = config_processor.loadConfig(/* allow_zk_includes = */ true); @@ -516,6 +516,11 @@ std::string BaseDaemon::getDefaultCorePath() const return "/opt/cores/"; } +std::string BaseDaemon::getDefaultConfigFileName() const +{ + return "config.xml"; +} + void BaseDaemon::closeFDs() { #if defined(OS_FREEBSD) || defined(OS_DARWIN) diff --git a/base/daemon/BaseDaemon.h b/base/daemon/BaseDaemon.h index 8b9d765cf2e..3d47d85a9f5 100644 --- a/base/daemon/BaseDaemon.h +++ b/base/daemon/BaseDaemon.h @@ -149,6 +149,8 @@ protected: virtual std::string getDefaultCorePath() const; + virtual std::string getDefaultConfigFileName() const; + std::optional pid_file; std::atomic_bool is_cancelled{false}; diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index 8f40fe54667..cd10baa2e57 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -12,3 +12,5 @@ set (CLICKHOUSE_KEEPER_LINK ) clickhouse_program_add(keeper) + +install (FILES clickhouse-keeper.xml DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-keeper" COMPONENT clickhouse-keeper RENAME keeper_config.xml) diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index 78c9e7d50b9..e64134ed10d 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -1,5 +1,6 @@ #include "Keeper.h" +#include #include #include #include @@ -122,16 +123,6 @@ Poco::Net::SocketAddress makeSocketAddress(const std::string & host, UInt16 port return socket_address; } -std::string getCanonicalPath(std::string && path) -{ - Poco::trimInPlace(path); - if (path.empty()) - throw Exception("path configuration parameter is empty", ErrorCodes::INVALID_CONFIG_PARAMETER); - if (path.back() != '/') - path += '/'; - return std::move(path); -} - [[noreturn]] void forceShutdown() { #if defined(THREAD_SANITIZER) && defined(OS_LINUX) @@ -146,7 +137,6 @@ std::string getCanonicalPath(std::string && path) } - Poco::Net::SocketAddress Keeper::socketBindListen(Poco::Net::ServerSocket & socket, const std::string & host, UInt16 port, [[maybe_unused]] bool secure) const { auto address = makeSocketAddress(host, port, &logger()); @@ -218,11 +208,10 @@ int Keeper::run() } if (config().hasOption("version")) { - std::cout << DBMS_NAME << " server version " << VERSION_STRING << VERSION_OFFICIAL << "." << std::endl; + std::cout << DBMS_NAME << " keeper version " << VERSION_STRING << VERSION_OFFICIAL << "." 
<< std::endl; return 0; } - return Application::run(); // NOLINT } @@ -237,9 +226,9 @@ void Keeper::initialize(Poco::Util::Application & self) Poco::Environment::osArchitecture()); } -std::string Keeper::getDefaultCorePath() const +std::string Keeper::getDefaultConfigFileName() const { - return getCanonicalPath(config().getString("path", KEEPER_DEFAULT_PATH)) + "cores"; + return "keeper_config.xml"; } void Keeper::defineOptions(Poco::Util::OptionSet & options) @@ -266,11 +255,11 @@ int Keeper::main(const std::vector & /*args*/) MainThreadStatus::getInstance(); #if !defined(NDEBUG) || !defined(__OPTIMIZE__) - LOG_WARNING(log, "Server was built in debug mode. It will work slowly."); + LOG_WARNING(log, "Keeper was built in debug mode. It will work slowly."); #endif #if defined(SANITIZER) - LOG_WARNING(log, "Server was built with sanitizer. It will work slowly."); + LOG_WARNING(log, "Keeper was built with sanitizer. It will work slowly."); #endif auto shared_context = Context::createShared(); @@ -278,6 +267,10 @@ int Keeper::main(const std::vector & /*args*/) global_context->makeGlobalContext(); global_context->setApplicationType(Context::ApplicationType::SERVER); + + if (!config().has("keeper_server")) + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Keeper configuration ( section) not found in config"); + const Settings & settings = global_context->getSettingsRef(); GlobalThreadPool::initialize(config().getUInt("max_thread_pool_size", 500)); @@ -290,9 +283,10 @@ int Keeper::main(const std::vector & /*args*/) DateLUT::instance(); LOG_TRACE(log, "Initialized DateLUT with time zone '{}'.", DateLUT::instance().getTimeZone()); - Poco::ThreadPool server_pool(3, config().getUInt("max_connections", 1024)); + /// Don't want to use DNS cache + DNSResolver::instance().setDisableCacheFlag(); - Poco::Timespan keep_alive_timeout(config().getUInt("keep_alive_timeout", 10), 0); + Poco::ThreadPool server_pool(3, config().getUInt("max_connections", 1024)); std::vector listen_hosts = DB::getMultipleValuesFromConfig(config(), "", "listen_host"); @@ -306,54 +300,51 @@ int Keeper::main(const std::vector & /*args*/) auto servers = std::make_shared>(); - if (config().has("keeper_server")) - { #if USE_NURAFT - /// Initialize test keeper RAFT. Do nothing if no nu_keeper_server in config. - global_context->initializeKeeperStorageDispatcher(); - for (const auto & listen_host : listen_hosts) + /// Initialize test keeper RAFT. Do nothing if no nu_keeper_server in config. 
+ global_context->initializeKeeperStorageDispatcher(); + for (const auto & listen_host : listen_hosts) + { + /// TCP Keeper + const char * port_name = "keeper_server.tcp_port"; + createServer(listen_host, port_name, listen_try, [&](UInt16 port) { - /// TCP Keeper - const char * port_name = "keeper_server.tcp_port"; - createServer(listen_host, port_name, listen_try, [&](UInt16 port) - { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(socket, listen_host, port); - socket.setReceiveTimeout(settings.receive_timeout); - socket.setSendTimeout(settings.send_timeout); - servers->emplace_back( - port_name, - std::make_unique( - new KeeperTCPHandlerFactory(*this, false), server_pool, socket, new Poco::Net::TCPServerParams)); + Poco::Net::ServerSocket socket; + auto address = socketBindListen(socket, listen_host, port); + socket.setReceiveTimeout(settings.receive_timeout); + socket.setSendTimeout(settings.send_timeout); + servers->emplace_back( + port_name, + std::make_unique( + new KeeperTCPHandlerFactory(*this, false), server_pool, socket, new Poco::Net::TCPServerParams)); - LOG_INFO(log, "Listening for connections to Keeper (tcp): {}", address.toString()); - }); + LOG_INFO(log, "Listening for connections to Keeper (tcp): {}", address.toString()); + }); - const char * secure_port_name = "keeper_server.tcp_port_secure"; - createServer(listen_host, secure_port_name, listen_try, [&](UInt16 port) - { + const char * secure_port_name = "keeper_server.tcp_port_secure"; + createServer(listen_host, secure_port_name, listen_try, [&](UInt16 port) + { #if USE_SSL - Poco::Net::SecureServerSocket socket; - auto address = socketBindListen(socket, listen_host, port, /* secure = */ true); - socket.setReceiveTimeout(settings.receive_timeout); - socket.setSendTimeout(settings.send_timeout); - servers->emplace_back( - secure_port_name, - std::make_unique( - new KeeperTCPHandlerFactory(*this, true), server_pool, socket, new Poco::Net::TCPServerParams)); - LOG_INFO(log, "Listening for connections to Keeper with secure protocol (tcp_secure): {}", address.toString()); + Poco::Net::SecureServerSocket socket; + auto address = socketBindListen(socket, listen_host, port, /* secure = */ true); + socket.setReceiveTimeout(settings.receive_timeout); + socket.setSendTimeout(settings.send_timeout); + servers->emplace_back( + secure_port_name, + std::make_unique( + new KeeperTCPHandlerFactory(*this, true), server_pool, socket, new Poco::Net::TCPServerParams)); + LOG_INFO(log, "Listening for connections to Keeper with secure protocol (tcp_secure): {}", address.toString()); #else - UNUSED(port); - throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.", - ErrorCodes::SUPPORT_IS_DISABLED}; + UNUSED(port); + throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.", + ErrorCodes::SUPPORT_IS_DISABLED}; #endif - }); - } + }); + } #else - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "ClickHouse server built without NuRaft library. Cannot use internal coordination."); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "ClickHouse keeper built without NuRaft library. 
Cannot use coordination."); #endif - } for (auto & server : *servers) server.start(); @@ -418,4 +409,14 @@ int Keeper::main(const std::vector & /*args*/) return Application::EXIT_OK; } + +void Keeper::logRevision() const +{ + Poco::Logger::root().information("Starting ClickHouse Keeper " + std::string{VERSION_STRING} + + " with revision " + std::to_string(ClickHouseRevision::getVersionRevision()) + + ", " + build_id_info + + ", PID " + std::to_string(getpid())); +} + + } diff --git a/programs/keeper/Keeper.h b/programs/keeper/Keeper.h index 0fe3465f789..ba34dbea313 100644 --- a/programs/keeper/Keeper.h +++ b/programs/keeper/Keeper.h @@ -42,6 +42,8 @@ public: void defineOptions(Poco::Util::OptionSet & _options) override; protected: + void logRevision() const override; + int run() override; void initialize(Application & self) override; @@ -50,7 +52,7 @@ protected: int main(const std::vector & args) override; - std::string getDefaultCorePath() const override; + std::string getDefaultConfigFileName() const override; private: ContextPtr global_context; diff --git a/programs/server/CMakeLists.txt b/programs/server/CMakeLists.txt index 0dcfbce1c30..bc7f2d94153 100644 --- a/programs/server/CMakeLists.txt +++ b/programs/server/CMakeLists.txt @@ -38,7 +38,7 @@ if (OS_LINUX) # 1. Allow to run the binary without download of any other files. # 2. Allow to implement "sudo clickhouse install" tool. - foreach(RESOURCE_FILE config.xml users.xml embedded.xml play.html) + foreach(RESOURCE_FILE config.xml users.xml embedded.xml play.html keeper_embedded.xml) set(RESOURCE_OBJ ${RESOURCE_FILE}.o) set(RESOURCE_OBJS ${RESOURCE_OBJS} ${RESOURCE_OBJ}) diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp index 39ab407579d..bc2a8a27943 100644 --- a/src/Common/Config/ConfigProcessor.cpp +++ b/src/Common/Config/ConfigProcessor.cpp @@ -462,10 +462,19 @@ XMLDocumentPtr ConfigProcessor::processConfig( } else { - /// When we can use config embedded in binary. + /// These embedded files added during build with some cmake magic. + /// Look at the end of programs/sever/CMakeLists.txt. + std::string embedded_name; if (path == "config.xml") + embedded_name = "embedded.xml"; + + if (path == "keeper_config.xml") + embedded_name = "keeper_embedded.xml"; + + /// When we can use config embedded in binary. + if (!embedded_name.empty()) { - auto resource = getResource("embedded.xml"); + auto resource = getResource(embedded_name); if (resource.empty()) throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Configuration file {} doesn't exist and there is no embedded config", path); LOG_DEBUG(log, "There is no file '{}', will use embedded config.", path); diff --git a/src/Core/Defines.h b/src/Core/Defines.h index 668a60f9be8..fbcc6fabbf0 100644 --- a/src/Core/Defines.h +++ b/src/Core/Defines.h @@ -98,6 +98,8 @@ #define DBMS_DEFAULT_PATH "/var/lib/clickhouse/" +#define KEEPER_DEFAULT_PATH "/var/lib/clickhouse/coordination" + // more aliases: https://mailman.videolan.org/pipermail/x264-devel/2014-May/010660.html /// Marks that extra information is sent to a shard. It could be any magic numbers. 
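With the two keeper patches above ([PATCH 025/204] and [PATCH 026/204]), the standalone entry point is in place: the `keeper` subcommand and the `clickhouse-keeper` symlink are added, the binary reads `keeper_config.xml` by default with an embedded fallback wired through ConfigProcessor, and a default config is installed under `/etc/clickhouse-keeper`. A minimal launch sketch follows, using the same `--config` / `--` override convention the Jepsen harness in the next patch relies on; the `--logger.*` destinations are illustrative assumptions chosen to match the packaged defaults, not values mandated by the patches:

    # Sketch: start the standalone keeper against the installed default config.
    # Log paths are assumptions for illustration.
    clickhouse-keeper --config /etc/clickhouse-keeper/keeper_config.xml -- \
        --logger.log /var/log/clickhouse-keeper/clickhouse-keeper.log \
        --logger.errorlog /var/log/clickhouse-keeper/clickhouse-keeper.err.log
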
From ed89af22b27d14ee3e694bfd7b504921ab58d45e Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 12 May 2021 16:31:29 +0300 Subject: [PATCH 027/204] Move jepsen tests to clickhouse-keeper --- .../jepsen.clickhouse-keeper/resources/config.xml | 1 - .../resources/keeper_config.xml | 8 ++++++++ .../jepsen.clickhouse-keeper/resources/listen.xml | 3 --- .../jepsen.clickhouse-keeper/resources/users.xml | 1 - .../src/jepsen/clickhouse_keeper/db.clj | 7 ++----- .../src/jepsen/clickhouse_keeper/utils.clj | 15 ++++++--------- 6 files changed, 16 insertions(+), 19 deletions(-) delete mode 120000 tests/jepsen.clickhouse-keeper/resources/config.xml delete mode 100644 tests/jepsen.clickhouse-keeper/resources/listen.xml delete mode 120000 tests/jepsen.clickhouse-keeper/resources/users.xml diff --git a/tests/jepsen.clickhouse-keeper/resources/config.xml b/tests/jepsen.clickhouse-keeper/resources/config.xml deleted file mode 120000 index c7596baa075..00000000000 --- a/tests/jepsen.clickhouse-keeper/resources/config.xml +++ /dev/null @@ -1 +0,0 @@ -../../../programs/server/config.xml \ No newline at end of file diff --git a/tests/jepsen.clickhouse-keeper/resources/keeper_config.xml b/tests/jepsen.clickhouse-keeper/resources/keeper_config.xml index 528ea5d77be..f06d9683990 100644 --- a/tests/jepsen.clickhouse-keeper/resources/keeper_config.xml +++ b/tests/jepsen.clickhouse-keeper/resources/keeper_config.xml @@ -1,4 +1,12 @@ + :: + + + trace + /var/log/clickhouse-keeper/clickhouse-keeper.log + /var/log/clickhouse-keeper/clickhouse-keeper.err.log + + 9181 {id} diff --git a/tests/jepsen.clickhouse-keeper/resources/listen.xml b/tests/jepsen.clickhouse-keeper/resources/listen.xml deleted file mode 100644 index de8c737ff75..00000000000 --- a/tests/jepsen.clickhouse-keeper/resources/listen.xml +++ /dev/null @@ -1,3 +0,0 @@ - - :: - diff --git a/tests/jepsen.clickhouse-keeper/resources/users.xml b/tests/jepsen.clickhouse-keeper/resources/users.xml deleted file mode 120000 index 41b137a130f..00000000000 --- a/tests/jepsen.clickhouse-keeper/resources/users.xml +++ /dev/null @@ -1 +0,0 @@ -../../../programs/server/users.xml \ No newline at end of file diff --git a/tests/jepsen.clickhouse-keeper/src/jepsen/clickhouse_keeper/db.clj b/tests/jepsen.clickhouse-keeper/src/jepsen/clickhouse_keeper/db.clj index fdb6b233fec..30c2c0eaf4f 100644 --- a/tests/jepsen.clickhouse-keeper/src/jepsen/clickhouse_keeper/db.clj +++ b/tests/jepsen.clickhouse-keeper/src/jepsen/clickhouse_keeper/db.clj @@ -89,10 +89,7 @@ (defn install-configs [test node] - (c/exec :echo (slurp (io/resource "config.xml")) :> (str configs-dir "/config.xml")) - (c/exec :echo (slurp (io/resource "users.xml")) :> (str configs-dir "/users.xml")) - (c/exec :echo (slurp (io/resource "listen.xml")) :> (str sub-configs-dir "/listen.xml")) - (c/exec :echo (cluster-config test node (slurp (io/resource "keeper_config.xml"))) :> (str sub-configs-dir "/keeper_config.xml"))) + (c/exec :echo (cluster-config test node (slurp (io/resource "keeper_config.xml"))) :> (str configs-dir "/keeper_config.xml"))) (defn collect-traces [test node] @@ -144,7 +141,7 @@ (info node "Coordination files exists, going to compress") (c/cd data-dir (c/exec :tar :czf "coordination.tar.gz" "coordination"))))) - (let [common-logs [stderr-file (str logs-dir "/clickhouse-server.log") (str data-dir "/coordination.tar.gz")] + (let [common-logs [stderr-file (str logs-dir "/clickhouse-keeper.log") (str data-dir "/coordination.tar.gz")] gdb-log (str logs-dir "/gdb.log")] (if (cu/exists? 
(str logs-dir "/gdb.log")) (conj common-logs gdb-log) diff --git a/tests/jepsen.clickhouse-keeper/src/jepsen/clickhouse_keeper/utils.clj b/tests/jepsen.clickhouse-keeper/src/jepsen/clickhouse_keeper/utils.clj index 70813457251..0457ff6eae2 100644 --- a/tests/jepsen.clickhouse-keeper/src/jepsen/clickhouse_keeper/utils.clj +++ b/tests/jepsen.clickhouse-keeper/src/jepsen/clickhouse_keeper/utils.clj @@ -143,7 +143,7 @@ [node test] (info "Checking server alive on" node) (try - (c/exec binary-path :client :--query "SELECT 1") + (zk-connect (name node) 9181 30000) (catch Exception _ false))) (defn wait-clickhouse-alive! @@ -169,16 +169,13 @@ :logfile stderr-file :chdir data-dir} binary-path - :server - :--config (str configs-dir "/config.xml") + :keeper + :--config (str configs-dir "/keeper_config.xml") :-- - :--path (str data-dir "/") - :--user_files_path (str data-dir "/user_files") - :--top_level_domains_path (str data-dir "/top_level_domains") - :--logger.log (str logs-dir "/clickhouse-server.log") - :--logger.errorlog (str logs-dir "/clickhouse-server.err.log") + :--logger.log (str logs-dir "/clickhouse-keeper.log") + :--logger.errorlog (str logs-dir "/clickhouse-keeper.err.log") :--keeper_server.snapshot_storage_path coordination-snapshots-dir - :--keeper_server.logs_storage_path coordination-logs-dir) + :--keeper_server.log_storage_path coordination-logs-dir) (wait-clickhouse-alive! node test))) (defn md5 [^String s] From 43ee9f0a3a3b3998a930c92e97ef7b4e43f3b111 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 12 May 2021 17:05:44 +0300 Subject: [PATCH 028/204] Check for directory owner --- programs/keeper/Keeper.cpp | 70 ++++++++++++++++++++++--- src/Coordination/KeeperServer.cpp | 15 +++++- src/Coordination/KeeperStateManager.cpp | 20 ++++++- 3 files changed, 96 insertions(+), 9 deletions(-) diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index e64134ed10d..a7e90eb5f07 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -1,5 +1,7 @@ #include "Keeper.h" +#include +#include #include #include #include @@ -15,6 +17,7 @@ #include #include #include +#include #include #if !defined(ARCADIA_BUILD) @@ -31,6 +34,11 @@ # include #endif +#if defined(OS_LINUX) +# include +# include +#endif + int mainEntryClickHouseKeeper(int argc, char ** argv) { DB::Keeper app; @@ -54,14 +62,9 @@ namespace ErrorCodes { extern const int NO_ELEMENTS_IN_CONFIG; extern const int SUPPORT_IS_DISABLED; - extern const int ARGUMENT_OUT_OF_BOUND; - extern const int EXCESSIVE_ELEMENT_IN_CONFIG; - extern const int INVALID_CONFIG_PARAMETER; - extern const int SYSTEM_ERROR; - extern const int FAILED_TO_GETPWUID; - extern const int MISMATCHING_USERS_FOR_PROCESS_AND_DATA; extern const int NETWORK_ERROR; - extern const int CORRUPTED_DATA; + extern const int MISMATCHING_USERS_FOR_PROCESS_AND_DATA; + extern const int FAILED_TO_GETPWUID; } namespace @@ -135,6 +138,26 @@ Poco::Net::SocketAddress makeSocketAddress(const std::string & host, UInt16 port #endif } +std::string getUserName(uid_t user_id) +{ + /// Try to convert user id into user name. 
+ auto buffer_size = sysconf(_SC_GETPW_R_SIZE_MAX); + if (buffer_size <= 0) + buffer_size = 1024; + std::string buffer; + buffer.reserve(buffer_size); + + struct passwd passwd_entry; + struct passwd * result = nullptr; + const auto error = getpwuid_r(user_id, &passwd_entry, buffer.data(), buffer_size, &result); + + if (error) + throwFromErrno("Failed to find user name for " + toString(user_id), ErrorCodes::FAILED_TO_GETPWUID, error); + else if (result) + return result->pw_name; + return toString(user_id); +} + } Poco::Net::SocketAddress Keeper::socketBindListen(Poco::Net::ServerSocket & socket, const std::string & host, UInt16 port, [[maybe_unused]] bool secure) const @@ -271,6 +294,39 @@ int Keeper::main(const std::vector & /*args*/) if (!config().has("keeper_server")) throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Keeper configuration ( section) not found in config"); + + std::string path; + + if (config().has("keeper_server.storage_path")) + path = config().getString("keeper_server.storage_path"); + else if (config().has("keeper_server.log_storage_path")) + path = config().getString("keeper_server.log_storage_path"); + else if (config().has("keeper_server.snapshot_storage_path")) + path = config().getString("keeper_server.snapshot_storage_path"); + else + path = std::filesystem::path{DBMS_DEFAULT_PATH} / "coordination/logs"; + + + /// Check that the process user id matches the owner of the data. + const auto effective_user_id = geteuid(); + struct stat statbuf; + if (stat(path.c_str(), &statbuf) == 0 && effective_user_id != statbuf.st_uid) + { + const auto effective_user = getUserName(effective_user_id); + const auto data_owner = getUserName(statbuf.st_uid); + std::string message = "Effective user of the process (" + effective_user + + ") does not match the owner of the data (" + data_owner + ")."; + if (effective_user_id == 0) + { + message += " Run under 'sudo -u " + data_owner + "'."; + throw Exception(message, ErrorCodes::MISMATCHING_USERS_FOR_PROCESS_AND_DATA); + } + else + { + LOG_WARNING(log, message); + } + } + const Settings & settings = global_context->getSettingsRef(); GlobalThreadPool::initialize(config().getUInt("max_thread_pool_size", 500)); diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index a3214474e96..56165e61be5 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include namespace DB @@ -59,6 +60,18 @@ void setSSLParams(nuraft::asio_service::options & asio_opts) } #endif +std::string getSnapshotsPathFromConfig(const Poco::Util::AbstractConfiguration & config) +{ + /// the most specialized path + if (config.has("keeper_server.snapshot_storage_path")) + return config.getString("keeper_server.snapshot_storage_path"); + + if (config.has("keeper_server.storage_path")) + return std::filesystem::path{config.getString("keeper_server.storage_path")} / "snapshots"; + + return std::filesystem::path{config.getString("path", DBMS_DEFAULT_PATH)} / "coordination/snapshots"; +} + } KeeperServer::KeeperServer( @@ -71,7 +84,7 @@ KeeperServer::KeeperServer( , coordination_settings(coordination_settings_) , state_machine(nuraft::cs_new( responses_queue_, snapshots_queue_, - config.getString("keeper_server.snapshot_storage_path", config.getString("path", DBMS_DEFAULT_PATH) + "coordination/snapshots"), + getSnapshotsPathFromConfig(config), coordination_settings)) , state_manager(nuraft::cs_new(server_id, "keeper_server", config, coordination_settings)) , 
log(&Poco::Logger::get("KeeperServer")) diff --git a/src/Coordination/KeeperStateManager.cpp b/src/Coordination/KeeperStateManager.cpp index e57ae7e7c19..57a9608fce3 100644 --- a/src/Coordination/KeeperStateManager.cpp +++ b/src/Coordination/KeeperStateManager.cpp @@ -1,5 +1,6 @@ #include #include +#include namespace DB { @@ -9,6 +10,23 @@ namespace ErrorCodes extern const int RAFT_ERROR; } +namespace +{ + +std::string getLogsPathFromConfig(const std::string & config_prefix, const Poco::Util::AbstractConfiguration & config) +{ + /// the most specialized path + if (config.has(config_prefix + ".log_storage_path")) + return config.getString(config_prefix + ".log_storage_path"); + + if (config.has(config_prefix + ".storage_path")) + return std::filesystem::path{config.getString(config_prefix + ".storage_path")} / "logs"; + + return std::filesystem::path{config.getString("path", DBMS_DEFAULT_PATH)} / "coordination/logs"; +} + +} + KeeperStateManager::KeeperStateManager(int server_id_, const std::string & host, int port, const std::string & logs_path) : my_server_id(server_id_) , my_port(port) @@ -28,7 +46,7 @@ KeeperStateManager::KeeperStateManager( : my_server_id(my_server_id_) , secure(config.getBool(config_prefix + ".raft_configuration.secure", false)) , log_store(nuraft::cs_new( - config.getString(config_prefix + ".log_storage_path", config.getString("path", DBMS_DEFAULT_PATH) + "coordination/logs"), + getLogsPathFromConfig(config_prefix, config), coordination_settings->rotate_log_storage_interval, coordination_settings->force_sync)) , cluster_config(nuraft::cs_new()) { From 4e3f103748a0fc5f2c575bc50e74dd389d20cd4c Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 12 May 2021 17:09:05 +0300 Subject: [PATCH 029/204] Missed config file --- programs/keeper/keeper_config.xml | 81 +++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) create mode 100644 programs/keeper/keeper_config.xml diff --git a/programs/keeper/keeper_config.xml b/programs/keeper/keeper_config.xml new file mode 100644 index 00000000000..ef218c9f2d7 --- /dev/null +++ b/programs/keeper/keeper_config.xml @@ -0,0 +1,81 @@ + + + + trace + /var/log/clickhouse-keeper/clickhouse-keeper.log + /var/log/clickhouse-keeper/clickhouse-keeper.err.log + + 1000M + 10 + + + + 4096 + + + 9181 + + + 1 + + /var/lib/clickhouse/coordination/logs + /var/lib/clickhouse/coordination/snapshots + + + 10000 + 30000 + information + + + + + + 1 + + + localhost + 44444 + + + + + + + + + + + + + /etc/clickhouse-keeper/server.crt + /etc/clickhouse-keeper/server.key + + /etc/clickhouse-keeper/dhparam.pem + none + true + true + sslv2,sslv3 + true + + + + From 9f5f29f0aaf862fa6e760e34a60e728bfde4feee Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 12 May 2021 17:16:45 +0300 Subject: [PATCH 030/204] Remove accident changes --- src/Core/Defines.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Core/Defines.h b/src/Core/Defines.h index fbcc6fabbf0..668a60f9be8 100644 --- a/src/Core/Defines.h +++ b/src/Core/Defines.h @@ -98,8 +98,6 @@ #define DBMS_DEFAULT_PATH "/var/lib/clickhouse/" -#define KEEPER_DEFAULT_PATH "/var/lib/clickhouse/coordination" - // more aliases: https://mailman.videolan.org/pipermail/x264-devel/2014-May/010660.html /// Marks that extra information is sent to a shard. It could be any magic numbers. 
From da73ba04dab12e79d208a56f6c5e8ba3658fb0b5 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 12 May 2021 17:57:01 +0300 Subject: [PATCH 031/204] Add missed file --- programs/server/keeper_embedded.xml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 programs/server/keeper_embedded.xml diff --git a/programs/server/keeper_embedded.xml b/programs/server/keeper_embedded.xml new file mode 100644 index 00000000000..37edaedba80 --- /dev/null +++ b/programs/server/keeper_embedded.xml @@ -0,0 +1,21 @@ + + + trace + true + + + + 9181 + 1 + ./keeper_log + ./keeper_snapshot + + + + 1 + localhost + 44444 + + + + From dc55d197092613d034656f3b8d8339e1fdc32dfb Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 12 May 2021 23:00:28 +0300 Subject: [PATCH 032/204] Fix fasttest image --- docker/test/fasttest/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 42c720a7e63..3c899b0dabb 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -73,7 +73,7 @@ function start_server --path "$FASTTEST_DATA" --user_files_path "$FASTTEST_DATA/user_files" --top_level_domains_path "$FASTTEST_DATA/top_level_domains" - --keeper_server.log_storage_path "$FASTTEST_DATA/coordination" + --keeper_server.storage_path "$FASTTEST_DATA/coordination" ) clickhouse-server "${opts[@]}" &>> "$FASTTEST_OUTPUT/server.log" & server_pid=$! From df22534dbc00176773c8d7c18086db3ad6439960 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 13 May 2021 11:42:10 +0300 Subject: [PATCH 033/204] Fix cmake --- programs/keeper/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index cd10baa2e57..211bd74ba3e 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -13,4 +13,4 @@ set (CLICKHOUSE_KEEPER_LINK clickhouse_program_add(keeper) -install (FILES clickhouse-keeper.xml DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-keeper" COMPONENT clickhouse-keeper RENAME keeper_config.xml) +install (FILES keeper_config.xml DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-keeper" COMPONENT clickhouse-keeper) From 7115045317999f0d06ee7c9d123f26e6588330e2 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 13 May 2021 09:39:57 +0000 Subject: [PATCH 034/204] Tiny changes --- src/Storages/RabbitMQ/RabbitMQHandler.h | 8 -------- src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 15 +++++++++------ src/Storages/RabbitMQ/StorageRabbitMQ.h | 6 +++--- tests/integration/test_storage_rabbitmq/test.py | 8 ++++---- 4 files changed, 16 insertions(+), 21 deletions(-) diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.h b/src/Storages/RabbitMQ/RabbitMQHandler.h index f1c7afffc5a..78ccf56dec2 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.h +++ b/src/Storages/RabbitMQ/RabbitMQHandler.h @@ -18,14 +18,6 @@ namespace Loop } -class RabbitMQChannel : public AMQP::TcpChannel -{ -public: - RabbitMQChannel(AMQP::TcpConnection * connection) : TcpChannel(connection) {} - ~RabbitMQChannel() override { close(); } -}; - - class RabbitMQHandler : public AMQP::LibUvHandler { diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 0949c8ac802..8ec55c1b1c1 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -271,7 +271,7 @@ void StorageRabbitMQ::initRabbitMQ() return; } - RabbitMQChannel rabbit_channel(connection.get()); + AMQP::TcpChannel 
rabbit_channel(connection.get()); /// Main exchange -> Bridge exchange -> ( Sharding exchange ) -> Queues -> Consumers @@ -283,10 +283,11 @@ void StorageRabbitMQ::initRabbitMQ() LOG_TRACE(log, "RabbitMQ setup completed"); rabbit_is_ready = true; + rabbit_channel.close(); } -void StorageRabbitMQ::initExchange(RabbitMQChannel & rabbit_channel) +void StorageRabbitMQ::initExchange(AMQP::TcpChannel & rabbit_channel) { /// Exchange hierarchy: /// 1. Main exchange (defined with table settings - rabbitmq_exchange_name, rabbitmq_exchange_type). @@ -357,7 +358,7 @@ void StorageRabbitMQ::initExchange(RabbitMQChannel & rabbit_channel) } -void StorageRabbitMQ::bindExchange(RabbitMQChannel & rabbit_channel) +void StorageRabbitMQ::bindExchange(AMQP::TcpChannel & rabbit_channel) { size_t bound_keys = 0; @@ -418,7 +419,7 @@ void StorageRabbitMQ::bindExchange(RabbitMQChannel & rabbit_channel) } -void StorageRabbitMQ::bindQueue(size_t queue_id, RabbitMQChannel & rabbit_channel) +void StorageRabbitMQ::bindQueue(size_t queue_id, AMQP::TcpChannel & rabbit_channel) { auto success_callback = [&](const std::string & queue_name, int msgcount, int /* consumercount */) { @@ -574,7 +575,7 @@ void StorageRabbitMQ::unbindExchange() event_handler->updateLoopState(Loop::STOP); looping_task->deactivate(); - RabbitMQChannel rabbit_channel(connection.get()); + AMQP::TcpChannel rabbit_channel(connection.get()); rabbit_channel.removeExchange(bridge_exchange) .onSuccess([&]() { @@ -589,6 +590,7 @@ void StorageRabbitMQ::unbindExchange() { event_handler->iterateLoop(); } + rabbit_channel.close(); }); } @@ -720,7 +722,7 @@ void StorageRabbitMQ::cleanupRabbitMQ() const if (use_user_setup) return; - RabbitMQChannel rabbit_channel(connection.get()); + AMQP::TcpChannel rabbit_channel(connection.get()); for (const auto & queue : queues) { /// AMQP::ifunused is needed, because it is possible to share queues between multiple tables and dropping @@ -740,6 +742,7 @@ void StorageRabbitMQ::cleanupRabbitMQ() const }); } event_handler->startBlockingLoop(); + rabbit_channel.close(); /// Also there is no need to cleanup exchanges as they were created with AMQP::autodelete option. Once queues /// are removed, exchanges will also be cleaned. 
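The change above drops the small `RabbitMQChannel` RAII wrapper, whose destructor closed the channel, in favour of a plain `AMQP::TcpChannel` with explicit `close()` calls at the end of `initRabbitMQ()`, `unbindExchange()` and `cleanupRabbitMQ()`. If closing on every exit path ever becomes a concern again, a generic scope guard is one way to keep the plain channel type while restoring the RAII behaviour; the sketch below is only an illustration and deliberately does not touch the AMQP-CPP API.

```cpp
// Illustration only: a minimal scope guard that runs a cleanup action when the
// scope is left, on both normal and exceptional paths.
#include <iostream>
#include <utility>

template <typename F>
class ScopeGuard
{
public:
    explicit ScopeGuard(F func_) : func(std::move(func_)) {}
    ~ScopeGuard() { func(); }

    ScopeGuard(const ScopeGuard &) = delete;
    ScopeGuard & operator=(const ScopeGuard &) = delete;

private:
    F func;
};

int main()
{
    std::cout << "channel opened\n";
    ScopeGuard close_channel([] { std::cout << "channel closed\n"; });  // stand-in for rabbit_channel.close()
    std::cout << "declare exchanges, bind queues\n";
    // "channel closed" is printed automatically when the scope is left.
}
```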
diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h index 0e47a8e1150..fd2da6bba21 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.h +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h @@ -159,9 +159,9 @@ private: void initRabbitMQ(); void cleanupRabbitMQ() const; - void initExchange(RabbitMQChannel & rabbit_channel); - void bindExchange(RabbitMQChannel & rabbit_channel); - void bindQueue(size_t queue_id, RabbitMQChannel & rabbit_channel); + void initExchange(AMQP::TcpChannel & rabbit_channel); + void bindExchange(AMQP::TcpChannel & rabbit_channel); + void bindQueue(size_t queue_id, AMQP::TcpChannel & rabbit_channel); bool restoreConnection(bool reconnecting); bool streamToViews(); diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index 008f5313e22..1f14886e50f 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -1980,7 +1980,7 @@ def test_rabbitmq_drop_table_properly(rabbitmq_cluster): SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', rabbitmq_exchange_name = 'drop', rabbitmq_format = 'JSONEachRow', - rabbitmq_queue_base = 'rabbit_queue' + rabbitmq_queue_base = 'rabbit_queue_drop' ''') credentials = pika.PlainCredentials('root', 'clickhouse') @@ -1994,14 +1994,14 @@ def test_rabbitmq_drop_table_properly(rabbitmq_cluster): if result == "1\t2\n": break - exists = channel.queue_declare(queue='rabbit_queue', passive=True) + exists = channel.queue_declare(queue='rabbit_queue_drop', passive=True) assert(exists) instance.query("DROP TABLE test.rabbitmq_drop") time.sleep(30) try: - exists = channel.queue_declare(callback, queue='rabbit_queue', passive=True) + exists = channel.queue_declare(callback, queue='rabbit_queue_drop', passive=True) except Exception as e: exists = False @@ -2016,7 +2016,7 @@ def test_rabbitmq_queue_settings(rabbitmq_cluster): SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', rabbitmq_exchange_name = 'rabbit_exchange', rabbitmq_format = 'JSONEachRow', - rabbitmq_queue_base = 'rabbit_queue', + rabbitmq_queue_base = 'rabbit_queue_settings', rabbitmq_queue_settings_list = 'x-max-length=10,x-overflow=reject-publish' ''') From e114c7eb8b8d26a8a160af7e96eff1ee73fc738d Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 13 May 2021 14:29:59 +0300 Subject: [PATCH 035/204] fix virtual parts in REPLACE_RANGE --- src/Storages/MergeTree/ActiveDataPartSet.cpp | 14 +++ src/Storages/MergeTree/MergeTreeData.cpp | 10 +- .../MergeTree/MergeTreeDataMergerMutator.cpp | 22 +++- .../MergeTree/ReplicatedMergeTreeLogEntry.cpp | 32 ++++++ .../MergeTree/ReplicatedMergeTreeLogEntry.h | 30 +---- .../MergeTree/ReplicatedMergeTreeQueue.cpp | 34 ++++-- src/Storages/StorageReplicatedMergeTree.cpp | 106 +++++++++++++----- src/Storages/StorageReplicatedMergeTree.h | 2 +- .../0_stateless/01154_move_partition.sh | 2 +- 9 files changed, 184 insertions(+), 68 deletions(-) diff --git a/src/Storages/MergeTree/ActiveDataPartSet.cpp b/src/Storages/MergeTree/ActiveDataPartSet.cpp index e4d7465b360..bbf05afa42c 100644 --- a/src/Storages/MergeTree/ActiveDataPartSet.cpp +++ b/src/Storages/MergeTree/ActiveDataPartSet.cpp @@ -1,10 +1,16 @@ #include +#include #include namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + ActiveDataPartSet::ActiveDataPartSet(MergeTreeDataFormatVersion format_version_, const Strings & names) : format_version(format_version_) { @@ -15,6 +21,7 @@ 
ActiveDataPartSet::ActiveDataPartSet(MergeTreeDataFormatVersion format_version_, bool ActiveDataPartSet::add(const String & name, Strings * out_replaced_parts) { + /// TODO make it exception safe (out_replaced_parts->push_back(...) may throw) auto part_info = MergeTreePartInfo::fromPartName(name, format_version); if (getContainingPartImpl(part_info) != part_info_to_name.end()) @@ -32,6 +39,8 @@ bool ActiveDataPartSet::add(const String & name, Strings * out_replaced_parts) --it; if (!part_info.contains(it->first)) { + if (!part_info.isDisjoint(it->first)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Part {} intersects previous part {}. It is a bug.", name, it->first.getPartName()); ++it; break; } @@ -47,11 +56,16 @@ bool ActiveDataPartSet::add(const String & name, Strings * out_replaced_parts) /// Let's go to the right. while (it != part_info_to_name.end() && part_info.contains(it->first)) { + if (part_info == it->first) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected duplicate part {}. It is a bug.", name); if (out_replaced_parts) out_replaced_parts->push_back(it->second); part_info_to_name.erase(it++); } + if (it != part_info_to_name.end() && !part_info.isDisjoint(it->first)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Part {} intersects next part {}. It is a bug.", name, it->first.getPartName()); + part_info_to_name.emplace(part_info, name); return true; } diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 80d50e199cd..49fa885ac62 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -2005,8 +2005,8 @@ MergeTreeData::DataPartsVector MergeTreeData::getActivePartsToReplace( } if (!new_part_info.isDisjoint((*prev)->info)) - throw Exception("Part " + new_part_name + " intersects previous part " + (*prev)->getNameWithState() + - ". It is a bug.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Part {} intersects previous part {}. It is a bug.", + new_part_name, (*prev)->getNameWithState()); break; } @@ -2019,7 +2019,7 @@ MergeTreeData::DataPartsVector MergeTreeData::getActivePartsToReplace( while (end != committed_parts_range.end()) { if ((*end)->info == new_part_info) - throw Exception("Unexpected duplicate part " + (*end)->getNameWithState() + ". It is a bug.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected duplicate part {}. It is a bug.", (*end)->getNameWithState()); if (!new_part_info.contains((*end)->info)) { @@ -2030,8 +2030,8 @@ MergeTreeData::DataPartsVector MergeTreeData::getActivePartsToReplace( } if (!new_part_info.isDisjoint((*end)->info)) - throw Exception("Part " + new_part_name + " intersects next part " + (*end)->getNameWithState() + - ". It is a bug.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Part {} intersects next part {}. 
It is a bug.", + new_part_name, (*end)->getNameWithState()); break; } diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 3eb9ef1b481..f9278347201 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -236,6 +236,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge( /// Previous part only in boundaries of partition frame const MergeTreeData::DataPartPtr * prev_part = nullptr; + String range_str; size_t parts_selected_precondition = 0; for (const MergeTreeData::DataPartPtr & part : data_parts) { @@ -244,7 +245,11 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge( if (!prev_partition_id || partition_id != *prev_partition_id) { if (parts_ranges.empty() || !parts_ranges.back().empty()) + { + LOG_DEBUG(log, "selectPartsToMerge 1: range {}", range_str); + range_str.clear(); parts_ranges.emplace_back(); + } /// New partition frame. prev_partition_id = &partition_id; prev_part = nullptr; @@ -257,17 +262,26 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge( * So we have to check if this part is currently being inserted with quorum and so on and so forth. * Obviously we have to check it manually only for the first part * of each partition because it will be automatically checked for a pair of parts. */ - if (!can_merge_callback(nullptr, part, nullptr)) + String reason; + bool can = can_merge_callback(nullptr, part, &reason); + LOG_DEBUG(log, "Can merge single part {}: {} {}", part->name, can, reason); + if (!can) continue; + } else { /// If we cannot merge with previous part we had to start new parts /// interval (in the same partition) - if (!can_merge_callback(*prev_part, part, nullptr)) + String reason; + bool can = can_merge_callback(*prev_part, part, &reason); + LOG_DEBUG(log, "Can merge {} and {}: {} {}", (*prev_part)->name, part->name, can, reason); + if (!can) { /// Starting new interval in the same partition assert(!parts_ranges.back().empty()); + LOG_DEBUG(log, "selectPartsToMerge 2: range {}", range_str); + range_str.clear(); parts_ranges.emplace_back(); /// Now we have no previous part, but it affects only logging @@ -286,6 +300,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge( ++parts_selected_precondition; + range_str += part->name + " "; parts_ranges.back().emplace_back(part_info); /// Check for consistency of data parts. If assertion is failed, it requires immediate investigation. 
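The logging added above traces how `selectPartsToMerge` builds `parts_ranges`: parts are walked in their sorted order, a new range is opened whenever the partition changes or `can_merge_callback` rejects the neighbouring pair, and `range_str` simply records which parts ended up in the current range. A condensed, self-contained sketch of that grouping step, with plain `{partition, block}` pairs standing in for data parts and an arbitrary predicate standing in for the callback:

```cpp
// Sketch of splitting an ordered part list into ranges of mergeable neighbours.
#include <functional>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

using Part = std::pair<std::string, int>;   // {partition_id, block number}
using Ranges = std::vector<std::vector<Part>>;
using CanMerge = std::function<bool(const Part *, const Part &)>;

Ranges splitIntoRanges(const std::vector<Part> & parts, const CanMerge & can_merge)
{
    Ranges ranges;
    const Part * prev = nullptr;

    for (const auto & part : parts)
    {
        /// A new partition always starts a new range.
        if (!prev || prev->first != part.first)
        {
            if (ranges.empty() || !ranges.back().empty())
                ranges.emplace_back();
            prev = nullptr;
        }

        if (!prev)
        {
            /// The first candidate of a range must be mergeable on its own.
            if (!can_merge(nullptr, part))
                continue;
        }
        else if (!can_merge(prev, part))
        {
            /// The current range cannot be extended: start a new one in the same partition.
            ranges.emplace_back();
        }

        ranges.back().push_back(part);
        prev = &part;
    }
    return ranges;
}

int main()
{
    std::vector<Part> parts{{"p1", 1}, {"p1", 2}, {"p1", 3}, {"p2", 1}};
    /// Pretend that parts p1_2 and p1_3 cannot be merged with each other.
    auto can_merge = [](const Part * lhs, const Part & rhs) { return !(lhs && lhs->second == 2 && rhs.second == 3); };

    for (const auto & range : splitIntoRanges(parts, can_merge))
    {
        for (const auto & [partition, block] : range)
            std::cout << partition << '_' << block << ' ';
        std::cout << '\n';
    }
    /// Prints three ranges: "p1_1 p1_2", "p1_3", "p2_1".
}
```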
@@ -298,6 +313,9 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge( prev_part = ∂ } + LOG_DEBUG(log, "selectPartsToMerge 3: range {}", range_str); + + LOG_DEBUG(log, "selectPartsToMerge: {} ranges", parts_ranges.size()); if (parts_selected_precondition == 0) { if (out_disable_reason) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp index 7d8ba0e4a30..22e8b7afa17 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp @@ -3,6 +3,7 @@ #include #include +#include #include #include #include @@ -386,4 +387,35 @@ ReplicatedMergeTreeLogEntry::Ptr ReplicatedMergeTreeLogEntry::parse(const String return res; } +Strings ReplicatedMergeTreeLogEntryData::getVirtualPartNames(MergeTreeDataFormatVersion format_version) const +{ + /// Doesn't produce any part + if (type == ALTER_METADATA) + return {}; + + /// DROP_RANGE does not add a real part, but we must disable merges in that range + if (type == DROP_RANGE) + return {new_part_name}; + + /// Return {} because selection of merges in the partition where the column is cleared + /// should not be blocked (only execution of merges should be blocked). + if (type == CLEAR_COLUMN || type == CLEAR_INDEX) + return {}; + + if (type == REPLACE_RANGE) + { + Strings res = replace_range_entry->new_part_names; + auto drop_range_info = MergeTreePartInfo::fromPartName(replace_range_entry->drop_range_part_name, format_version); + assert(drop_range_info.getBlocksCount() != 0); + if (drop_range_info.getBlocksCount() > 1) + { + /// It's REPLACE, not MOVE or ATTACH, so drop range is real + res.emplace_back(replace_range_entry->drop_range_part_name); + } + return res; + } + + return {new_part_name}; +} + } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h index 309120560e7..4d4569981f6 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -128,36 +129,13 @@ struct ReplicatedMergeTreeLogEntryData /// Returns a set of parts that will appear after executing the entry + parts to block /// selection of merges. These parts are added to queue.virtual_parts. - Strings getVirtualPartNames() const - { - /// Doesn't produce any part - if (type == ALTER_METADATA) - return {}; - - /// DROP_RANGE does not add a real part, but we must disable merges in that range - if (type == DROP_RANGE) - return {new_part_name}; - - /// Return {} because selection of merges in the partition where the column is cleared - /// should not be blocked (only execution of merges should be blocked). - if (type == CLEAR_COLUMN || type == CLEAR_INDEX) - return {}; - - if (type == REPLACE_RANGE) - { - Strings res = replace_range_entry->new_part_names; - res.emplace_back(replace_range_entry->drop_range_part_name); - return res; - } - - return {new_part_name}; - } + Strings getVirtualPartNames(MergeTreeDataFormatVersion format_version) const; /// Returns set of parts that denote the block number ranges that should be blocked during the entry execution. /// These parts are added to future_parts. 
- Strings getBlockingPartNames() const + Strings getBlockingPartNames(MergeTreeDataFormatVersion format_version) const { - Strings res = getVirtualPartNames(); + Strings res = getVirtualPartNames(format_version); if (type == CLEAR_COLUMN) res.emplace_back(new_part_name); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 7a0f60ff327..1327ec72eb7 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -133,7 +133,7 @@ void ReplicatedMergeTreeQueue::insertUnlocked( const LogEntryPtr & entry, std::optional & min_unprocessed_insert_time_changed, std::lock_guard & state_lock) { - for (const String & virtual_part_name : entry->getVirtualPartNames()) + for (const String & virtual_part_name : entry->getVirtualPartNames(format_version)) { virtual_parts.add(virtual_part_name); addPartToMutations(virtual_part_name); @@ -220,7 +220,7 @@ void ReplicatedMergeTreeQueue::updateStateOnQueueEntryRemoval( removeCoveredPartsFromMutations(entry->actual_new_part_name, /*remove_part = */ false, /*remove_covered_parts = */ true); } - for (const String & virtual_part_name : entry->getVirtualPartNames()) + for (const String & virtual_part_name : entry->getVirtualPartNames(format_version)) { current_parts.add(virtual_part_name); @@ -249,7 +249,7 @@ void ReplicatedMergeTreeQueue::updateStateOnQueueEntryRemoval( } else { - for (const String & virtual_part_name : entry->getVirtualPartNames()) + for (const String & virtual_part_name : entry->getVirtualPartNames(format_version)) { /// Because execution of the entry is unsuccessful, /// `virtual_part_name` will never appear so we won't need to mutate @@ -752,7 +752,7 @@ void ReplicatedMergeTreeQueue::updateMutations(zkutil::ZooKeeperPtr zookeeper, C /// mutation block number that would appear as a result of executing the queue. for (const auto & queue_entry : queue) { - for (const String & produced_part_name : queue_entry->getVirtualPartNames()) + for (const String & produced_part_name : queue_entry->getVirtualPartNames(format_version)) { auto part_info = MergeTreePartInfo::fromPartName(produced_part_name, format_version); @@ -1033,7 +1033,7 @@ bool ReplicatedMergeTreeQueue::shouldExecuteLogEntry( || entry.type == LogEntry::ATTACH_PART || entry.type == LogEntry::MUTATE_PART) { - for (const String & new_part_name : entry.getBlockingPartNames()) + for (const String & new_part_name : entry.getBlockingPartNames(format_version)) { if (!isNotCoveredByFuturePartsImpl(entry.znode_name, new_part_name, out_postpone_reason, state_lock)) return false; @@ -1251,7 +1251,7 @@ ReplicatedMergeTreeQueue::CurrentlyExecuting::CurrentlyExecuting(const Replicate ++entry->num_tries; entry->last_attempt_time = time(nullptr); - for (const String & new_part_name : entry->getBlockingPartNames()) + for (const String & new_part_name : entry->getBlockingPartNames(queue.format_version)) { if (!queue.future_parts.emplace(new_part_name, entry).second) throw Exception("Tagging already tagged future part " + new_part_name + ". 
This is a bug.", ErrorCodes::LOGICAL_ERROR); @@ -1288,7 +1288,7 @@ ReplicatedMergeTreeQueue::CurrentlyExecuting::~CurrentlyExecuting() entry->currently_executing = false; entry->execution_complete.notify_all(); - for (const String & new_part_name : entry->getBlockingPartNames()) + for (const String & new_part_name : entry->getBlockingPartNames(queue.format_version)) { if (!queue.future_parts.erase(new_part_name)) LOG_ERROR(queue.log, "Untagging already untagged future part {}. This is a bug.", new_part_name); @@ -1585,7 +1585,7 @@ void ReplicatedMergeTreeQueue::disableMergesInBlockRange(const String & part_nam std::lock_guard lock(state_mutex); virtual_parts.add(part_name); } - std::this_thread::sleep_for(std::chrono::milliseconds(500)); //FIXME + std::this_thread::sleep_for(std::chrono::milliseconds(1000)); //FIXME } @@ -1817,6 +1817,24 @@ ReplicatedMergeTreeMergePredicate::ReplicatedMergeTreeMergePredicate( } else inprogress_quorum_part.clear(); + + String blocks_str; + for (const auto & partition : committing_blocks) + { + blocks_str += partition.first; + blocks_str += " ("; + for (const auto & num : partition.second) + blocks_str += toString(num); + blocks_str += + ") "; + } + ActiveDataPartSet virtual_parts(queue.format_version); + { + std::lock_guard lock(queue.state_mutex); + virtual_parts = queue.virtual_parts; + } + + LOG_DEBUG(queue.log, "MergePredicate: ver {},\t prev_virt {},\t comm {},\t, virt {},\t iqp {}", + merges_version, boost::algorithm::join(prev_virtual_parts.getParts(), ", "), blocks_str, boost::algorithm::join(virtual_parts.getParts(), ", "), inprogress_quorum_part); } bool ReplicatedMergeTreeMergePredicate::operator()( diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 278dd1da36d..2d9d4b6fd4e 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -225,6 +225,26 @@ static String extractZooKeeperPath(const String & path) return normalizeZooKeeperPath(path); } +static MergeTreePartInfo makeDummyDropRangeForMovePartitionOrAttachPartitionFrom(const String & partition_id) +{ + /// NOTE We don't have special log entry type for MOVE PARTITION/ATTACH PARTITION FROM, + /// so we use REPLACE_RANGE with dummy range of one block, which means "attach, not replace". + /// It's safe to fill drop range for MOVE PARTITION/ATTACH PARTITION FROM with zeros, + /// because drop range for REPLACE PARTITION must contain at least 2 blocks, + /// so we can distinguish dummy drop range from any real or virtual part. + /// But we should never construct such part name, even for virtual part, + /// because it can be confused with real part _0_0_0. + /// TODO get rid of this. 
+ + MergeTreePartInfo drop_range; + drop_range.partition_id = partition_id; + drop_range.min_block = 0; + drop_range.max_block = 0; + drop_range.level = 0; + drop_range.mutation = 0; + return drop_range; +} + StorageReplicatedMergeTree::StorageReplicatedMergeTree( const String & zookeeper_path_, const String & replica_name_, @@ -2149,13 +2169,16 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) { Stopwatch watch; auto & entry_replace = *entry.replace_range_entry; + LOG_DEBUG(log, "Executing log entry {} to replace parts range {} with {} parts from {}.{}", + entry.znode_name, entry_replace.drop_range_part_name, entry_replace.new_part_names.size(), + entry_replace.from_database, entry_replace.from_table); auto metadata_snapshot = getInMemoryMetadataPtr(); MergeTreePartInfo drop_range = MergeTreePartInfo::fromPartName(entry_replace.drop_range_part_name, format_version); /// Range with only one block has special meaning ATTACH PARTITION - bool replace = drop_range.getBlocksCount() > 1; + bool replace = drop_range.getBlocksCount() > 1; //FIXME - queue.removePartProducingOpsInRange(getZooKeeper(), drop_range, entry); + queue.removePartProducingOpsInRange(getZooKeeper(), drop_range, entry); //FIXME struct PartDescription { @@ -2226,7 +2249,16 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) } if (parts_to_add.empty() && replace) + { parts_to_remove = removePartsInRangeFromWorkingSet(drop_range, true, false, data_parts_lock); + String parts_to_remove_str; + for (const auto & part : parts_to_remove) + { + parts_to_remove_str += part->name; + parts_to_remove_str += " "; + } + LOG_TRACE(log, "Replacing {} parts {}with empty set", parts_to_remove.size(), parts_to_remove_str); + } } if (parts_to_add.empty()) @@ -2361,8 +2393,9 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) /// Filter covered parts PartDescriptions final_parts; + Strings final_part_names; { - Strings final_part_names = adding_parts_active_set.getParts(); + final_part_names = adding_parts_active_set.getParts(); for (const String & final_part_name : final_part_names) { @@ -2380,7 +2413,7 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) if (!prev.found_new_part_info.isDisjoint(curr.found_new_part_info)) { throw Exception("Intersected final parts detected: " + prev.found_new_part_name - + " and " + curr.found_new_part_name + ". It should be investigated.", ErrorCodes::INCORRECT_DATA); + + " and " + curr.found_new_part_name + ". 
It should be investigated.", ErrorCodes::LOGICAL_ERROR); } } } @@ -2459,7 +2492,17 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) transaction.commit(&data_parts_lock); if (replace) + { parts_to_remove = removePartsInRangeFromWorkingSet(drop_range, true, false, data_parts_lock); + String parts_to_remove_str; + for (const auto & part : parts_to_remove) + { + parts_to_remove_str += part->name; + parts_to_remove_str += " "; + } + LOG_TRACE(log, "Replacing {} parts {}with {} parts ", parts_to_remove.size(), parts_to_remove_str, + final_parts.size(), boost::algorithm::join(final_part_names, ", ")); + } } PartLog::addNewParts(getContext(), res_parts, watch.elapsed()); @@ -4731,7 +4774,7 @@ static String getPartNamePossiblyFake(MergeTreeDataFormatVersion format_version, return part_info.getPartName(); } -bool StorageReplicatedMergeTree::getFakePartCoveringAllPartsInPartition(const String & partition_id, MergeTreePartInfo & part_info, bool for_replace_partition) +bool StorageReplicatedMergeTree::getFakePartCoveringAllPartsInPartition(const String & partition_id, MergeTreePartInfo & part_info, bool for_replace_range) { /// Even if there is no data in the partition, you still need to mark the range for deletion. /// - Because before executing DETACH, tasks for downloading parts to this partition can be executed. @@ -4754,17 +4797,22 @@ bool StorageReplicatedMergeTree::getFakePartCoveringAllPartsInPartition(const St mutation_version = queue.getCurrentMutationVersion(partition_id, right); } - /// REPLACE PARTITION uses different max level and does not decrement max_block of DROP_RANGE for unknown (probably historical) reason. - auto max_level = std::numeric_limits::max(); - if (!for_replace_partition) + /// Empty partition. + if (right == 0) + return false; + + --right; + + decltype(part_info.level) max_level = MergeTreePartInfo::MAX_LEVEL; + if (for_replace_range) { - max_level = MergeTreePartInfo::MAX_LEVEL; + /// REPLACE/MOVE PARTITION uses different max level for unknown (probably historical) reason. + max_level = std::numeric_limits::max(); - /// Empty partition. - if (right == 0) - return false; - - --right; + /// NOTE Undo max block number decrement for REPLACE_RANGE, because there are invariants: + /// - drop range for REPLACE PARTITION must contain at least 2 blocks (1 skipped block and at least 1 real block) + /// - drop range for MOVE PARTITION/ATTACH PARTITION FROM always contains 1 block + ++right; } /// Artificial high level is chosen, to make this part "covering" all parts inside. @@ -6069,13 +6117,26 @@ void StorageReplicatedMergeTree::replacePartitionFrom( /// So, such case has special meaning, if drop_range contains only one block it means that nothing to drop. /// TODO why not to add normal DROP_RANGE entry to replication queue if `replace` is true? MergeTreePartInfo drop_range; - getFakePartCoveringAllPartsInPartition(partition_id, drop_range, true); + bool partition_was_empty = !getFakePartCoveringAllPartsInPartition(partition_id, drop_range, true); + if (replace && partition_was_empty) + { + /// Nothing to drop, will just attach new parts + LOG_INFO(log, "Partition {} was empty, REPLACE PARTITION will work as ATTACH PARTITION FROM", drop_range.partition_id); + replace = false; + } + if (!replace) - drop_range.min_block = drop_range.max_block; + { + /// It's ATTACH PARTITION FROM, not REPLACE PARTITION. 
We have to reset drop range + drop_range = makeDummyDropRangeForMovePartitionOrAttachPartitionFrom(drop_range.partition_id); + } + + assert(drop_range.getBlocksCount() > 0); + assert(replace == (drop_range.getBlocksCount() > 1)); String drop_range_fake_part_name = getPartNamePossiblyFake(format_version, drop_range); - if (drop_range.getBlocksCount() > 1) + if (replace) { /// We have to prohibit merges in drop_range, since new merge log entry appeared after this REPLACE FROM entry /// could produce new merged part instead in place of just deleted parts. @@ -6268,10 +6329,10 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta /// A range for log entry to remove parts from the source table (myself). MergeTreePartInfo drop_range; - getFakePartCoveringAllPartsInPartition(partition_id, drop_range, true); + bool partition_was_not_empty = getFakePartCoveringAllPartsInPartition(partition_id, drop_range, true); String drop_range_fake_part_name = getPartNamePossiblyFake(format_version, drop_range); - if (drop_range.getBlocksCount() > 1) + if (partition_was_not_empty) { std::lock_guard merge_selecting_lock(merge_selecting_mutex); queue.disableMergesInBlockRange(drop_range_fake_part_name); @@ -6318,12 +6379,7 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta ReplicatedMergeTreeLogEntryData entry; { - MergeTreePartInfo drop_range_dest; - drop_range_dest.partition_id = drop_range.partition_id; - drop_range_dest.max_block = drop_range.max_block; - drop_range_dest.min_block = drop_range.max_block; //FIXME typo? - drop_range_dest.level = drop_range.level; - drop_range_dest.mutation = drop_range.mutation; + MergeTreePartInfo drop_range_dest = makeDummyDropRangeForMovePartitionOrAttachPartitionFrom(drop_range.partition_id); entry.type = ReplicatedMergeTreeLogEntryData::REPLACE_RANGE; entry.source_replica = dest_table_storage->replica_name; diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index c70556f40df..7094770319a 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -609,7 +609,7 @@ private: /// Produce an imaginary part info covering all parts in the specified partition (at the call moment). /// Returns false if the partition doesn't exist yet. - bool getFakePartCoveringAllPartsInPartition(const String & partition_id, MergeTreePartInfo & part_info, bool for_replace_partition = false); + bool getFakePartCoveringAllPartsInPartition(const String & partition_id, MergeTreePartInfo & part_info, bool for_replace_range = false); /// Check for a node in ZK. If it is, remember this information, and then immediately answer true. 
mutable std::unordered_set existing_nodes_cache; diff --git a/tests/queries/0_stateless/01154_move_partition.sh b/tests/queries/0_stateless/01154_move_partition.sh index e5134b1f773..f0e70d125c6 100755 --- a/tests/queries/0_stateless/01154_move_partition.sh +++ b/tests/queries/0_stateless/01154_move_partition.sh @@ -67,7 +67,7 @@ function drop_partition_thread() while true; do REPLICA=$(($RANDOM % 16)) PARTITION=$(($RANDOM % 10)) - $CLICKHOUSE_CLIENT -q "ALTER TABLE dst_$TO_REPLICA DROP PARTITION $PARTITION" 2>/dev/null + $CLICKHOUSE_CLIENT -q "ALTER TABLE dst_$REPLICA DROP PARTITION $PARTITION" 2>/dev/null sleep 0.$RANDOM; done } From ee46850112dcedbfb0d21db01525eaa39484bf56 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 13 May 2021 15:26:10 +0300 Subject: [PATCH 036/204] Better cmake for keeper --- programs/CMakeLists.txt | 48 +++++++++++++++++++ programs/keeper/CMakeLists.txt | 8 ++++ .../{server => keeper}/keeper_embedded.xml | 0 programs/server/CMakeLists.txt | 35 +------------- 4 files changed, 57 insertions(+), 34 deletions(-) rename programs/{server => keeper}/keeper_embedded.xml (100%) diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt index 500e4794335..6fd4c2050b4 100644 --- a/programs/CMakeLists.txt +++ b/programs/CMakeLists.txt @@ -198,6 +198,54 @@ macro(clickhouse_program_add name) clickhouse_program_add_executable(${name}) endmacro() +# Embed default config files as a resource into the binary. +# This is needed for two purposes: +# 1. Allow to run the binary without download of any other files. +# 2. Allow to implement "sudo clickhouse install" tool. +# +# Arguments: target (server, client, keeper, etc.) and list of files +# +# Also dependency on TARGET_FILE is required, look at examples in programs/server and programs/keeper +macro(clickhouse_embed_binaries) + # TODO We actually need this on Mac, FreeBSD. + if (OS_LINUX) + + set(arguments_list "${ARGN}") + list(GET arguments_list 0 target) + + # for some reason cmake iterates loop including + math(EXPR arguments_count "${ARGC}-1") + + foreach(RESOURCE_POS RANGE 1 "${arguments_count}") + list(GET arguments_list "${RESOURCE_POS}" RESOURCE_FILE) + set(RESOURCE_OBJ ${RESOURCE_FILE}.o) + set(RESOURCE_OBJS ${RESOURCE_OBJS} ${RESOURCE_OBJ}) + + # https://stackoverflow.com/questions/14776463/compile-and-add-an-object-file-from-a-binary-with-cmake + # PPC64LE fails to do this with objcopy, use ld or lld instead + if (ARCH_PPC64LE) + add_custom_command(OUTPUT ${RESOURCE_OBJ} + COMMAND cd ${CMAKE_CURRENT_SOURCE_DIR} && ${CMAKE_LINKER} -m elf64lppc -r -b binary -o "${CMAKE_CURRENT_BINARY_DIR}/${RESOURCE_OBJ}" ${RESOURCE_FILE}) + else() + add_custom_command(OUTPUT ${RESOURCE_OBJ} + COMMAND cd ${CMAKE_CURRENT_SOURCE_DIR} && ${OBJCOPY_PATH} -I binary ${OBJCOPY_ARCH_OPTIONS} ${RESOURCE_FILE} "${CMAKE_CURRENT_BINARY_DIR}/${RESOURCE_OBJ}" + COMMAND ${OBJCOPY_PATH} --rename-section .data=.rodata,alloc,load,readonly,data,contents + "${CMAKE_CURRENT_BINARY_DIR}/${RESOURCE_OBJ}" "${CMAKE_CURRENT_BINARY_DIR}/${RESOURCE_OBJ}") + endif() + set_source_files_properties(${RESOURCE_OBJ} PROPERTIES EXTERNAL_OBJECT true GENERATED true) + endforeach() + + add_library(clickhouse_${target}_configs STATIC ${RESOURCE_OBJS}) + set_target_properties(clickhouse_${target}_configs PROPERTIES LINKER_LANGUAGE C) + + # whole-archive prevents symbols from being discarded for unknown reason + # CMake can shuffle each of target_link_libraries arguments with other + # libraries in linker command. 
To avoid this we hardcode whole-archive + # library into single string. + add_dependencies(clickhouse-${target}-lib clickhouse_${target}_configs) + endif () +endmacro() + add_subdirectory (server) add_subdirectory (client) diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index 211bd74ba3e..e604d0e304e 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -2,6 +2,10 @@ set(CLICKHOUSE_KEEPER_SOURCES Keeper.cpp ) +if (OS_LINUX) + set (LINK_RESOURCE_LIB INTERFACE "-Wl,${WHOLE_ARCHIVE} $ -Wl,${NO_WHOLE_ARCHIVE}") +endif () + set (CLICKHOUSE_KEEPER_LINK PRIVATE clickhouse_common_config @@ -9,8 +13,12 @@ set (CLICKHOUSE_KEEPER_LINK clickhouse_common_zookeeper daemon dbms + + ${LINK_RESOURCE_LIB} ) clickhouse_program_add(keeper) install (FILES keeper_config.xml DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-keeper" COMPONENT clickhouse-keeper) + +clickhouse_embed_binaries(keeper keeper_config.xml keeper_embedded.xml) diff --git a/programs/server/keeper_embedded.xml b/programs/keeper/keeper_embedded.xml similarity index 100% rename from programs/server/keeper_embedded.xml rename to programs/keeper/keeper_embedded.xml diff --git a/programs/server/CMakeLists.txt b/programs/server/CMakeLists.txt index bc7f2d94153..f7f76fdb450 100644 --- a/programs/server/CMakeLists.txt +++ b/programs/server/CMakeLists.txt @@ -31,37 +31,4 @@ clickhouse_program_add(server) install(FILES config.xml users.xml DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-server" COMPONENT clickhouse) -# TODO We actually need this on Mac, FreeBSD. -if (OS_LINUX) - # Embed default config files as a resource into the binary. - # This is needed for two purposes: - # 1. Allow to run the binary without download of any other files. - # 2. Allow to implement "sudo clickhouse install" tool. - - foreach(RESOURCE_FILE config.xml users.xml embedded.xml play.html keeper_embedded.xml) - set(RESOURCE_OBJ ${RESOURCE_FILE}.o) - set(RESOURCE_OBJS ${RESOURCE_OBJS} ${RESOURCE_OBJ}) - - # https://stackoverflow.com/questions/14776463/compile-and-add-an-object-file-from-a-binary-with-cmake - # PPC64LE fails to do this with objcopy, use ld or lld instead - if (ARCH_PPC64LE) - add_custom_command(OUTPUT ${RESOURCE_OBJ} - COMMAND cd ${CMAKE_CURRENT_SOURCE_DIR} && ${CMAKE_LINKER} -m elf64lppc -r -b binary -o "${CMAKE_CURRENT_BINARY_DIR}/${RESOURCE_OBJ}" ${RESOURCE_FILE}) - else() - add_custom_command(OUTPUT ${RESOURCE_OBJ} - COMMAND cd ${CMAKE_CURRENT_SOURCE_DIR} && ${OBJCOPY_PATH} -I binary ${OBJCOPY_ARCH_OPTIONS} ${RESOURCE_FILE} "${CMAKE_CURRENT_BINARY_DIR}/${RESOURCE_OBJ}" - COMMAND ${OBJCOPY_PATH} --rename-section .data=.rodata,alloc,load,readonly,data,contents - "${CMAKE_CURRENT_BINARY_DIR}/${RESOURCE_OBJ}" "${CMAKE_CURRENT_BINARY_DIR}/${RESOURCE_OBJ}") - endif() - set_source_files_properties(${RESOURCE_OBJ} PROPERTIES EXTERNAL_OBJECT true GENERATED true) - endforeach(RESOURCE_FILE) - - add_library(clickhouse_server_configs STATIC ${RESOURCE_OBJS}) - set_target_properties(clickhouse_server_configs PROPERTIES LINKER_LANGUAGE C) - - # whole-archive prevents symbols from being discarded for unknown reason - # CMake can shuffle each of target_link_libraries arguments with other - # libraries in linker command. To avoid this we hardcode whole-archive - # library into single string. 
- add_dependencies(clickhouse-server-lib clickhouse_server_configs) -endif () +clickhouse_embed_binaries(server config.xml users.xml embedded.xml play.html) From 2da12f0b59d5a52cc937e2da9991419646721cbf Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 13 May 2021 17:04:36 +0300 Subject: [PATCH 037/204] fix intersection of drop range and merge --- src/Storages/MergeTree/MergeTreeData.cpp | 19 ++-- src/Storages/MergeTree/MergeTreeData.h | 3 +- .../MergeTree/MergeTreeDataMergerMutator.cpp | 22 +---- .../MergeTree/ReplicatedMergeTreeQueue.cpp | 27 +----- .../MergeTree/ReplicatedMergeTreeQueue.h | 3 +- src/Storages/StorageMergeTree.cpp | 2 +- src/Storages/StorageReplicatedMergeTree.cpp | 95 ++++++++++--------- src/Storages/StorageReplicatedMergeTree.h | 5 +- 8 files changed, 68 insertions(+), 108 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 49fa885ac62..f7f19df0a81 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -2261,7 +2261,7 @@ void MergeTreeData::removePartsFromWorkingSet(const DataPartsVector & remove, bo } MergeTreeData::DataPartsVector MergeTreeData::removePartsInRangeFromWorkingSet(const MergeTreePartInfo & drop_range, bool clear_without_timeout, - bool skip_intersecting_parts, DataPartsLock & lock) + DataPartsLock & lock) { DataPartsVector parts_to_remove; @@ -2275,16 +2275,13 @@ MergeTreeData::DataPartsVector MergeTreeData::removePartsInRangeFromWorkingSet(c if (part->info.partition_id != drop_range.partition_id) throw Exception("Unexpected partition_id of part " + part->name + ". This is a bug.", ErrorCodes::LOGICAL_ERROR); - if (part->info.min_block < drop_range.min_block) /// Always false, because drop_range.min_block == 0 + if (part->info.min_block < drop_range.min_block) /// NOTE Always false, because drop_range.min_block == 0 { if (drop_range.min_block <= part->info.max_block) { /// Intersect left border - String error = "Unexpected merged part " + part->name + " intersecting drop range " + drop_range.getPartName(); - if (!skip_intersecting_parts) - throw Exception(error, ErrorCodes::LOGICAL_ERROR); - - LOG_WARNING(log, error); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected merged part {} intersecting drop range {}", + part->name, drop_range.getPartName()); } continue; @@ -2297,12 +2294,8 @@ MergeTreeData::DataPartsVector MergeTreeData::removePartsInRangeFromWorkingSet(c if (part->info.min_block <= drop_range.max_block && drop_range.max_block < part->info.max_block) { /// Intersect right border - String error = "Unexpected merged part " + part->name + " intersecting drop range " + drop_range.getPartName(); - if (!skip_intersecting_parts) - throw Exception(error, ErrorCodes::LOGICAL_ERROR); - - LOG_WARNING(log, error); - continue; + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected merged part {} intersecting drop range {}", + part->name, drop_range.getPartName()); } if (part->getState() != DataPartState::Deleting) diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 46c0014d9f7..d74aac81fff 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -476,10 +476,9 @@ public: /// Removes all parts from the working set parts /// for which (partition_id = drop_range.partition_id && min_block >= drop_range.min_block && max_block <= drop_range.max_block). - /// If a part intersecting drop_range.max_block is found, an exception will be thrown. 
/// Used in REPLACE PARTITION command; DataPartsVector removePartsInRangeFromWorkingSet(const MergeTreePartInfo & drop_range, bool clear_without_timeout, - bool skip_intersecting_parts, DataPartsLock & lock); + DataPartsLock & lock); /// Renames the part to detached/_ and removes it from data_parts, //// so it will not be deleted in clearOldParts. diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index f9278347201..385f3182729 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -236,7 +236,6 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge( /// Previous part only in boundaries of partition frame const MergeTreeData::DataPartPtr * prev_part = nullptr; - String range_str; size_t parts_selected_precondition = 0; for (const MergeTreeData::DataPartPtr & part : data_parts) { @@ -245,11 +244,8 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge( if (!prev_partition_id || partition_id != *prev_partition_id) { if (parts_ranges.empty() || !parts_ranges.back().empty()) - { - LOG_DEBUG(log, "selectPartsToMerge 1: range {}", range_str); - range_str.clear(); parts_ranges.emplace_back(); - } + /// New partition frame. prev_partition_id = &partition_id; prev_part = nullptr; @@ -262,10 +258,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge( * So we have to check if this part is currently being inserted with quorum and so on and so forth. * Obviously we have to check it manually only for the first part * of each partition because it will be automatically checked for a pair of parts. */ - String reason; - bool can = can_merge_callback(nullptr, part, &reason); - LOG_DEBUG(log, "Can merge single part {}: {} {}", part->name, can, reason); - if (!can) + if (!can_merge_callback(nullptr, part, nullptr)) continue; } @@ -273,15 +266,10 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge( { /// If we cannot merge with previous part we had to start new parts /// interval (in the same partition) - String reason; - bool can = can_merge_callback(*prev_part, part, &reason); - LOG_DEBUG(log, "Can merge {} and {}: {} {}", (*prev_part)->name, part->name, can, reason); - if (!can) + if (!can_merge_callback(*prev_part, part, nullptr)) { /// Starting new interval in the same partition assert(!parts_ranges.back().empty()); - LOG_DEBUG(log, "selectPartsToMerge 2: range {}", range_str); - range_str.clear(); parts_ranges.emplace_back(); /// Now we have no previous part, but it affects only logging @@ -300,7 +288,6 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge( ++parts_selected_precondition; - range_str += part->name + " "; parts_ranges.back().emplace_back(part_info); /// Check for consistency of data parts. If assertion is failed, it requires immediate investigation. 
@@ -313,9 +300,6 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge( prev_part = ∂ } - LOG_DEBUG(log, "selectPartsToMerge 3: range {}", range_str); - - LOG_DEBUG(log, "selectPartsToMerge: {} ranges", parts_ranges.size()); if (parts_selected_precondition == 0) { if (out_disable_reason) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 1327ec72eb7..fd09fb0039c 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -1579,13 +1579,10 @@ bool ReplicatedMergeTreeQueue::tryFinalizeMutations(zkutil::ZooKeeperPtr zookeep } -void ReplicatedMergeTreeQueue::disableMergesInBlockRange(const String & part_name) +void ReplicatedMergeTreeQueue::disableMergesInBlockRangeOnLocalReplica(const String & part_name) { - { - std::lock_guard lock(state_mutex); - virtual_parts.add(part_name); - } - std::this_thread::sleep_for(std::chrono::milliseconds(1000)); //FIXME + std::lock_guard lock(state_mutex); + virtual_parts.add(part_name); } @@ -1817,24 +1814,6 @@ ReplicatedMergeTreeMergePredicate::ReplicatedMergeTreeMergePredicate( } else inprogress_quorum_part.clear(); - - String blocks_str; - for (const auto & partition : committing_blocks) - { - blocks_str += partition.first; - blocks_str += " ("; - for (const auto & num : partition.second) - blocks_str += toString(num); - blocks_str += + ") "; - } - ActiveDataPartSet virtual_parts(queue.format_version); - { - std::lock_guard lock(queue.state_mutex); - virtual_parts = queue.virtual_parts; - } - - LOG_DEBUG(queue.log, "MergePredicate: ver {},\t prev_virt {},\t comm {},\t, virt {},\t iqp {}", - merges_version, boost::algorithm::join(prev_virtual_parts.getParts(), ", "), blocks_str, boost::algorithm::join(virtual_parts.getParts(), ", "), inprogress_quorum_part); } bool ReplicatedMergeTreeMergePredicate::operator()( diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h index 5c59e700db9..a902b2dad93 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h @@ -381,7 +381,8 @@ public: /// Add part to virtual_parts, which means that part must exist /// after processing replication log up to log_pointer. /// Part maybe fake (look at ReplicatedMergeTreeMergePredicate). - void disableMergesInBlockRange(const String & part_name); + /// NOTE It does not disable merges on other leader replicas. 
+ void disableMergesInBlockRangeOnLocalReplica(const String & part_name); /// Checks that part is already in virtual parts bool isVirtualPart(const MergeTreeData::DataPartPtr & data_part) const; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index f34e93e212f..6cda809f11e 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -1357,7 +1357,7 @@ void StorageMergeTree::replacePartitionFrom(const StoragePtr & source_table, con /// If it is REPLACE (not ATTACH), remove all parts which max_block_number less then min_block_number of the first new block if (replace) - removePartsInRangeFromWorkingSet(drop_range, true, false, data_parts_lock); + removePartsInRangeFromWorkingSet(drop_range, true, data_parts_lock); } PartLog::addNewParts(getContext(), dst_parts, watch.elapsed()); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 2d9d4b6fd4e..a3c7bd3e678 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -2137,7 +2137,7 @@ void StorageReplicatedMergeTree::executeDropRange(const LogEntry & entry) DataPartsVector parts_to_remove; { auto data_parts_lock = lockParts(); - parts_to_remove = removePartsInRangeFromWorkingSet(drop_range_info, true, true, data_parts_lock); + parts_to_remove = removePartsInRangeFromWorkingSet(drop_range_info, true, data_parts_lock); } if (entry.detach) @@ -2250,7 +2250,7 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) if (parts_to_add.empty() && replace) { - parts_to_remove = removePartsInRangeFromWorkingSet(drop_range, true, false, data_parts_lock); + parts_to_remove = removePartsInRangeFromWorkingSet(drop_range, true, data_parts_lock); String parts_to_remove_str; for (const auto & part : parts_to_remove) { @@ -2493,7 +2493,7 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) transaction.commit(&data_parts_lock); if (replace) { - parts_to_remove = removePartsInRangeFromWorkingSet(drop_range, true, false, data_parts_lock); + parts_to_remove = removePartsInRangeFromWorkingSet(drop_range, true, data_parts_lock); String parts_to_remove_str; for (const auto & part : parts_to_remove) { @@ -4774,7 +4774,8 @@ static String getPartNamePossiblyFake(MergeTreeDataFormatVersion format_version, return part_info.getPartName(); } -bool StorageReplicatedMergeTree::getFakePartCoveringAllPartsInPartition(const String & partition_id, MergeTreePartInfo & part_info, bool for_replace_range) +bool StorageReplicatedMergeTree::getFakePartCoveringAllPartsInPartition(const String & partition_id, MergeTreePartInfo & part_info, + std::optional & delimiting_block_lock, bool for_replace_range) { /// Even if there is no data in the partition, you still need to mark the range for deletion. /// - Because before executing DETACH, tasks for downloading parts to this partition can be executed. 
@@ -4791,32 +4792,31 @@ bool StorageReplicatedMergeTree::getFakePartCoveringAllPartsInPartition(const St { auto zookeeper = getZooKeeper(); - auto block_number_lock = allocateBlockNumber(partition_id, zookeeper); - right = block_number_lock->getNumber(); - block_number_lock->unlock(); + delimiting_block_lock = allocateBlockNumber(partition_id, zookeeper); + right = delimiting_block_lock->getNumber(); mutation_version = queue.getCurrentMutationVersion(partition_id, right); } + if (for_replace_range) + { + /// NOTE Undo max block number decrement for REPLACE_RANGE, because there are invariants: + /// - drop range for REPLACE PARTITION must contain at least 2 blocks (1 skipped block and at least 1 real block) + /// - drop range for MOVE PARTITION/ATTACH PARTITION FROM always contains 1 block + + /// REPLACE/MOVE PARTITION uses different max level for unknown (probably historical) reason. + auto max_level = std::numeric_limits::max(); + part_info = MergeTreePartInfo(partition_id, left, right, max_level, mutation_version); + return right != 0; + } + /// Empty partition. if (right == 0) return false; --right; - decltype(part_info.level) max_level = MergeTreePartInfo::MAX_LEVEL; - if (for_replace_range) - { - /// REPLACE/MOVE PARTITION uses different max level for unknown (probably historical) reason. - max_level = std::numeric_limits::max(); - - /// NOTE Undo max block number decrement for REPLACE_RANGE, because there are invariants: - /// - drop range for REPLACE PARTITION must contain at least 2 blocks (1 skipped block and at least 1 real block) - /// - drop range for MOVE PARTITION/ATTACH PARTITION FROM always contains 1 block - ++right; - } - /// Artificial high level is chosen, to make this part "covering" all parts inside. - part_info = MergeTreePartInfo(partition_id, left, right, max_level, mutation_version); + part_info = MergeTreePartInfo(partition_id, left, right, MergeTreePartInfo::MAX_LEVEL, mutation_version); return true; } @@ -6117,7 +6117,8 @@ void StorageReplicatedMergeTree::replacePartitionFrom( /// So, such case has special meaning, if drop_range contains only one block it means that nothing to drop. /// TODO why not to add normal DROP_RANGE entry to replication queue if `replace` is true? MergeTreePartInfo drop_range; - bool partition_was_empty = !getFakePartCoveringAllPartsInPartition(partition_id, drop_range, true); + std::optional delimiting_block_lock; + bool partition_was_empty = !getFakePartCoveringAllPartsInPartition(partition_id, drop_range, delimiting_block_lock, true); if (replace && partition_was_empty) { /// Nothing to drop, will just attach new parts @@ -6128,7 +6129,7 @@ void StorageReplicatedMergeTree::replacePartitionFrom( if (!replace) { /// It's ATTACH PARTITION FROM, not REPLACE PARTITION. We have to reset drop range - drop_range = makeDummyDropRangeForMovePartitionOrAttachPartitionFrom(drop_range.partition_id); + drop_range = makeDummyDropRangeForMovePartitionOrAttachPartitionFrom(partition_id); } assert(drop_range.getBlocksCount() > 0); @@ -6138,17 +6139,9 @@ void StorageReplicatedMergeTree::replacePartitionFrom( if (replace) { - /// We have to prohibit merges in drop_range, since new merge log entry appeared after this REPLACE FROM entry - /// could produce new merged part instead in place of just deleted parts. - /// It is better to prohibit them on leader replica (like DROP PARTITION makes), - /// but it is inconvenient for a user since he could actually use source table from this replica. 
- /// Therefore prohibit merges on the initializer server now and on the remaining servers when log entry will be executed. - /// It does not provides strong guarantees, but is suitable for intended use case (assume merges are quite rare). - - { - std::lock_guard merge_selecting_lock(merge_selecting_mutex); - queue.disableMergesInBlockRange(drop_range_fake_part_name); - } + /// Optional step + std::lock_guard merge_selecting_lock(merge_selecting_mutex); + queue.disableMergesInBlockRangeOnLocalReplica(drop_range_fake_part_name); } for (const auto & src_part : src_all_parts) @@ -6242,6 +6235,7 @@ void StorageReplicatedMergeTree::replacePartitionFrom( ops.emplace_back(zkutil::makeSetRequest(zookeeper_path + "/log", "", -1)); /// Just update version ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/log/log-", entry.toString(), zkutil::CreateMode::PersistentSequential)); + delimiting_block_lock->getUnlockOps(ops); Transaction transaction(*this); { @@ -6252,13 +6246,14 @@ void StorageReplicatedMergeTree::replacePartitionFrom( } op_results = zookeeper->multi(ops); + delimiting_block_lock->assumeUnlocked(); { auto data_parts_lock = lockParts(); transaction.commit(&data_parts_lock); if (replace) - parts_to_remove = removePartsInRangeFromWorkingSet(drop_range, true, false, data_parts_lock); + parts_to_remove = removePartsInRangeFromWorkingSet(drop_range, true, data_parts_lock); } PartLog::addNewParts(getContext(), dst_parts, watch.elapsed()); @@ -6329,13 +6324,15 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta /// A range for log entry to remove parts from the source table (myself). MergeTreePartInfo drop_range; - bool partition_was_not_empty = getFakePartCoveringAllPartsInPartition(partition_id, drop_range, true); + std::optional delimiting_block_lock; + bool partition_was_not_empty = getFakePartCoveringAllPartsInPartition(partition_id, drop_range, delimiting_block_lock, true); String drop_range_fake_part_name = getPartNamePossiblyFake(format_version, drop_range); if (partition_was_not_empty) { + /// Optional step std::lock_guard merge_selecting_lock(merge_selecting_mutex); - queue.disableMergesInBlockRange(drop_range_fake_part_name); + queue.disableMergesInBlockRangeOnLocalReplica(drop_range_fake_part_name); } /// Clone parts into destination table. 
@@ -6379,7 +6376,7 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta ReplicatedMergeTreeLogEntryData entry; { - MergeTreePartInfo drop_range_dest = makeDummyDropRangeForMovePartitionOrAttachPartitionFrom(drop_range.partition_id); + MergeTreePartInfo drop_range_dest = makeDummyDropRangeForMovePartitionOrAttachPartitionFrom(partition_id); entry.type = ReplicatedMergeTreeLogEntryData::REPLACE_RANGE; entry.source_replica = dest_table_storage->replica_name; @@ -6438,7 +6435,7 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta op_results = zookeeper->multi(ops); - parts_to_remove = removePartsInRangeFromWorkingSet(drop_range, true, false, lock); + parts_to_remove = removePartsInRangeFromWorkingSet(drop_range, true, lock); transaction.commit(&lock); } @@ -6467,13 +6464,15 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta dest_table_storage->waitForAllReplicasToProcessLogEntry(entry); } - Coordination::Requests ops_dest; + Coordination::Requests ops_src; - ops_dest.emplace_back(zkutil::makeCreateRequest( + ops_src.emplace_back(zkutil::makeCreateRequest( zookeeper_path + "/log/log-", entry_delete.toString(), zkutil::CreateMode::PersistentSequential)); - ops_dest.emplace_back(zkutil::makeSetRequest(zookeeper_path + "/log", "", -1)); /// Just update version + ops_src.emplace_back(zkutil::makeSetRequest(zookeeper_path + "/log", "", -1)); /// Just update version + delimiting_block_lock->getUnlockOps(ops_src); - op_results = zookeeper->multi(ops_dest); + op_results = zookeeper->multi(ops_src); + delimiting_block_lock->assumeUnlocked(); log_znode_path = dynamic_cast(*op_results.front()).path_created; entry_delete.znode_name = log_znode_path.substr(log_znode_path.find_last_of('/') + 1); @@ -6706,7 +6705,9 @@ bool StorageReplicatedMergeTree::dropAllPartsInPartition( zkutil::ZooKeeper & zookeeper, String & partition_id, LogEntry & entry, ContextPtr query_context, bool detach) { MergeTreePartInfo drop_range_info; - if (!getFakePartCoveringAllPartsInPartition(partition_id, drop_range_info)) + /// It prevent other replicas from assigning merges which intersect locked block number. + std::optional delimiting_block_lock; + if (!getFakePartCoveringAllPartsInPartition(partition_id, drop_range_info, delimiting_block_lock)) { LOG_INFO(log, "Will not drop partition {}, it is empty.", partition_id); return false; @@ -6714,13 +6715,11 @@ bool StorageReplicatedMergeTree::dropAllPartsInPartition( clearBlocksInPartition(zookeeper, partition_id, drop_range_info.min_block, drop_range_info.max_block); - /** Forbid to choose the parts to be deleted for merging. - * Invariant: after the `DROP_RANGE` entry appears in the log, merge of deleted parts will not appear in the log. - */ String drop_range_fake_part_name = getPartNamePossiblyFake(format_version, drop_range_info); { + /// Optional step std::lock_guard merge_selecting_lock(merge_selecting_mutex); - queue.disableMergesInBlockRange(drop_range_fake_part_name); + queue.disableMergesInBlockRangeOnLocalReplica(drop_range_fake_part_name); } LOG_DEBUG(log, "Disabled merges covered by range {}", drop_range_fake_part_name); @@ -6735,9 +6734,11 @@ bool StorageReplicatedMergeTree::dropAllPartsInPartition( Coordination::Requests ops; ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/log/log-", entry.toString(), zkutil::CreateMode::PersistentSequential)); ops.emplace_back(zkutil::makeSetRequest(zookeeper_path + "/log", "", -1)); /// Just update version. 
+ delimiting_block_lock->getUnlockOps(ops); if (auto txn = query_context->getZooKeeperMetadataTransaction()) txn->moveOpsTo(ops); Coordination::Responses responses = zookeeper.multi(ops); + delimiting_block_lock->assumeUnlocked(); String log_znode_path = dynamic_cast(*responses.front()).path_created; entry.znode_name = log_znode_path.substr(log_znode_path.find_last_of('/') + 1); diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 7094770319a..27d4e38d454 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -609,7 +609,10 @@ private: /// Produce an imaginary part info covering all parts in the specified partition (at the call moment). /// Returns false if the partition doesn't exist yet. - bool getFakePartCoveringAllPartsInPartition(const String & partition_id, MergeTreePartInfo & part_info, bool for_replace_range = false); + /// Caller must hold delimiting_block_lock until creation of drop/replace entry in log. + /// Otherwise some replica may assign merge which intersects part_info. + bool getFakePartCoveringAllPartsInPartition(const String & partition_id, MergeTreePartInfo & part_info, + std::optional & delimiting_block_lock, bool for_replace_range = false); /// Check for a node in ZK. If it is, remember this information, and then immediately answer true. mutable std::unordered_set existing_nodes_cache; From eaffe6848be8b64ca1618c5a5b1be0ac9003ac57 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 13 May 2021 17:23:00 +0300 Subject: [PATCH 038/204] fix --- src/Storages/StorageReplicatedMergeTree.cpp | 11 +++++++---- tests/queries/0_stateless/01154_move_partition.sh | 2 +- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index a3c7bd3e678..39f4fd272f8 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -2175,10 +2175,13 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) auto metadata_snapshot = getInMemoryMetadataPtr(); MergeTreePartInfo drop_range = MergeTreePartInfo::fromPartName(entry_replace.drop_range_part_name, format_version); - /// Range with only one block has special meaning ATTACH PARTITION - bool replace = drop_range.getBlocksCount() > 1; //FIXME + /// Range with only one block has special meaning: it's ATTACH PARTITION or MOVE PARTITION, so there is no drop range + bool replace = drop_range.getBlocksCount() > 1; - queue.removePartProducingOpsInRange(getZooKeeper(), drop_range, entry); //FIXME + if (replace) + queue.removePartProducingOpsInRange(getZooKeeper(), drop_range, entry); + else + drop_range = {}; struct PartDescription { @@ -6233,9 +6236,9 @@ void StorageReplicatedMergeTree::replacePartitionFrom( if (auto txn = query_context->getZooKeeperMetadataTransaction()) txn->moveOpsTo(ops); + delimiting_block_lock->getUnlockOps(ops); ops.emplace_back(zkutil::makeSetRequest(zookeeper_path + "/log", "", -1)); /// Just update version ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/log/log-", entry.toString(), zkutil::CreateMode::PersistentSequential)); - delimiting_block_lock->getUnlockOps(ops); Transaction transaction(*this); { diff --git a/tests/queries/0_stateless/01154_move_partition.sh b/tests/queries/0_stateless/01154_move_partition.sh index f0e70d125c6..6d4fef3876b 100755 --- a/tests/queries/0_stateless/01154_move_partition.sh +++ 
b/tests/queries/0_stateless/01154_move_partition.sh @@ -92,7 +92,7 @@ export -f replace_partition_src_src_thread; export -f drop_partition_thread; export -f optimize_thread; -TIMEOUT=100 +TIMEOUT=60 #timeout $TIMEOUT bash -c "create_drop_thread ${engines[@]}" & timeout $TIMEOUT bash -c 'insert_thread src' & From dc6eb48197fb5b9bbb538c5f047777f1e5c62850 Mon Sep 17 00:00:00 2001 From: Evgeniia Sudarikova Date: Thu, 13 May 2021 18:05:48 +0300 Subject: [PATCH 039/204] edited after review --- .../functions/type-conversion-functions.md | 9 +++++--- .../functions/type-conversion-functions.md | 22 +++++++++++-------- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index 87dbfa1728f..8a477b79836 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -469,7 +469,8 @@ Converts input value `x` to the `T` data type. Unlike to `reinterpret` function, The syntax `CAST(x AS t)` is also supported. -Note, that if value `x` does not fit the bounds of type `T`, the function overflows. For example, `CAST(-1, 'UInt8')` returns `255`. +!!! note "Note" + If value `x` does not fit the bounds of type `T`, the function overflows. For example, `CAST(-1, 'UInt8')` returns `255`. **Syntax** @@ -1167,8 +1168,10 @@ Result: ## toUnixTimestamp64Nano {#tounixtimestamp64nano} -Converts a `DateTime64` to a `Int64` value with fixed sub-second precision. -Input value is scaled up or down appropriately depending on it precision. Please note that output value is a timestamp in UTC, not in timezone of `DateTime64`. +Converts a `DateTime64` to a `Int64` value with fixed sub-second precision. Input value is scaled up or down appropriately depending on it precision. + +!!! note "Note" + Please note that output value is a timestamp in UTC, not in timezone of `DateTime64`. **Syntax** diff --git a/docs/ru/sql-reference/functions/type-conversion-functions.md b/docs/ru/sql-reference/functions/type-conversion-functions.md index 46032f29551..d86de71ec72 100644 --- a/docs/ru/sql-reference/functions/type-conversion-functions.md +++ b/docs/ru/sql-reference/functions/type-conversion-functions.md @@ -369,7 +369,7 @@ SELECT toFixedString('foo\0bar', 8) AS s, toStringCutToZero(s) AS s_cut; ## reinterpretAsUUID {#reinterpretasuuid} -Функция принимает шестнадцатибайтную строку и интерпретирует ее байты в network order (big-endian). Если строка имеет недостаточную длину, то функция работает так, как будто строка дополнена необходимым количеством нулевых байтов с конца. Если строка длиннее, чем шестнадцать байтов, то игнорируются лишние байты с конца. +Функция принимает строку из 16 байт и интерпретирует ее байты в порядок байт от старшего к младшему. Если строка имеет недостаточную длину, то функция работает так, как будто строка дополнена необходимым количеством нулевых байтов с конца. Если строка длиннее, чем 16 байтов, то лишние байты с конца игнорируются. **Синтаксис** @@ -466,7 +466,8 @@ SELECT reinterpret(toInt8(-1), 'UInt8') as int_to_uint, Поддерживается также синтаксис `CAST(x AS t)`. -Обратите внимание, что если значение `x` не может быть преобразовано к типу `T`, возникает переполнение. Например, `CAST(-1, 'UInt8')` возвращает 255. +!!! note "Примечание" + Обратите внимание, что если значение `x` не может быть преобразовано к типу `T`, возникает переполнение. Например, `CAST(-1, 'UInt8')` возвращает 255. 
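For illustration, a minimal query showing the overflow behaviour described in the note above; `accurateCastOrNull` is mentioned only as a possible checked alternative, assuming it is available in the target ClickHouse version:

```sql
SELECT
    CAST(-1, 'UInt8') AS overflowed,             -- 255: the value wraps around instead of throwing
    accurateCastOrNull(-1, 'UInt8') AS checked;  -- NULL: the checked variant rejects values that do not fit
```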
**Синтаксис** @@ -479,7 +480,7 @@ CAST(x, T) - `x` — любой тип данных. - `T` — конечный тип данных. [String](../../sql-reference/data-types/string.md). -**Returned value** +**Возвращаемое значение** - Конечный тип данных. @@ -523,7 +524,7 @@ SELECT Преобразование в FixedString(N) работает только для аргументов типа [String](../../sql-reference/data-types/string.md) или [FixedString](../../sql-reference/data-types/fixedstring.md). -Поддержано преобразование к типу [Nullable](../../sql-reference/functions/type-conversion-functions.md) и обратно. +Поддерживается преобразование к типу [Nullable](../../sql-reference/functions/type-conversion-functions.md) и обратно. **Примеры** @@ -893,7 +894,7 @@ AS parseDateTimeBestEffortUS; ## parseDateTimeBestEffortOrZero {#parsedatetimebesteffortorzero} ## parseDateTime32BestEffortOrZero {#parsedatetime32besteffortorzero} -Работает аналогично функции [parseDateTimeBestEffort](#parsedatetimebesteffort), но возвращает нулевую дату или нулевую дату и время, когда получает формат даты, который не может быть обработан. +Работает аналогично функции [parseDateTimeBestEffort](#parsedatetimebesteffort), но возвращает нулевую дату или нулевую дату и время в случае, если получает формат даты, который не может быть обработан. ## parseDateTimeBestEffortUSOrNull {#parsedatetimebesteffortusornull} @@ -1077,7 +1078,7 @@ SELECT parseDateTimeBestEffortUSOrZero('02.2021') AS parseDateTimeBestEffortUSOr parseDateTime64BestEffort(time_string [, precision [, time_zone]]) ``` -**Параметры** +**Аргументы** - `time_string` — строка, содержащая дату или дату со временем, которые нужно преобразовать. [String](../../sql-reference/data-types/string.md). - `precision` — `3` для миллисекунд, `6` для микросекунд. По умолчанию `3`. Необязательный. [UInt8](../../sql-reference/data-types/int-uint.md). @@ -1115,11 +1116,11 @@ FORMAT PrettyCompactMonoBlcok; ## parseDateTime64BestEffortOrNull {#parsedatetime32besteffortornull} -Работает аналогично функции [parseDateTime64BestEffort](#parsedatetime64besteffort), но возвращает `NULL`, когда встречает формат даты, который не может обработать. +Работает аналогично функции [parseDateTime64BestEffort](#parsedatetime64besteffort), но возвращает `NULL` в случае, если встречает формат даты, который не может обработать. ## parseDateTime64BestEffortOrZero {#parsedatetime64besteffortorzero} -Работает аналогично функции [parseDateTime64BestEffort](#parsedatetimebesteffort), но возвращает "нулевую" дату и время, когда встречает формат даты, который не может обработать. +Работает аналогично функции [parseDateTime64BestEffort](#parsedatetimebesteffort), но возвращает нулевую дату и время в случае, если встречает формат даты, который не может обработать. ## toLowCardinality {#tolowcardinality} @@ -1166,7 +1167,10 @@ SELECT toLowCardinality('1'); ## toUnixTimestamp64Nano {#tounixtimestamp64nano} Преобразует значение `DateTime64` в значение `Int64` с фиксированной точностью менее одной секунды. -Входное значение округляется соответствующим образом вверх или вниз в зависимости от его точности. Обратите внимание, что возвращаемое значение — это временная метка в UTC, а не в часовом поясе `DateTime64`. +Входное значение округляется соответствующим образом вверх или вниз в зависимости от его точности. + +!!! note "Примечание" + Обратите внимание, что возвращаемое значение — это временная метка в UTC, а не в часовом поясе `DateTime64`. 
**Синтаксис** From 1330ef7567423b3e063b6062218df1b39cb96dfd Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Thu, 13 May 2021 20:04:44 +0300 Subject: [PATCH 040/204] Fixed converting const DateTime to DateTime64 in WHERE --- src/Interpreters/convertFieldToType.cpp | 8 +++- ...866_datetime64_cmp_with_constant.reference | 12 ++++++ .../01866_datetime64_cmp_with_constant.sql | 40 +++++++++++++++++++ 3 files changed, 59 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/01866_datetime64_cmp_with_constant.reference create mode 100644 tests/queries/0_stateless/01866_datetime64_cmp_with_constant.sql diff --git a/src/Interpreters/convertFieldToType.cpp b/src/Interpreters/convertFieldToType.cpp index fa49b730379..0b124634fec 100644 --- a/src/Interpreters/convertFieldToType.cpp +++ b/src/Interpreters/convertFieldToType.cpp @@ -201,7 +201,13 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID return src; } - /// TODO Conversion from integers to DateTime64 + if (which_type.isDateTime64() + && (which_from_type.isNativeInt() || which_from_type.isNativeUInt() || which_from_type.isDateOrDateTime())) + { + const auto scale = static_cast(type).getScale(); + const auto decimal_value = DecimalUtils::decimalFromComponents(src.reinterpret(), 0, scale); + return Field(DecimalField(decimal_value, scale)); + } } else if (which_type.isUUID() && src.getType() == Field::Types::UUID) { diff --git a/tests/queries/0_stateless/01866_datetime64_cmp_with_constant.reference b/tests/queries/0_stateless/01866_datetime64_cmp_with_constant.reference new file mode 100644 index 00000000000..db516fa83d4 --- /dev/null +++ b/tests/queries/0_stateless/01866_datetime64_cmp_with_constant.reference @@ -0,0 +1,12 @@ +dt64 <= const dt +dt64 <= dt +dt <= const dt64 +dt <= dt64 +dt64 = const dt +dt64 = dt +dt = const dt64 +dt = dt64 +dt64 >= const dt +dt64 >= dt +dt >= const dt64 +dt >= dt64 diff --git a/tests/queries/0_stateless/01866_datetime64_cmp_with_constant.sql b/tests/queries/0_stateless/01866_datetime64_cmp_with_constant.sql new file mode 100644 index 00000000000..e6782656887 --- /dev/null +++ b/tests/queries/0_stateless/01866_datetime64_cmp_with_constant.sql @@ -0,0 +1,40 @@ +CREATE TABLE dt64test +( + `dt64_column` DateTime64(3), + `dt_column` DateTime DEFAULT toDateTime(dt64_column) +) +ENGINE = MergeTree +PARTITION BY toYYYYMM(dt64_column) +ORDER BY dt64_column; + +INSERT INTO dt64test (`dt64_column`) VALUES ('2020-01-13 13:37:00'); + +SELECT 'dt64 < const dt' FROM dt64test WHERE dt64_column < toDateTime('2020-01-13 13:37:00'); +SELECT 'dt64 < dt' FROM dt64test WHERE dt64_column < materialize(toDateTime('2020-01-13 13:37:00')); +SELECT 'dt < const dt64' FROM dt64test WHERE dt_column < toDateTime64('2020-01-13 13:37:00', 3); +SELECT 'dt < dt64' FROM dt64test WHERE dt_column < materialize(toDateTime64('2020-01-13 13:37:00', 3)); + +SELECT 'dt64 <= const dt' FROM dt64test WHERE dt64_column <= toDateTime('2020-01-13 13:37:00'); +SELECT 'dt64 <= dt' FROM dt64test WHERE dt64_column <= materialize(toDateTime('2020-01-13 13:37:00')); +SELECT 'dt <= const dt64' FROM dt64test WHERE dt_column <= toDateTime64('2020-01-13 13:37:00', 3); +SELECT 'dt <= dt64' FROM dt64test WHERE dt_column <= materialize(toDateTime64('2020-01-13 13:37:00', 3)); + +SELECT 'dt64 = const dt' FROM dt64test WHERE dt64_column = toDateTime('2020-01-13 13:37:00'); +SELECT 'dt64 = dt' FROM dt64test WHERE dt64_column = materialize(toDateTime('2020-01-13 13:37:00')); +SELECT 'dt = const dt64' FROM dt64test WHERE 
dt_column = toDateTime64('2020-01-13 13:37:00', 3); +SELECT 'dt = dt64' FROM dt64test WHERE dt_column = materialize(toDateTime64('2020-01-13 13:37:00', 3)); + +SELECT 'dt64 >= const dt' FROM dt64test WHERE dt64_column >= toDateTime('2020-01-13 13:37:00'); +SELECT 'dt64 >= dt' FROM dt64test WHERE dt64_column >= materialize(toDateTime('2020-01-13 13:37:00')); +SELECT 'dt >= const dt64' FROM dt64test WHERE dt_column >= toDateTime64('2020-01-13 13:37:00', 3); +SELECT 'dt >= dt64' FROM dt64test WHERE dt_column >= materialize(toDateTime64('2020-01-13 13:37:00', 3)); + +SELECT 'dt64 > const dt' FROM dt64test WHERE dt64_column > toDateTime('2020-01-13 13:37:00'); +SELECT 'dt64 > dt' FROM dt64test WHERE dt64_column > materialize(toDateTime('2020-01-13 13:37:00')); +SELECT 'dt > const dt64' FROM dt64test WHERE dt_column > toDateTime64('2020-01-13 13:37:00', 3); +SELECT 'dt > dt64' FROM dt64test WHERE dt_column > materialize(toDateTime64('2020-01-13 13:37:00', 3)); + +SELECT 'dt64 != const dt' FROM dt64test WHERE dt64_column != toDateTime('2020-01-13 13:37:00'); +SELECT 'dt64 != dt' FROM dt64test WHERE dt64_column != materialize(toDateTime('2020-01-13 13:37:00')); +SELECT 'dt != const dt64' FROM dt64test WHERE dt_column != toDateTime64('2020-01-13 13:37:00', 3); +SELECT 'dt != dt64' FROM dt64test WHERE dt_column != materialize(toDateTime64('2020-01-13 13:37:00', 3)); From 6100245f7511c6df45909b8c6250e85bcea453d8 Mon Sep 17 00:00:00 2001 From: adevyatova Date: Thu, 13 May 2021 17:45:26 +0000 Subject: [PATCH 041/204] update --- .../settings/merge-tree-settings.md | 12 +++---- docs/en/operations/settings/settings.md | 33 +++++++++++++++++++ 2 files changed, 39 insertions(+), 6 deletions(-) diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index f36d46c4d3d..76e58e10563 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -115,9 +115,9 @@ Default value: 604800 (1 week). Similar to [replicated_deduplication_window](#replicated-deduplication-window), `replicated_deduplication_window_seconds` specifies how long to store hash sums of blocks for insert deduplication. Hash sums older than `replicated_deduplication_window_seconds` are removed from Zookeeper, even if they are less than ` replicated_deduplication_window`. -## replicated_fetches_http_connection_timeout +## replicated_fetches_http_connection_timeout {#replicated_fetches_http_connection_timeout} -HTTP connection timeout (in seconds) for part fetch requests. Inherited from default profile `http_connection_timeout` if not set explicitly. +HTTP connection timeout (in seconds) for part fetch requests. Inherited from default profile [http_connection_timeout](./settings.md#http_connection_timeout) if not set explicitly. Possible values: @@ -126,9 +126,9 @@ Possible values: Default value: 0. -## replicated_fetches_http_send_timeout +## replicated_fetches_http_send_timeout {#replicated_fetches_http_send_timeout} -HTTP send timeout (in seconds) for part fetch requests. Inherited from default profile `http_send_timeout` if not set explicitly. +HTTP send timeout (in seconds) for part fetch requests. Inherited from default profile [http_send_timeout](./settings.md#http_send_timeout) if not set explicitly. Possible values: @@ -137,9 +137,9 @@ Possible values: Default value: 0. 
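A sketch of how these per-table fetch timeouts could be set; the table definition and the values are purely illustrative:

```sql
CREATE TABLE example_table
(
    d Date,
    x UInt32
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/example_table', '{replica}')
ORDER BY x
SETTINGS
    replicated_fetches_http_connection_timeout = 10,
    replicated_fetches_http_send_timeout = 30,
    replicated_fetches_http_receive_timeout = 30;
```

When left at 0, the values fall back to the profile-level `http_*_timeout` settings documented below.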
-## replicated_fetches_http_receive_timeout +## replicated_fetches_http_receive_timeout {#replicated_fetches_http_receive_timeout} -HTTP receive timeout (in seconds) for fetch part requests. Inherited from default profile `http_receive_timeout` if not set explicitly. +HTTP receive timeout (in seconds) for fetch part requests. Inherited from default profile [http_receive_timeout](./settings.md#http_receive_timeout) if not set explicitly. Possible values: diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index a5c3902f8f2..e78c9a5949f 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -2822,4 +2822,37 @@ Sets the interval in seconds after which periodically refreshed [live view](../. Default value: `60`. +## http_connection_timeout {#http_connection_timeout} + +HTTP connection timeout (in seconds). + +Possible values: + +- Any positive integer. +- 0 - Disabled. + +Default value: 0. + +## http_send_timeout {#replicated_fetches_http_send_timeout} + +HTTP send timeout (in seconds). + +Possible values: + +- Any positive integer. +- 0 - Disabled. + +Default value: 0. + +## http_receive_timeout {#http_receive_timeout} + +HTTP receive timeout (in seconds). + +Possible values: + +- Any positive integer. +- 0 - Disabled. + +Default value: 0. + [Original article](https://clickhouse.tech/docs/en/operations/settings/settings/) From 4ee1896fafa64c3330ad2499aeb0311d816d220e Mon Sep 17 00:00:00 2001 From: adevyatova Date: Thu, 13 May 2021 18:08:38 +0000 Subject: [PATCH 042/204] add ru docs --- .../settings/merge-tree-settings.md | 33 +++++++++++++++++++ docs/ru/operations/settings/settings.md | 32 ++++++++++++++++++ 2 files changed, 65 insertions(+) diff --git a/docs/ru/operations/settings/merge-tree-settings.md b/docs/ru/operations/settings/merge-tree-settings.md index f9093d379e3..c610828fc37 100644 --- a/docs/ru/operations/settings/merge-tree-settings.md +++ b/docs/ru/operations/settings/merge-tree-settings.md @@ -149,6 +149,39 @@ Eсли суммарное число активных кусков во все Стандартное значение Linux dirty_expire_centisecs - 30 секунд (максимальное время, которое записанные данные хранятся только в оперативной памяти), но при больших нагрузках на дисковую систему, данные могут быть записаны намного позже. Экспериментально было найдено время - 480 секунд, за которое гарантированно новый кусок будет записан на диск. +## replicated_fetches_http_connection_timeout {#replicated_fetches_http_connection_timeout} + +Тайм-аут HTTP-соединения (в секундах) для запросов на скачивание кусков. Наследуется от профиля по умолчанию [http_connection_timeout](./settings.md#http_connection_timeout) если не задан явно. + +Возможные значения: + +- 0 - тайм-аут не задан. +- Любое положительное целое число. + +Значение по умолчанию: `0`. + +## replicated_fetches_http_send_timeout {#replicated_fetches_http_send_timeout} + +Тайм-аут отправки HTTP (в секундах) для запросов на скачивание кусков. Наследуется от профиля по умолчанию [http_connection_timeout](./settings.md#http_connection_timeout) если не задан явно. + +Возможные значения: + +- 0 - тайм-аут не задан. +- Любое положительное целое число. + +Значение по умолчанию: `0`. + +## replicated_fetches_http_receive_timeout {#replicated_fetches_http_receive_timeout} + +Тайм-аут приёма HTTP (в секундах) для запросов на скачивание кусков. Наследуется от профиля по умолчанию [http_connection_timeout](./settings.md#http_connection_timeout) если не задан явно. 
+ +Возможные значения: + +- 0 - тайм-аут не задан. +- Любое положительное целое число. + +Значение по умолчанию: `0`. + ## max_bytes_to_merge_at_max_space_in_pool {#max-bytes-to-merge-at-max-space-in-pool} Максимальный суммарный размер кусков (в байтах) в одном слиянии, при наличии свободных ресурсов в фоновом пуле. diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 467d27dad32..26513350b63 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -2857,5 +2857,37 @@ SELECT * FROM test LIMIT 10 OFFSET 100; │ 109 │ └─────┘ ``` +## http_connection_timeout {#http_connection_timeout} + +Тайм-аут HTTP-соединения (в секундах). + +Возможные значения: + +- 0 - тайм-аут не задан. +- Любое положительное целое число. + +Значение по умолчанию: `0`. + +## http_send_timeout {#replicated_fetches_http_send_timeout} + +Тайм-аут отправки HTTP (в секундах). + +Возможные значения: + +- 0 - тайм-аут не задан. +- Любое положительное целое число. + +Значение по умолчанию: `0`. + +## http_receive_timeout {#http_receive_timeout} + +Тайм-аут приема HTTP (в секундах). + +Возможные значения: + +- 0 - тайм-аут не задан. +- Любое положительное целое число. + +Значение по умолчанию: `0`. [Оригинальная статья](https://clickhouse.tech/docs/ru/operations/settings/settings/) From 2ef60f8eaa62803036278f013ef7a744ed2f7ba4 Mon Sep 17 00:00:00 2001 From: adevyatova Date: Thu, 13 May 2021 18:10:39 +0000 Subject: [PATCH 043/204] minor fixes --- docs/ru/operations/settings/merge-tree-settings.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/ru/operations/settings/merge-tree-settings.md b/docs/ru/operations/settings/merge-tree-settings.md index c610828fc37..a89140800eb 100644 --- a/docs/ru/operations/settings/merge-tree-settings.md +++ b/docs/ru/operations/settings/merge-tree-settings.md @@ -151,7 +151,7 @@ Eсли суммарное число активных кусков во все ## replicated_fetches_http_connection_timeout {#replicated_fetches_http_connection_timeout} -Тайм-аут HTTP-соединения (в секундах) для запросов на скачивание кусков. Наследуется от профиля по умолчанию [http_connection_timeout](./settings.md#http_connection_timeout) если не задан явно. +Тайм-аут HTTP-соединения (в секундах) для запросов на скачивание кусков. Наследуется от профиля по умолчанию [http_connection_timeout](./settings.md#http_connection_timeout), если не задан явно. Возможные значения: @@ -162,7 +162,7 @@ Eсли суммарное число активных кусков во все ## replicated_fetches_http_send_timeout {#replicated_fetches_http_send_timeout} -Тайм-аут отправки HTTP (в секундах) для запросов на скачивание кусков. Наследуется от профиля по умолчанию [http_connection_timeout](./settings.md#http_connection_timeout) если не задан явно. +Тайм-аут отправки HTTP (в секундах) для запросов на скачивание кусков. Наследуется от профиля по умолчанию [http_connection_timeout](./settings.md#http_connection_timeout), если не задан явно. Возможные значения: @@ -173,7 +173,7 @@ Eсли суммарное число активных кусков во все ## replicated_fetches_http_receive_timeout {#replicated_fetches_http_receive_timeout} -Тайм-аут приёма HTTP (в секундах) для запросов на скачивание кусков. Наследуется от профиля по умолчанию [http_connection_timeout](./settings.md#http_connection_timeout) если не задан явно. +Тайм-аут приёма HTTP (в секундах) для запросов на скачивание кусков. Наследуется от профиля по умолчанию [http_connection_timeout](./settings.md#http_connection_timeout), если не задан явно. 
Возможные значения: From 678705b00403afb402b0a5ab840d97d610b391e8 Mon Sep 17 00:00:00 2001 From: Evgenia Sudarikova <56156889+otrazhenia@users.noreply.github.com> Date: Thu, 13 May 2021 21:49:03 +0300 Subject: [PATCH 044/204] Update docs/ru/sql-reference/functions/ip-address-functions.md Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com> --- docs/ru/sql-reference/functions/ip-address-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/functions/ip-address-functions.md b/docs/ru/sql-reference/functions/ip-address-functions.md index 10ded819fef..b02d45d7667 100644 --- a/docs/ru/sql-reference/functions/ip-address-functions.md +++ b/docs/ru/sql-reference/functions/ip-address-functions.md @@ -399,7 +399,7 @@ SELECT addr, isIPv6String(addr) FROM ( SELECT ['::', '1111::ffff', '::ffff:127.0 Проверяет, попадает ли IP адрес в интервал, заданный в нотации [CIDR](https://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing). -**Syntax** +**Синтаксис** ``` sql isIPAddressInRange(address, prefix) From cd10e84f5cd7b557c8b46f38d4c8a25508396d50 Mon Sep 17 00:00:00 2001 From: Evgenia Sudarikova <56156889+otrazhenia@users.noreply.github.com> Date: Thu, 13 May 2021 21:49:30 +0300 Subject: [PATCH 045/204] Update docs/ru/sql-reference/functions/type-conversion-functions.md Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com> --- docs/ru/sql-reference/functions/type-conversion-functions.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/ru/sql-reference/functions/type-conversion-functions.md b/docs/ru/sql-reference/functions/type-conversion-functions.md index d86de71ec72..1e5788c70a4 100644 --- a/docs/ru/sql-reference/functions/type-conversion-functions.md +++ b/docs/ru/sql-reference/functions/type-conversion-functions.md @@ -369,7 +369,7 @@ SELECT toFixedString('foo\0bar', 8) AS s, toStringCutToZero(s) AS s_cut; ## reinterpretAsUUID {#reinterpretasuuid} -Функция принимает строку из 16 байт и интерпретирует ее байты в порядок байт от старшего к младшему. Если строка имеет недостаточную длину, то функция работает так, как будто строка дополнена необходимым количеством нулевых байтов с конца. Если строка длиннее, чем 16 байтов, то лишние байты с конца игнорируются. +Функция принимает строку из 16 байт и интерпретирует ее байты в порядок от старшего к младшему. Если строка имеет недостаточную длину, то функция работает так, как будто строка дополнена необходимым количеством нулевых байтов с конца. Если строка длиннее, чем 16 байтов, то лишние байты с конца игнорируются. 
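One way to sanity-check the byte order described above is a round trip through a 16-byte string; this is only a sketch and assumes `generateUUIDv4` and `reinterpretAsString` are available:

```sql
SELECT uuid = uuid2 AS match
FROM
(
    SELECT
        generateUUIDv4() AS uuid,
        reinterpretAsUUID(reverse(reinterpretAsString(uuid))) AS uuid2
);
-- match = 1: reversing the raw bytes restores the big-endian order that reinterpretAsUUID expects.
```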
**Синтаксис** @@ -1336,4 +1336,3 @@ FROM numbers(3); │ 2,"good" │ └───────────────────────────────────────────┘ ``` - From 113063209327ebb9126a63c884bbde89afde70b4 Mon Sep 17 00:00:00 2001 From: Evgenia Sudarikova <56156889+otrazhenia@users.noreply.github.com> Date: Thu, 13 May 2021 21:49:42 +0300 Subject: [PATCH 046/204] Update docs/ru/sql-reference/functions/type-conversion-functions.md Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com> --- docs/ru/sql-reference/functions/type-conversion-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/functions/type-conversion-functions.md b/docs/ru/sql-reference/functions/type-conversion-functions.md index 1e5788c70a4..cf7eb760378 100644 --- a/docs/ru/sql-reference/functions/type-conversion-functions.md +++ b/docs/ru/sql-reference/functions/type-conversion-functions.md @@ -467,7 +467,7 @@ SELECT reinterpret(toInt8(-1), 'UInt8') as int_to_uint, Поддерживается также синтаксис `CAST(x AS t)`. !!! note "Примечание" - Обратите внимание, что если значение `x` не может быть преобразовано к типу `T`, возникает переполнение. Например, `CAST(-1, 'UInt8')` возвращает 255. + Если значение `x` не может быть преобразовано к типу `T`, возникает переполнение. Например, `CAST(-1, 'UInt8')` возвращает 255. **Синтаксис** From 79f8265897658f435552684d090638068e5436ca Mon Sep 17 00:00:00 2001 From: Evgenia Sudarikova <56156889+otrazhenia@users.noreply.github.com> Date: Thu, 13 May 2021 21:50:38 +0300 Subject: [PATCH 047/204] Update docs/ru/sql-reference/functions/type-conversion-functions.md Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com> --- docs/ru/sql-reference/functions/type-conversion-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/functions/type-conversion-functions.md b/docs/ru/sql-reference/functions/type-conversion-functions.md index cf7eb760378..d75fb03d45a 100644 --- a/docs/ru/sql-reference/functions/type-conversion-functions.md +++ b/docs/ru/sql-reference/functions/type-conversion-functions.md @@ -1081,7 +1081,7 @@ parseDateTime64BestEffort(time_string [, precision [, time_zone]]) **Аргументы** - `time_string` — строка, содержащая дату или дату со временем, которые нужно преобразовать. [String](../../sql-reference/data-types/string.md). -- `precision` — `3` для миллисекунд, `6` для микросекунд. По умолчанию `3`. Необязательный. [UInt8](../../sql-reference/data-types/int-uint.md). +- `precision` — требуемая точность: `3` — для миллисекунд, `6` — для микросекунд. По умолчанию — `3`. Необязательный. [UInt8](../../sql-reference/data-types/int-uint.md). - `time_zone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone). Разбирает значение `time_string` в зависимости от часового пояса. Необязательный. [String](../../sql-reference/data-types/string.md). 
**Возвращаемое значение** From f2307b23e3a88d1ca2d70da43665af3309583096 Mon Sep 17 00:00:00 2001 From: Evgenia Sudarikova <56156889+otrazhenia@users.noreply.github.com> Date: Thu, 13 May 2021 21:50:56 +0300 Subject: [PATCH 048/204] Update docs/ru/sql-reference/functions/type-conversion-functions.md Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com> --- docs/ru/sql-reference/functions/type-conversion-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/functions/type-conversion-functions.md b/docs/ru/sql-reference/functions/type-conversion-functions.md index d75fb03d45a..0c55b183f69 100644 --- a/docs/ru/sql-reference/functions/type-conversion-functions.md +++ b/docs/ru/sql-reference/functions/type-conversion-functions.md @@ -1169,7 +1169,7 @@ SELECT toLowCardinality('1'); Преобразует значение `DateTime64` в значение `Int64` с фиксированной точностью менее одной секунды. Входное значение округляется соответствующим образом вверх или вниз в зависимости от его точности. -!!! note "Примечание" +!!! info "Примечание" Обратите внимание, что возвращаемое значение — это временная метка в UTC, а не в часовом поясе `DateTime64`. **Синтаксис** From 8bdd08b63d0270458fbb680b39dddf6c45b809bf Mon Sep 17 00:00:00 2001 From: Evgenia Sudarikova <56156889+otrazhenia@users.noreply.github.com> Date: Thu, 13 May 2021 21:51:01 +0300 Subject: [PATCH 049/204] Update docs/ru/sql-reference/functions/type-conversion-functions.md Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com> --- docs/ru/sql-reference/functions/type-conversion-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/functions/type-conversion-functions.md b/docs/ru/sql-reference/functions/type-conversion-functions.md index 0c55b183f69..9a25facb9bb 100644 --- a/docs/ru/sql-reference/functions/type-conversion-functions.md +++ b/docs/ru/sql-reference/functions/type-conversion-functions.md @@ -1170,7 +1170,7 @@ SELECT toLowCardinality('1'); Входное значение округляется соответствующим образом вверх или вниз в зависимости от его точности. !!! info "Примечание" - Обратите внимание, что возвращаемое значение — это временная метка в UTC, а не в часовом поясе `DateTime64`. + Возвращаемое значение — это временная метка в UTC, а не в часовом поясе `DateTime64`. **Синтаксис** From 4c7c2ac8637a1894a5a4896318e6e91a34b8eaf1 Mon Sep 17 00:00:00 2001 From: Evgenia Sudarikova <56156889+otrazhenia@users.noreply.github.com> Date: Thu, 13 May 2021 21:51:09 +0300 Subject: [PATCH 050/204] Update docs/ru/sql-reference/functions/type-conversion-functions.md Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com> --- docs/ru/sql-reference/functions/type-conversion-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/functions/type-conversion-functions.md b/docs/ru/sql-reference/functions/type-conversion-functions.md index 9a25facb9bb..c318dded88c 100644 --- a/docs/ru/sql-reference/functions/type-conversion-functions.md +++ b/docs/ru/sql-reference/functions/type-conversion-functions.md @@ -466,7 +466,7 @@ SELECT reinterpret(toInt8(-1), 'UInt8') as int_to_uint, Поддерживается также синтаксис `CAST(x AS t)`. -!!! note "Примечание" +!!! warning "Предупреждение" Если значение `x` не может быть преобразовано к типу `T`, возникает переполнение. Например, `CAST(-1, 'UInt8')` возвращает 255. 
**Синтаксис** From 1a049c396dff10c8fd77c522949885974e251657 Mon Sep 17 00:00:00 2001 From: Evgenia Sudarikova <56156889+otrazhenia@users.noreply.github.com> Date: Fri, 14 May 2021 09:30:51 +0300 Subject: [PATCH 051/204] Update docs/en/sql-reference/functions/type-conversion-functions.md Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com> --- docs/en/sql-reference/functions/type-conversion-functions.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index 8a477b79836..2b3841556d8 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -1101,7 +1101,7 @@ UNION ALL SELECT parseDateTime64BestEffort('2021-01-01 01:01:00.12346',6) AS a, toTypeName(a) AS t UNION ALL SELECT parseDateTime64BestEffort('2021-01-01 01:01:00.12346',3,'Europe/Moscow') AS a, toTypeName(a) AS t -FORMAT PrettyCompactMonoBlcok; +FORMAT PrettyCompactMonoBlock; ``` Result: @@ -1337,4 +1337,3 @@ Result: │ 2,"good" │ └───────────────────────────────────────────┘ ``` - From 1b7d2db4c653163556fd31682f0daa4ca1a01612 Mon Sep 17 00:00:00 2001 From: Evgenia Sudarikova <56156889+otrazhenia@users.noreply.github.com> Date: Fri, 14 May 2021 09:30:59 +0300 Subject: [PATCH 052/204] Update docs/en/sql-reference/functions/type-conversion-functions.md Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com> --- docs/en/sql-reference/functions/type-conversion-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index 2b3841556d8..fea415cf1ac 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -1170,7 +1170,7 @@ Result: Converts a `DateTime64` to a `Int64` value with fixed sub-second precision. Input value is scaled up or down appropriately depending on it precision. -!!! note "Note" +!!! info "Note" Please note that output value is a timestamp in UTC, not in timezone of `DateTime64`. **Syntax** From e3b9fde94d662b237a9a65ccdf79b5700dbad90a Mon Sep 17 00:00:00 2001 From: Evgenia Sudarikova <56156889+otrazhenia@users.noreply.github.com> Date: Fri, 14 May 2021 09:31:06 +0300 Subject: [PATCH 053/204] Update docs/en/sql-reference/functions/type-conversion-functions.md Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com> --- docs/en/sql-reference/functions/type-conversion-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index fea415cf1ac..65dd9c90ec4 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -1171,7 +1171,7 @@ Result: Converts a `DateTime64` to a `Int64` value with fixed sub-second precision. Input value is scaled up or down appropriately depending on it precision. !!! info "Note" - Please note that output value is a timestamp in UTC, not in timezone of `DateTime64`. + The output value is a timestamp in UTC, not in the timezone of `DateTime64`. 
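To make the note concrete: the same wall-clock string interpreted in two timezones maps to two different instants, and the returned value is always the absolute UTC-based timestamp (the 3-hour difference assumes Europe/Moscow is UTC+3):

```sql
SELECT
    toUnixTimestamp64Nano(toDateTime64('2021-01-01 00:00:00', 3, 'UTC')) AS ts_utc,
    toUnixTimestamp64Nano(toDateTime64('2021-01-01 00:00:00', 3, 'Europe/Moscow')) AS ts_moscow,
    ts_utc - ts_moscow AS diff;  -- 10800000000000, i.e. 3 hours in nanoseconds
```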
**Syntax** From be15c96de6b72516eb52b09f0e73ffb7bdf64799 Mon Sep 17 00:00:00 2001 From: Evgenia Sudarikova <56156889+otrazhenia@users.noreply.github.com> Date: Fri, 14 May 2021 09:31:14 +0300 Subject: [PATCH 054/204] Update docs/ru/sql-reference/functions/type-conversion-functions.md Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com> --- docs/ru/sql-reference/functions/type-conversion-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/functions/type-conversion-functions.md b/docs/ru/sql-reference/functions/type-conversion-functions.md index c318dded88c..c55d0b7013f 100644 --- a/docs/ru/sql-reference/functions/type-conversion-functions.md +++ b/docs/ru/sql-reference/functions/type-conversion-functions.md @@ -1100,7 +1100,7 @@ UNION ALL SELECT parseDateTime64BestEffort('2021-01-01 01:01:00.12346',6) AS a, toTypeName(a) AS t UNION ALL SELECT parseDateTime64BestEffort('2021-01-01 01:01:00.12346',3,'Europe/Moscow') AS a, toTypeName(a) AS t -FORMAT PrettyCompactMonoBlcok; +FORMAT PrettyCompactMonoBlock; ``` Результат: From 765bc28f61333664fdb3204eb5caebbde8fd5272 Mon Sep 17 00:00:00 2001 From: Evgenia Sudarikova <56156889+otrazhenia@users.noreply.github.com> Date: Fri, 14 May 2021 09:31:21 +0300 Subject: [PATCH 055/204] Update docs/en/sql-reference/functions/type-conversion-functions.md Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com> --- docs/en/sql-reference/functions/type-conversion-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index 65dd9c90ec4..8cd858c2a10 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -1082,7 +1082,7 @@ parseDateTime64BestEffort(time_string [, precision [, time_zone]]) **Parameters** - `time_string` — String containing a date or date with time to convert. [String](../../sql-reference/data-types/string.md). -- `precision` — `3` for milliseconds, `6` for microseconds. Default `3`. Optional. [UInt8](../../sql-reference/data-types/int-uint.md). +- `precision` — Required precision. `3` — for milliseconds, `6` — for microseconds. Default — `3`. Optional. [UInt8](../../sql-reference/data-types/int-uint.md). - `time_zone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../../sql-reference/data-types/string.md). 
**Returned value** From 55c573f9205220fcd379b82a7508e4ce0e18f8a6 Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 14 May 2021 07:11:32 +0000 Subject: [PATCH 056/204] Fix --- src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 8ec55c1b1c1..ccb24d5db90 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -458,6 +458,9 @@ void StorageRabbitMQ::bindQueue(size_t queue_id, AMQP::TcpChannel & rabbit_chann AMQP::Table queue_settings; + std::unordered_set integer_settings = {"x-max-length", "x-max-length-bytes", "x-message-ttl", "x-expires", "x-priority", "x-max-priority"}; + std::unordered_set string_settings = {"x-overflow", "x-dead-letter-exchange", "x-queue-type"}; + /// Check user-defined settings. if (!queue_settings_list.empty()) { @@ -465,13 +468,12 @@ void StorageRabbitMQ::bindQueue(size_t queue_id, AMQP::TcpChannel & rabbit_chann { Strings setting_values; splitInto<'='>(setting_values, setting); - assert(setting_values.size() == 2); + if (setting_values.size() != 2) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid settings string: {}", setting); + String key = setting_values[0], value = setting_values[1]; - std::unordered_set integer_settings = {"x-max-length", "x-max-length-bytes", "x-message-ttl", "x-expires", "x-priority", "x-max-priority"}; - std::unordered_set string_settings = {"x-overflow", "x-dead-letter-exchange", "x-queue-type"}; - - if (integer_settings.find(key) != integer_settings.end()) + if (integer_settings.contains(key)) queue_settings[key] = parse(value); else if (string_settings.find(key) != string_settings.end()) queue_settings[key] = value; From 67e43937694352cdee6e306e705fd7e6ffc36b9f Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 14 May 2021 11:32:41 +0300 Subject: [PATCH 057/204] If table was not active set readonly mode --- .../ReplicatedMergeTreeRestartingThread.cpp | 44 ++++++++++++----- .../ReplicatedMergeTreeRestartingThread.h | 5 +- .../test.py | 47 ++++++++++++++++--- 3 files changed, 75 insertions(+), 21 deletions(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp index ca6ea3103d1..b43770f0923 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp @@ -57,7 +57,7 @@ void ReplicatedMergeTreeRestartingThread::run() try { - if (first_time || storage.getZooKeeper()->expired()) + if (first_time || readonly_mode_was_set || storage.getZooKeeper()->expired()) { startup_completed = false; @@ -67,15 +67,15 @@ void ReplicatedMergeTreeRestartingThread::run() } else { - LOG_WARNING(log, "ZooKeeper session has expired. Switching to a new session."); - - bool old_val = false; - if (storage.is_readonly.compare_exchange_strong(old_val, true)) + if (!readonly_mode_was_set) { - incr_readonly = true; - CurrentMetrics::add(CurrentMetrics::ReadonlyReplica); + LOG_WARNING(log, "ZooKeeper session has expired. Switching to a new session."); + setReadonly(); + } + else + { + LOG_WARNING(log, "Table was in readonly mode. Will try to activate it."); } - partialShutdown(); } @@ -98,8 +98,14 @@ void ReplicatedMergeTreeRestartingThread::run() if (!need_stop && !tryStartup()) { + /// We couldn't startup replication. Table must be readonly. 
+ /// Otherwise it can have partially initialized queue and other + /// strange parts of state. + setReadonly(); + if (first_time) storage.startup_event.set(); + task->scheduleAfter(retry_period_ms); return; } @@ -116,7 +122,7 @@ void ReplicatedMergeTreeRestartingThread::run() bool old_val = true; if (storage.is_readonly.compare_exchange_strong(old_val, false)) { - incr_readonly = false; + readonly_mode_was_set = false; CurrentMetrics::sub(CurrentMetrics::ReadonlyReplica); } @@ -125,6 +131,8 @@ void ReplicatedMergeTreeRestartingThread::run() } catch (...) { + /// We couldn't activate table let's set it into readonly mode + setReadonly(); storage.startup_event.set(); tryLogCurrentException(log, __PRETTY_FUNCTION__); } @@ -184,7 +192,7 @@ bool ReplicatedMergeTreeRestartingThread::tryStartup() } catch (const Coordination::Exception & e) { - LOG_ERROR(log, "Couldn't start replication: {}. {}", e.what(), DB::getCurrentExceptionMessage(true)); + LOG_ERROR(log, "Couldn't start replication (table will be in readonly mode): {}. {}", e.what(), DB::getCurrentExceptionMessage(true)); return false; } catch (const Exception & e) @@ -192,7 +200,7 @@ bool ReplicatedMergeTreeRestartingThread::tryStartup() if (e.code() != ErrorCodes::REPLICA_IS_ALREADY_ACTIVE) throw; - LOG_ERROR(log, "Couldn't start replication: {}. {}", e.what(), DB::getCurrentExceptionMessage(true)); + LOG_ERROR(log, "Couldn't start replication (table will be in readonly mode): {}. {}", e.what(), DB::getCurrentExceptionMessage(true)); return false; } } @@ -356,14 +364,24 @@ void ReplicatedMergeTreeRestartingThread::shutdown() LOG_TRACE(log, "Restarting thread finished"); /// For detach table query, we should reset the ReadonlyReplica metric. - if (incr_readonly) + if (readonly_mode_was_set) { CurrentMetrics::sub(CurrentMetrics::ReadonlyReplica); - incr_readonly = false; + readonly_mode_was_set = false; } /// Stop other tasks. partialShutdown(); } +void ReplicatedMergeTreeRestartingThread::setReadonly() +{ + bool old_val = false; + if (storage.is_readonly.compare_exchange_strong(old_val, true)) + { + readonly_mode_was_set = true; + CurrentMetrics::add(CurrentMetrics::ReadonlyReplica); + } +} + } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h index 824ed73c171..cb10d628349 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h @@ -37,7 +37,7 @@ private: std::atomic need_stop {false}; // We need it besides `storage.is_readonly`, because `shutdown()` may be called many times, that way `storage.is_readonly` will not change. - bool incr_readonly = false; + bool readonly_mode_was_set = false; /// The random data we wrote into `/replicas/me/is_active`. 
String active_node_identifier; @@ -62,6 +62,9 @@ private: void updateQuorumIfWeHavePart(); void partialShutdown(); + + /// Set readonly mode for table + void setReadonly(); }; diff --git a/tests/integration/test_version_update_after_mutation/test.py b/tests/integration/test_version_update_after_mutation/test.py index 79e5dece174..2549dc0a8cf 100644 --- a/tests/integration/test_version_update_after_mutation/test.py +++ b/tests/integration/test_version_update_after_mutation/test.py @@ -1,4 +1,5 @@ import pytest +import time from helpers.cluster import ClickHouseCluster from helpers.test_tools import assert_eq_with_retry @@ -34,12 +35,32 @@ def test_mutate_and_upgrade(start_cluster): node1.query("ALTER TABLE mt DELETE WHERE id = 2", settings={"mutations_sync": "2"}) node2.query("SYSTEM SYNC REPLICA mt", timeout=5) - node1.restart_with_latest_version() - node2.restart_with_latest_version() + node1.restart_with_latest_version(signal=9) + node2.restart_with_latest_version(signal=9) - node2.query("INSERT INTO mt VALUES ('2020-02-13', 3);") + exception = None + # After hard restart table can be in readonly mode + for _ in range(40): + try: + node2.query("INSERT INTO mt VALUES ('2020-02-13', 3);") + break + except Exception as ex: + print("Cannot insert into node2 with error {}", ex) + time.sleep(0.5) + exception = ex + else: + raise exception - node1.query("SYSTEM SYNC REPLICA mt", timeout=5) + for _ in range(40): + try: + node1.query("SYSTEM SYNC REPLICA mt", timeout=5) + break + except Exception as ex: + print("Cannot sync node1 with error {}", ex) + time.sleep(0.5) + exception = ex + else: + raise exception assert node1.query("SELECT COUNT() FROM mt") == "2\n" assert node2.query("SELECT COUNT() FROM mt") == "2\n" @@ -73,12 +94,24 @@ def test_upgrade_while_mutation(start_cluster): node3.query("INSERT INTO mt1 select '2020-02-13', number from numbers(100000)") - node3.query("SYSTEM STOP MERGES") + node3.query("SYSTEM STOP MERGES mt1") node3.query("ALTER TABLE mt1 DELETE WHERE id % 2 == 0") - node3.restart_with_latest_version() + node3.restart_with_latest_version(signal=9) + + # After hard restart table can be in readonly mode + exception = None + for _ in range(40): + try: + node3.query("ALTER TABLE mt1 DELETE WHERE id > 100000", settings={"mutations_sync": "2"}) + break + except Exception as ex: + print("Cannot alter node3 with error {}", ex) + time.sleep(0.5) + exception = ex + else: + raise exception # will delete nothing, but previous async mutation will finish with this query - node3.query("ALTER TABLE mt1 DELETE WHERE id > 100000", settings={"mutations_sync": "2"}) assert_eq_with_retry(node3, "SELECT COUNT() from mt1", "50000\n") From d4c6a5a05e4e62a5ddb9aa9d2c610a4efcba96c5 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 14 May 2021 11:38:53 +0300 Subject: [PATCH 058/204] Better logging --- .../MergeTree/ReplicatedMergeTreeRestartingThread.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp index b43770f0923..6b7fb3bf17f 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp @@ -67,12 +67,12 @@ void ReplicatedMergeTreeRestartingThread::run() } else { - if (!readonly_mode_was_set) + if (storage.getZooKeeper()->expired()) { LOG_WARNING(log, "ZooKeeper session has expired. 
Switching to a new session."); setReadonly(); } - else + else if (readonly_mode_was_set) { LOG_WARNING(log, "Table was in readonly mode. Will try to activate it."); } From 340cd2027445927f87f30bff6c9186f839f58bd4 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 14 May 2021 11:55:51 +0300 Subject: [PATCH 059/204] Fix flaky TTL replicated test --- tests/integration/test_ttl_replicated/test.py | 26 +++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_ttl_replicated/test.py b/tests/integration/test_ttl_replicated/test.py index 67614b88029..c18831e5d9d 100644 --- a/tests/integration/test_ttl_replicated/test.py +++ b/tests/integration/test_ttl_replicated/test.py @@ -392,11 +392,33 @@ def test_ttl_compatibility(started_cluster, node_left, node_right, num_run): time.sleep(5) # Wait for TTL - node_right.query("OPTIMIZE TABLE test_ttl_delete FINAL") + # after restart table can be in readonly mode + exception = None + for _ in range(40): + try: + node_right.query("OPTIMIZE TABLE test_ttl_delete FINAL") + break + except Exception as ex: + print("Cannot optimaze table on node", node_right.name, "exception", ex) + time.sleep(0.5) + exception = ex + else: + raise ex + node_right.query("OPTIMIZE TABLE test_ttl_group_by FINAL") node_right.query("OPTIMIZE TABLE test_ttl_where FINAL") - node_left.query("SYSTEM SYNC REPLICA test_ttl_delete", timeout=20) + for _ in range(40): + try: + node_left.query("SYSTEM SYNC REPLICA test_ttl_delete", timeout=20) + break + except Exception as ex: + print("Cannot sync replica table on node", node_left.name, "exception", ex) + time.sleep(0.5) + exception = ex + else: + raise ex + node_left.query("SYSTEM SYNC REPLICA test_ttl_group_by", timeout=20) node_left.query("SYSTEM SYNC REPLICA test_ttl_where", timeout=20) From 81288e4b5071e153ad318987b60b9a1d4c7d8766 Mon Sep 17 00:00:00 2001 From: Vladimir Date: Fri, 14 May 2021 14:07:11 +0300 Subject: [PATCH 060/204] Apply suggestions from code review --- .../aggregate-functions/parametric-functions.md | 2 +- .../functions/type-conversion-functions.md | 2 +- .../functions/type-conversion-functions.md | 10 +++++----- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/en/sql-reference/aggregate-functions/parametric-functions.md b/docs/en/sql-reference/aggregate-functions/parametric-functions.md index 2a221bbb6eb..0edb1601023 100644 --- a/docs/en/sql-reference/aggregate-functions/parametric-functions.md +++ b/docs/en/sql-reference/aggregate-functions/parametric-functions.md @@ -253,7 +253,7 @@ windowFunnel(window, [mode, [mode, ... ]])(timestamp, cond1, cond2, ..., condN) **Parameters** -- `window` — Length of the sliding window, it is the time interval between the first condition and last condition. The unit of `window` depends on the `timestamp` itself and varies. Determined using the expression `timestamp of cond1 <= timestamp of cond2 <= ... <= timestamp of condN <= timestamp of cond1 + window`. +- `window` — Length of the sliding window, it is the time interval between the first and the last condition. The unit of `window` depends on the `timestamp` itself and varies. Determined using the expression `timestamp of cond1 <= timestamp of cond2 <= ... <= timestamp of condN <= timestamp of cond1 + window`. - `mode` — It is an optional argument. One or more modes can be set. - `'strict'` — If same condition holds for sequence of events then such non-unique events would be skipped. - `'strict_order'` — Don't allow interventions of other events. E.g. 
in the case of `A->B->D->C`, it stops finding `A->B->C` at the `D` and the max event level is 2. diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index 8cd858c2a10..661469e6901 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -373,7 +373,7 @@ This function accepts a number or date or date with time, and returns a FixedStr ## reinterpretAsUUID {#reinterpretasuuid} -Accepts 16 bytes string and returns UUID containing bytes representing the corresponding value in network byte order (big-endian). If the string isn't long enough, the functions works as if the string is padded with the necessary number of null bytes to the end. If the string longer than 16 bytes, the extra bytes at the end are ignored. +Accepts 16 bytes string and returns UUID containing bytes representing the corresponding value in network byte order (big-endian). If the string isn't long enough, the function works as if the string is padded with the necessary number of null bytes to the end. If the string longer than 16 bytes, the extra bytes at the end are ignored. **Syntax** diff --git a/docs/ru/sql-reference/functions/type-conversion-functions.md b/docs/ru/sql-reference/functions/type-conversion-functions.md index c55d0b7013f..2226c90525d 100644 --- a/docs/ru/sql-reference/functions/type-conversion-functions.md +++ b/docs/ru/sql-reference/functions/type-conversion-functions.md @@ -440,7 +440,7 @@ reinterpret(x, type) **Возвращаемое значение** -- Конечный тип данных. +- Значение конечного типа данных. **Примеры** @@ -482,7 +482,7 @@ CAST(x, T) **Возвращаемое значение** -- Конечный тип данных. +- Значение конечного типа данных. **Примеры** @@ -894,7 +894,7 @@ AS parseDateTimeBestEffortUS; ## parseDateTimeBestEffortOrZero {#parsedatetimebesteffortorzero} ## parseDateTime32BestEffortOrZero {#parsedatetime32besteffortorzero} -Работает аналогично функции [parseDateTimeBestEffort](#parsedatetimebesteffort), но возвращает нулевую дату или нулевую дату и время в случае, если получает формат даты, который не может быть обработан. +Работает аналогично функции [parseDateTimeBestEffort](#parsedatetimebesteffort), но возвращает нулевое значение, если формат даты не может быть обработан. ## parseDateTimeBestEffortUSOrNull {#parsedatetimebesteffortusornull} @@ -1116,11 +1116,11 @@ FORMAT PrettyCompactMonoBlock; ## parseDateTime64BestEffortOrNull {#parsedatetime32besteffortornull} -Работает аналогично функции [parseDateTime64BestEffort](#parsedatetime64besteffort), но возвращает `NULL` в случае, если встречает формат даты, который не может обработать. +Работает аналогично функции [parseDateTime64BestEffort](#parsedatetime64besteffort), но возвращает `NULL`, если формат даты не может быть обработан. ## parseDateTime64BestEffortOrZero {#parsedatetime64besteffortorzero} -Работает аналогично функции [parseDateTime64BestEffort](#parsedatetimebesteffort), но возвращает нулевую дату и время в случае, если встречает формат даты, который не может обработать. +Работает аналогично функции [parseDateTime64BestEffort](#parsedatetimebesteffort), но возвращает нулевую дату и время, если формат даты не может быть обработан. 
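A small comparison of the two fallback variants; the exact rendering of the zero value depends on the server timezone:

```sql
SELECT
    parseDateTime64BestEffortOrNull('not a date') AS as_null,   -- NULL
    parseDateTime64BestEffortOrZero('not a date') AS as_zero;   -- zero DateTime64, i.e. 1970-01-01 00:00:00.000
```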
## toLowCardinality {#tolowcardinality} From 123715300aef2798195b109b560d61791a8bb5af Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 14 May 2021 15:30:49 +0300 Subject: [PATCH 061/204] Use keeper in integration tests --- .../runner/compose/docker_compose_keeper.yml | 92 ++++++++++++++++++ tests/integration/helpers/cluster.py | 96 ++++++++++++------- tests/integration/helpers/keeper_config1.xml | 38 ++++++++ tests/integration/helpers/keeper_config2.xml | 38 ++++++++ tests/integration/helpers/keeper_config3.xml | 38 ++++++++ 5 files changed, 270 insertions(+), 32 deletions(-) create mode 100644 docker/test/integration/runner/compose/docker_compose_keeper.yml create mode 100644 tests/integration/helpers/keeper_config1.xml create mode 100644 tests/integration/helpers/keeper_config2.xml create mode 100644 tests/integration/helpers/keeper_config3.xml diff --git a/docker/test/integration/runner/compose/docker_compose_keeper.yml b/docker/test/integration/runner/compose/docker_compose_keeper.yml new file mode 100644 index 00000000000..14d878a8271 --- /dev/null +++ b/docker/test/integration/runner/compose/docker_compose_keeper.yml @@ -0,0 +1,92 @@ +version: '2.3' +services: + zoo1: + image: ${image:-} + restart: always + user: ${user:-} + volumes: + - type: bind + source: ${keeper_binary:-} + target: /usr/bin/clickhouse + - type: bind + source: ${keeper_config_dir1:-} + target: /etc/clickhouse-keeper + - type: bind + source: ${keeper_logs_dir1:-} + target: /var/log/clickhouse-keeper + - type: ${keeper_fs:-tmpfs} + source: ${keeper_db_dir1:-} + target: /var/lib/clickhouse/coordination + entrypoint: "clickhouse keeper --config=/etc/clickhouse-keeper/keeper_config1.xml --log-file=/var/log/clickhouse-keeper/clickhouse-keeper.log --errorlog-file=/var/log/clickhouse-keeper/clickhouse-keeper.err.log" + cap_add: + - SYS_PTRACE + - NET_ADMIN + - IPC_LOCK + - SYS_NICE + security_opt: + - label:disable + dns_opt: + - attempts:2 + - timeout:1 + - inet6 + - rotate + zoo2: + image: ${image:-} + restart: always + user: ${user:-} + volumes: + - type: bind + source: ${keeper_binary:-} + target: /usr/bin/clickhouse + - type: bind + source: ${keeper_config_dir2:-} + target: /etc/clickhouse-keeper + - type: bind + source: ${keeper_logs_dir2:-} + target: /var/log/clickhouse-keeper + - type: ${keeper_fs:-tmpfs} + source: ${keeper_db_dir2:-} + target: /var/lib/clickhouse/coordination + entrypoint: "clickhouse keeper --config=/etc/clickhouse-keeper/keeper_config2.xml --log-file=/var/log/clickhouse-keeper/clickhouse-keeper.log --errorlog-file=/var/log/clickhouse-keeper/clickhouse-keeper.err.log" + cap_add: + - SYS_PTRACE + - NET_ADMIN + - IPC_LOCK + - SYS_NICE + security_opt: + - label:disable + dns_opt: + - attempts:2 + - timeout:1 + - inet6 + - rotate + zoo3: + image: ${image:-} + restart: always + user: ${user:-} + volumes: + - type: bind + source: ${keeper_binary:-} + target: /usr/bin/clickhouse + - type: bind + source: ${keeper_config_dir3:-} + target: /etc/clickhouse-keeper + - type: bind + source: ${keeper_logs_dir3:-} + target: /var/log/clickhouse-keeper + - type: ${keeper_fs:-tmpfs} + source: ${keeper_db_dir3:-} + target: /var/lib/clickhouse/coordination + entrypoint: "clickhouse keeper --config=/etc/clickhouse-keeper/keeper_config3.xml --log-file=/var/log/clickhouse-keeper/clickhouse-keeper.log --errorlog-file=/var/log/clickhouse-keeper/clickhouse-keeper.err.log" + cap_add: + - SYS_PTRACE + - NET_ADMIN + - IPC_LOCK + - SYS_NICE + security_opt: + - label:disable + dns_opt: + - attempts:2 + - timeout:1 + - inet6 
+ - rotate diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index ed28c3a7fc4..da0898b7c23 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -37,6 +37,7 @@ DEFAULT_ENV_NAME = 'env_file' SANITIZER_SIGN = "==================" +USE_KEEPER = True def _create_env_file(path, variables, fname=DEFAULT_ENV_NAME): full_path = os.path.join(path, fname) @@ -202,6 +203,7 @@ class ClickHouseCluster: self.docker_client = None self.is_up = False + self.env = os.environ.copy() print("CLUSTER INIT base_config_dir:{}".format(self.base_config_dir)) def get_client_cmd(self): @@ -291,7 +293,10 @@ class ClickHouseCluster: cmds = [] if with_zookeeper and not self.with_zookeeper: if not zookeeper_docker_compose_path: - zookeeper_docker_compose_path = p.join(docker_compose_yml_dir, 'docker_compose_zookeeper.yml') + if USE_KEEPER: + zookeeper_docker_compose_path = p.join(docker_compose_yml_dir, 'docker_compose_keeper.yml') + else: + zookeeper_docker_compose_path = p.join(docker_compose_yml_dir, 'docker_compose_zookeeper.yml') self.with_zookeeper = True self.zookeeper_use_tmpfs = zookeeper_use_tmpfs @@ -672,29 +677,59 @@ class ClickHouseCluster: except: pass - clickhouse_pull_cmd = self.base_cmd + ['pull'] - print(f"Pulling images for {self.base_cmd}") - retry_exception(10, 5, subprocess_check_call, Exception, clickhouse_pull_cmd) + #clickhouse_pull_cmd = self.base_cmd + ['pull'] + #print(f"Pulling images for {self.base_cmd}") + #retry_exception(10, 5, subprocess_check_call, Exception, clickhouse_pull_cmd) self.docker_client = docker.from_env(version=self.docker_api_version) common_opts = ['up', '-d'] if self.with_zookeeper and self.base_zookeeper_cmd: - print('Setup ZooKeeper') - env = os.environ.copy() - if not self.zookeeper_use_tmpfs: - env['ZK_FS'] = 'bind' - for i in range(1, 4): - zk_data_path = self.instances_dir + '/zkdata' + str(i) - zk_log_data_path = self.instances_dir + '/zklog' + str(i) - if not os.path.exists(zk_data_path): - os.mkdir(zk_data_path) - if not os.path.exists(zk_log_data_path): - os.mkdir(zk_log_data_path) - env['ZK_DATA' + str(i)] = zk_data_path - env['ZK_DATA_LOG' + str(i)] = zk_log_data_path - run_and_check(self.base_zookeeper_cmd + common_opts, env=env) + if USE_KEEPER: + print('Setup Keeper') + binary_path = self.server_bin_path + if binary_path.endswith('-server'): + binary_path = binary_path[:-len('-server')] + + self.env['keeper_binary'] = binary_path + self.env['image'] = "yandex/clickhouse-integration-test:" + self.docker_base_tag + self.env['user'] = str(os.getuid()) + if not self.zookeeper_use_tmpfs: + self.env['keeper_fs'] = 'bind' + + for i in range (1, 4): + instance_dir = p.join(self.instances_dir, f"keeper{i}") + logs_dir = p.join(instance_dir, "logs") + configs_dir = p.join(instance_dir, "configs") + coordination_dir = p.join(instance_dir, "coordination") + if not os.path.exists(instance_dir): + os.mkdir(instance_dir) + os.mkdir(configs_dir) + os.mkdir(logs_dir) + if not self.zookeeper_use_tmpfs: + os.mkdir(coordination_dir) + shutil.copy(os.path.join(HELPERS_DIR, f'keeper_config{i}.xml'), configs_dir) + + self.env[f'keeper_logs_dir{i}'] = p.abspath(logs_dir) + self.env[f'keeper_config_dir{i}'] = p.abspath(configs_dir) + if not self.zookeeper_use_tmpfs: + self.env[f'keeper_db_dir{i}'] = p.abspath(coordination_dir) + else: + print('Setup ZooKeeper') + if not self.zookeeper_use_tmpfs: + self.env['ZK_FS'] = 'bind' + for i in range(1, 4): + zk_data_path = self.instances_dir + '/zkdata' + 
str(i) + zk_log_data_path = self.instances_dir + '/zklog' + str(i) + if not os.path.exists(zk_data_path): + os.mkdir(zk_data_path) + if not os.path.exists(zk_log_data_path): + os.mkdir(zk_log_data_path) + self.env['ZK_DATA' + str(i)] = zk_data_path + self.env['ZK_DATA_LOG' + str(i)] = zk_log_data_path + + run_and_check(self.base_zookeeper_cmd + common_opts, env=self.env) for command in self.pre_zookeeper_commands: self.run_kazoo_commands_with_retries(command, repeats=5) self.wait_zookeeper_to_start(120) @@ -731,9 +766,8 @@ class ClickHouseCluster: if self.with_kerberized_kafka and self.base_kerberized_kafka_cmd: print('Setup kerberized kafka') - env = os.environ.copy() - env['KERBERIZED_KAFKA_DIR'] = instance.path + '/' - run_and_check(self.base_kerberized_kafka_cmd + common_opts + ['--renew-anon-volumes'], env=env) + self.env['KERBERIZED_KAFKA_DIR'] = instance.path + '/' + run_and_check(self.base_kerberized_kafka_cmd + common_opts + ['--renew-anon-volumes'], env=self.env) self.kerberized_kafka_docker_id = self.get_instance_docker_id('kerberized_kafka1') if self.with_rabbitmq and self.base_rabbitmq_cmd: subprocess_check_call(self.base_rabbitmq_cmd + common_opts + ['--renew-anon-volumes']) @@ -747,9 +781,8 @@ class ClickHouseCluster: if self.with_kerberized_hdfs and self.base_kerberized_hdfs_cmd: print('Setup kerberized HDFS') - env = os.environ.copy() - env['KERBERIZED_HDFS_DIR'] = instance.path + '/' - run_and_check(self.base_kerberized_hdfs_cmd + common_opts, env=env) + self.env['KERBERIZED_HDFS_DIR'] = instance.path + '/' + run_and_check(self.base_kerberized_hdfs_cmd + common_opts, env=self.env) self.make_hdfs_api(kerberized=True) self.wait_hdfs_to_start(timeout=300) @@ -764,23 +797,22 @@ class ClickHouseCluster: time.sleep(10) if self.with_minio and self.base_minio_cmd: - env = os.environ.copy() prev_ca_certs = os.environ.get('SSL_CERT_FILE') if self.minio_certs_dir: minio_certs_dir = p.join(self.base_dir, self.minio_certs_dir) - env['MINIO_CERTS_DIR'] = minio_certs_dir + self.env['MINIO_CERTS_DIR'] = minio_certs_dir # Minio client (urllib3) uses SSL_CERT_FILE for certificate validation. os.environ['SSL_CERT_FILE'] = p.join(minio_certs_dir, 'public.crt') else: # Attach empty certificates directory to ensure non-secure mode. 
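
A side note on the environment plumbing introduced in this patch: docker-compose resolves `${var:-default}` placeholders (such as `${keeper_binary:-}` and `${keeper_config_dir1:-}` in `docker_compose_keeper.yml`) from the environment of the process that invokes it, which is why the helper now collects everything into a single `self.env` dictionary and passes it to each `run_and_check` call. A minimal standalone sketch of that mechanism follows; the paths are hypothetical and only the first keeper instance is shown, whereas the real helper fills in all three.

```python
import os
import subprocess

# Hypothetical values for illustration only; the real ones are built per
# keeper instance inside ClickHouseCluster.start().
env = os.environ.copy()
env["keeper_binary"] = "/usr/bin/clickhouse"
env["keeper_config_dir1"] = "/tmp/keeper1/configs"
env["keeper_logs_dir1"] = "/tmp/keeper1/logs"
env["keeper_db_dir1"] = "/tmp/keeper1/coordination"
env["keeper_fs"] = "bind"  # leave unset to fall back to the tmpfs default

# docker-compose substitutes ${keeper_config_dir1:-} and friends from `env`.
subprocess.check_call(
    ["docker-compose", "-f", "docker_compose_keeper.yml", "up", "-d"],
    env=env,
)
```
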
minio_certs_dir = p.join(self.instances_dir, 'empty_minio_certs_dir') os.mkdir(minio_certs_dir) - env['MINIO_CERTS_DIR'] = minio_certs_dir + self.env['MINIO_CERTS_DIR'] = minio_certs_dir minio_start_cmd = self.base_minio_cmd + common_opts logging.info("Trying to create Minio instance by command %s", ' '.join(map(str, minio_start_cmd))) - run_and_check(minio_start_cmd, env=env) + run_and_check(minio_start_cmd, env=self.env) try: logging.info("Trying to connect to Minio...") @@ -799,7 +831,7 @@ class ClickHouseCluster: clickhouse_start_cmd = self.base_cmd + ['up', '-d', '--no-recreate'] print(("Trying to create ClickHouse instance by command %s", ' '.join(map(str, clickhouse_start_cmd)))) - subprocess_check_call(clickhouse_start_cmd) + run_and_check(clickhouse_start_cmd, env=self.env) print("ClickHouse instance created") start_deadline = time.time() + 20.0 # seconds @@ -825,7 +857,7 @@ class ClickHouseCluster: sanitizer_assert_instance = None with open(self.docker_logs_path, "w+") as f: try: - subprocess.check_call(self.base_cmd + ['logs'], stdout=f) # STYLE_CHECK_ALLOW_SUBPROCESS_CHECK_CALL + subprocess.check_call(self.base_cmd + ['logs'], env=self.env, stdout=f) # STYLE_CHECK_ALLOW_SUBPROCESS_CHECK_CALL except Exception as e: print("Unable to get logs from docker.") f.seek(0) @@ -836,14 +868,14 @@ class ClickHouseCluster: if kill: try: - subprocess_check_call(self.base_cmd + ['stop', '--timeout', '20']) + run_and_check(self.base_cmd + ['stop', '--timeout', '20'], env=self.env) except Exception as e: print("Kill command failed during shutdown. {}".format(repr(e))) print("Trying to kill forcefully") subprocess_check_call(self.base_cmd + ['kill']) try: - subprocess_check_call(self.base_cmd + ['down', '--volumes', '--remove-orphans']) + run_and_check(self.base_cmd + ['down', '--volumes', '--remove-orphans'], env=self.env) except Exception as e: print("Down + remove orphans failed durung shutdown. 
{}".format(repr(e))) diff --git a/tests/integration/helpers/keeper_config1.xml b/tests/integration/helpers/keeper_config1.xml new file mode 100644 index 00000000000..2d2f66cbc39 --- /dev/null +++ b/tests/integration/helpers/keeper_config1.xml @@ -0,0 +1,38 @@ + + 0.0.0.0 + + + trace + /var/log/clickhouse-keeper/clickhouse-keeper.log + /var/log/clickhouse-keeper/clickhouse-keeper.err.log + + + + 2181 + 1 + + + 10000 + 30000 + false + + + + + 1 + zoo1 + 9444 + + + 2 + zoo2 + 9444 + + + 3 + zoo3 + 9444 + + + + diff --git a/tests/integration/helpers/keeper_config2.xml b/tests/integration/helpers/keeper_config2.xml new file mode 100644 index 00000000000..81976015df1 --- /dev/null +++ b/tests/integration/helpers/keeper_config2.xml @@ -0,0 +1,38 @@ + + 0.0.0.0 + + + trace + /var/log/clickhouse-keeper/clickhouse-keeper.log + /var/log/clickhouse-keeper/clickhouse-keeper.err.log + + + + 2181 + 2 + + + 10000 + 30000 + false + + + + + 1 + zoo1 + 9444 + + + 2 + zoo2 + 9444 + + + 3 + zoo3 + 9444 + + + + diff --git a/tests/integration/helpers/keeper_config3.xml b/tests/integration/helpers/keeper_config3.xml new file mode 100644 index 00000000000..07a9e5fc6b7 --- /dev/null +++ b/tests/integration/helpers/keeper_config3.xml @@ -0,0 +1,38 @@ + + 0.0.0.0 + + + trace + /var/log/clickhouse-keeper/clickhouse-keeper.log + /var/log/clickhouse-keeper/clickhouse-keeper.err.log + + + + 2181 + 3 + + + 10000 + 30000 + false + + + + + 1 + zoo1 + 9444 + + + 2 + zoo2 + 9444 + + + 3 + zoo3 + 9444 + + + + From 41776ba5d9571d1b7f41b2197ea5b4d5c15e5d2f Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 14 May 2021 15:55:30 +0300 Subject: [PATCH 062/204] fix --- src/Storages/MergeTree/MergeTreeData.cpp | 2 +- src/Storages/MergeTree/MergeTreePartInfo.cpp | 29 +++++++++++++++++-- src/Storages/MergeTree/MergeTreePartInfo.h | 4 +++ .../MergeTree/ReplicatedMergeTreeQueue.cpp | 1 - src/Storages/StorageReplicatedMergeTree.cpp | 9 +++--- ...ce => 01154_move_partition_long.reference} | 0 ...tition.sh => 01154_move_partition_long.sh} | 9 +++--- 7 files changed, 41 insertions(+), 13 deletions(-) rename tests/queries/0_stateless/{01154_move_partition.reference => 01154_move_partition_long.reference} (100%) rename tests/queries/0_stateless/{01154_move_partition.sh => 01154_move_partition_long.sh} (97%) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 3d2c80c1490..958db83cc6e 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -3215,7 +3215,7 @@ MergeTreeData::DataPartsVector MergeTreeData::getDataPartsVector( { std::swap(buf, res); res.clear(); - std::merge(range.begin(), range.end(), buf.begin(), buf.end(), std::back_inserter(res), LessDataPart()); + std::merge(range.begin(), range.end(), buf.begin(), buf.end(), std::back_inserter(res), LessDataPart()); //-V783 } } diff --git a/src/Storages/MergeTree/MergeTreePartInfo.cpp b/src/Storages/MergeTree/MergeTreePartInfo.cpp index d30f6470bb1..94430de422e 100644 --- a/src/Storages/MergeTree/MergeTreePartInfo.cpp +++ b/src/Storages/MergeTree/MergeTreePartInfo.cpp @@ -93,6 +93,15 @@ bool MergeTreePartInfo::tryParsePartName(const String & part_name, MergeTreePart part_info->partition_id = std::move(partition_id); part_info->min_block = min_block_num; part_info->max_block = max_block_num; + if (level == LEGACY_MAX_LEVEL) + { + /// We (accidentally) had two different max levels until 21.6 and it might cause logical errors like + /// "Part 20170601_20170630_0_2_999999999 intersects 
201706_0_1_4294967295". + /// So we replace unexpected max level to make contains(...) method and comparison operators work + /// correctly with such virtual parts. On part name serialization we will use legacy max level to keep the name unchanged. + part_info->use_leagcy_max_level = true; + level = MAX_LEVEL; + } part_info->level = level; part_info->mutation = mutation; } @@ -146,7 +155,15 @@ String MergeTreePartInfo::getPartName() const writeChar('_', wb); writeIntText(max_block, wb); writeChar('_', wb); - writeIntText(level, wb); + if (use_leagcy_max_level) + { + assert(level == MAX_LEVEL); + writeIntText(LEGACY_MAX_LEVEL, wb); + } + else + { + writeIntText(level, wb); + } if (mutation) { @@ -177,7 +194,15 @@ String MergeTreePartInfo::getPartNameV0(DayNum left_date, DayNum right_date) con writeChar('_', wb); writeIntText(max_block, wb); writeChar('_', wb); - writeIntText(level, wb); + if (use_leagcy_max_level) + { + assert(level == MAX_LEVEL); + writeIntText(LEGACY_MAX_LEVEL, wb); + } + else + { + writeIntText(level, wb); + } if (mutation) { diff --git a/src/Storages/MergeTree/MergeTreePartInfo.h b/src/Storages/MergeTree/MergeTreePartInfo.h index 416308861b7..e81143de8c2 100644 --- a/src/Storages/MergeTree/MergeTreePartInfo.h +++ b/src/Storages/MergeTree/MergeTreePartInfo.h @@ -19,6 +19,8 @@ struct MergeTreePartInfo UInt32 level = 0; Int64 mutation = 0; /// If the part has been mutated or contains mutated parts, is equal to mutation version number. + bool use_leagcy_max_level = false; /// For compatibility. TODO remove it + MergeTreePartInfo() = default; MergeTreePartInfo(String partition_id_, Int64 min_block_, Int64 max_block_, UInt32 level_) @@ -86,6 +88,8 @@ struct MergeTreePartInfo static constexpr UInt32 MAX_LEVEL = 999999999; static constexpr UInt32 MAX_BLOCK_NUMBER = 999999999; + + static constexpr UInt32 LEGACY_MAX_LEVEL = std::numeric_limits::max(); }; /// Information about detached part, which includes its prefix in diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index f52a38b3706..771b04b7f58 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -16,7 +16,6 @@ namespace ErrorCodes { extern const int LOGICAL_ERROR; extern const int UNEXPECTED_NODE_IN_ZOOKEEPER; - extern const int UNFINISHED; extern const int ABORTED; } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 1c69c9ad6fd..21df8c9d22d 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -2518,7 +2518,7 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) parts_to_remove_str += part->name; parts_to_remove_str += " "; } - LOG_TRACE(log, "Replacing {} parts {}with {} parts ", parts_to_remove.size(), parts_to_remove_str, + LOG_TRACE(log, "Replacing {} parts {}with {} parts {}", parts_to_remove.size(), parts_to_remove_str, final_parts.size(), boost::algorithm::join(final_part_names, ", ")); } } @@ -4907,13 +4907,12 @@ bool StorageReplicatedMergeTree::getFakePartCoveringAllPartsInPartition(const St if (for_replace_range) { - /// NOTE Undo max block number decrement for REPLACE_RANGE, because there are invariants: + /// NOTE Do not decrement max block number for REPLACE_RANGE, because there are invariants: /// - drop range for REPLACE PARTITION must contain at least 2 blocks (1 skipped block and at least 1 real block) /// - drop range for MOVE 
PARTITION/ATTACH PARTITION FROM always contains 1 block - /// REPLACE/MOVE PARTITION uses different max level for unknown (probably historical) reason. - auto max_level = std::numeric_limits::max(); - part_info = MergeTreePartInfo(partition_id, left, right, max_level, mutation_version); + /// NOTE UINT_MAX was previously used as max level for REPLACE/MOVE PARTITION (it was incorrect) + part_info = MergeTreePartInfo(partition_id, left, right, MergeTreePartInfo::MAX_LEVEL, mutation_version); return right != 0; } diff --git a/tests/queries/0_stateless/01154_move_partition.reference b/tests/queries/0_stateless/01154_move_partition_long.reference similarity index 100% rename from tests/queries/0_stateless/01154_move_partition.reference rename to tests/queries/0_stateless/01154_move_partition_long.reference diff --git a/tests/queries/0_stateless/01154_move_partition.sh b/tests/queries/0_stateless/01154_move_partition_long.sh similarity index 97% rename from tests/queries/0_stateless/01154_move_partition.sh rename to tests/queries/0_stateless/01154_move_partition_long.sh index 6d4fef3876b..f666cc929cc 100755 --- a/tests/queries/0_stateless/01154_move_partition.sh +++ b/tests/queries/0_stateless/01154_move_partition_long.sh @@ -76,9 +76,9 @@ function optimize_thread() { while true; do REPLICA=$(($RANDOM % 16)) - TABLE="src_" + TABLE="src" if (( RANDOM % 2 )); then - TABLE="dst_" + TABLE="dst" fi $CLICKHOUSE_CLIENT -q "OPTIMIZE TABLE ${TABLE}_$REPLICA" 2>/dev/null sleep 0.$RANDOM; @@ -105,9 +105,10 @@ timeout $TIMEOUT bash -c optimize_thread & wait for ((i=0; i<16; i++)) do - $CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA dst_$i" - $CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA src_$i" 2>/dev/null + $CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA dst_$i" & + $CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA src_$i" 2>/dev/null & done +wait echo "Replication did not hang" for ((i=0; i<16; i++)) do From de75d9c887c7e4775a595aca3eb4a98f775ccd09 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 14 May 2021 16:44:32 +0300 Subject: [PATCH 063/204] Revert accident changes --- tests/integration/helpers/cluster.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index da0898b7c23..1edee6216a6 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -677,9 +677,9 @@ class ClickHouseCluster: except: pass - #clickhouse_pull_cmd = self.base_cmd + ['pull'] - #print(f"Pulling images for {self.base_cmd}") - #retry_exception(10, 5, subprocess_check_call, Exception, clickhouse_pull_cmd) + clickhouse_pull_cmd = self.base_cmd + ['pull'] + print(f"Pulling images for {self.base_cmd}") + retry_exception(10, 5, subprocess_check_call, Exception, clickhouse_pull_cmd) self.docker_client = docker.from_env(version=self.docker_api_version) From 18bf681a83066f8f75537326730bddb12c67ffa4 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 14 May 2021 18:35:30 +0300 Subject: [PATCH 064/204] Fix pull --- .../integration/runner/compose/docker_compose_keeper.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docker/test/integration/runner/compose/docker_compose_keeper.yml b/docker/test/integration/runner/compose/docker_compose_keeper.yml index 14d878a8271..39d4a9c53d9 100644 --- a/docker/test/integration/runner/compose/docker_compose_keeper.yml +++ b/docker/test/integration/runner/compose/docker_compose_keeper.yml @@ -1,7 +1,7 @@ version: '2.3' services: zoo1: - image: ${image:-} + image: 
${image:-yandex/clickhouse-integration-test} restart: always user: ${user:-} volumes: @@ -31,7 +31,7 @@ services: - inet6 - rotate zoo2: - image: ${image:-} + image: ${image:-yandex/clickhouse-integration-test} restart: always user: ${user:-} volumes: @@ -61,7 +61,7 @@ services: - inet6 - rotate zoo3: - image: ${image:-} + image: ${image:-yandex/clickhouse-integration-test} restart: always user: ${user:-} volumes: From df5f3fbc9d7f50426a699077044ba3e1f50cc576 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 14 May 2021 19:11:40 +0300 Subject: [PATCH 065/204] review suggestions --- .../MergeTree/ReplicatedMergeTreeLogEntry.cpp | 12 ++++-- .../MergeTree/ReplicatedMergeTreeLogEntry.h | 3 ++ .../MergeTree/ReplicatedMergeTreeQueue.cpp | 7 ---- .../MergeTree/ReplicatedMergeTreeQueue.h | 7 ---- src/Storages/StorageReplicatedMergeTree.cpp | 38 ++++++------------- tests/queries/skip_list.json | 1 + 6 files changed, 23 insertions(+), 45 deletions(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp index 22e8b7afa17..6d028725720 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp @@ -367,6 +367,12 @@ void ReplicatedMergeTreeLogEntryData::ReplaceRangeEntry::readText(ReadBuffer & i in >> "columns_version: " >> columns_version; } +bool ReplicatedMergeTreeLogEntryData::ReplaceRangeEntry::isMovePartitionOrAttachFrom(const MergeTreePartInfo & drop_range_info) +{ + assert(drop_range_info.getBlocksCount() != 0); + return drop_range_info.getBlocksCount() == 1; +} + String ReplicatedMergeTreeLogEntryData::toString() const { WriteBufferFromOwnString out; @@ -397,8 +403,7 @@ Strings ReplicatedMergeTreeLogEntryData::getVirtualPartNames(MergeTreeDataFormat if (type == DROP_RANGE) return {new_part_name}; - /// Return {} because selection of merges in the partition where the column is cleared - /// should not be blocked (only execution of merges should be blocked). 
+ /// CLEAR_COLUMN and CLEAR_INDEX are deprecated since 20.3 if (type == CLEAR_COLUMN || type == CLEAR_INDEX) return {}; @@ -406,8 +411,7 @@ Strings ReplicatedMergeTreeLogEntryData::getVirtualPartNames(MergeTreeDataFormat { Strings res = replace_range_entry->new_part_names; auto drop_range_info = MergeTreePartInfo::fromPartName(replace_range_entry->drop_range_part_name, format_version); - assert(drop_range_info.getBlocksCount() != 0); - if (drop_range_info.getBlocksCount() > 1) + if (!ReplaceRangeEntry::isMovePartitionOrAttachFrom(drop_range_info)) { /// It's REPLACE, not MOVE or ATTACH, so drop range is real res.emplace_back(replace_range_entry->drop_range_part_name); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h index 8ea78bacd38..c6ed0681f00 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h @@ -19,6 +19,7 @@ namespace DB class ReadBuffer; class WriteBuffer; class ReplicatedMergeTreeQueue; +struct MergeTreePartInfo; namespace ErrorCodes { @@ -110,6 +111,8 @@ struct ReplicatedMergeTreeLogEntryData void writeText(WriteBuffer & out) const; void readText(ReadBuffer & in); + + static bool isMovePartitionOrAttachFrom(const MergeTreePartInfo & drop_range_info); }; std::shared_ptr replace_range_entry; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 771b04b7f58..b81b5712594 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -1578,13 +1578,6 @@ bool ReplicatedMergeTreeQueue::tryFinalizeMutations(zkutil::ZooKeeperPtr zookeep } -void ReplicatedMergeTreeQueue::disableMergesInBlockRangeOnLocalReplica(const String & part_name) -{ - std::lock_guard lock(state_mutex); - virtual_parts.add(part_name); -} - - ReplicatedMergeTreeQueue::Status ReplicatedMergeTreeQueue::getStatus() const { std::lock_guard lock(state_mutex); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h index a902b2dad93..8b1028623b2 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h @@ -377,13 +377,6 @@ public: /// (because some mutations are probably done but we are not sure yet), returns true. bool tryFinalizeMutations(zkutil::ZooKeeperPtr zookeeper); - /// Prohibit merges in the specified blocks range. - /// Add part to virtual_parts, which means that part must exist - /// after processing replication log up to log_pointer. - /// Part maybe fake (look at ReplicatedMergeTreeMergePredicate). - /// NOTE It does not disable merges on other leader replicas. 
- void disableMergesInBlockRangeOnLocalReplica(const String & part_name); - /// Checks that part is already in virtual parts bool isVirtualPart(const MergeTreeData::DataPartPtr & data_part) const; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 21df8c9d22d..c0ae46552ec 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -2191,7 +2191,7 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) MergeTreePartInfo drop_range = MergeTreePartInfo::fromPartName(entry_replace.drop_range_part_name, format_version); /// Range with only one block has special meaning: it's ATTACH PARTITION or MOVE PARTITION, so there is no drop range - bool replace = drop_range.getBlocksCount() > 1; + bool replace = !LogEntry::ReplaceRangeEntry::isMovePartitionOrAttachFrom(drop_range); if (replace) queue.removePartProducingOpsInRange(getZooKeeper(), drop_range, entry); @@ -6234,18 +6234,10 @@ void StorageReplicatedMergeTree::replacePartitionFrom( drop_range = makeDummyDropRangeForMovePartitionOrAttachPartitionFrom(partition_id); } - assert(drop_range.getBlocksCount() > 0); - assert(replace == (drop_range.getBlocksCount() > 1)); + assert(replace == !LogEntry::ReplaceRangeEntry::isMovePartitionOrAttachFrom(drop_range)); String drop_range_fake_part_name = getPartNamePossiblyFake(format_version, drop_range); - if (replace) - { - /// Optional step - std::lock_guard merge_selecting_lock(merge_selecting_mutex); - queue.disableMergesInBlockRangeOnLocalReplica(drop_range_fake_part_name); - } - for (const auto & src_part : src_all_parts) { /// We also make some kind of deduplication to avoid duplicated parts in case of ATTACH PARTITION @@ -6336,7 +6328,8 @@ void StorageReplicatedMergeTree::replacePartitionFrom( txn->moveOpsTo(ops); delimiting_block_lock->getUnlockOps(ops); - ops.emplace_back(zkutil::makeSetRequest(zookeeper_path + "/log", "", -1)); /// Just update version + /// Just update version, because merges assignment relies on it + ops.emplace_back(zkutil::makeSetRequest(zookeeper_path + "/log", "", -1)); ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/log/log-", entry.toString(), zkutil::CreateMode::PersistentSequential)); Transaction transaction(*this); @@ -6427,16 +6420,9 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta MergeTreePartInfo drop_range; std::optional delimiting_block_lock; - bool partition_was_not_empty = getFakePartCoveringAllPartsInPartition(partition_id, drop_range, delimiting_block_lock, true); + getFakePartCoveringAllPartsInPartition(partition_id, drop_range, delimiting_block_lock, true); String drop_range_fake_part_name = getPartNamePossiblyFake(format_version, drop_range); - if (partition_was_not_empty) - { - /// Optional step - std::lock_guard merge_selecting_lock(merge_selecting_mutex); - queue.disableMergesInBlockRangeOnLocalReplica(drop_range_fake_part_name); - } - /// Clone parts into destination table. 
for (const auto & src_part : src_all_parts) @@ -6570,7 +6556,8 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta ops_src.emplace_back(zkutil::makeCreateRequest( zookeeper_path + "/log/log-", entry_delete.toString(), zkutil::CreateMode::PersistentSequential)); - ops_src.emplace_back(zkutil::makeSetRequest(zookeeper_path + "/log", "", -1)); /// Just update version + /// Just update version, because merges assignment relies on it + ops_src.emplace_back(zkutil::makeSetRequest(zookeeper_path + "/log", "", -1)); delimiting_block_lock->getUnlockOps(ops_src); op_results = zookeeper->multi(ops_src); @@ -6779,7 +6766,8 @@ bool StorageReplicatedMergeTree::dropPart( ops.emplace_back(zkutil::makeCheckRequest(zookeeper_path + "/log", merge_pred.getVersion())); /// Make sure no new events were added to the log. ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/log/log-", entry.toString(), zkutil::CreateMode::PersistentSequential)); - ops.emplace_back(zkutil::makeSetRequest(zookeeper_path + "/log", "", -1)); /// Just update version. + /// Just update version, because merges assignment relies on it + ops.emplace_back(zkutil::makeSetRequest(zookeeper_path + "/log", "", -1)); Coordination::Responses responses; Coordination::Error rc = zookeeper->tryMulti(ops, responses); @@ -6818,11 +6806,6 @@ bool StorageReplicatedMergeTree::dropAllPartsInPartition( clearBlocksInPartition(zookeeper, partition_id, drop_range_info.min_block, drop_range_info.max_block); String drop_range_fake_part_name = getPartNamePossiblyFake(format_version, drop_range_info); - { - /// Optional step - std::lock_guard merge_selecting_lock(merge_selecting_mutex); - queue.disableMergesInBlockRangeOnLocalReplica(drop_range_fake_part_name); - } LOG_DEBUG(log, "Disabled merges covered by range {}", drop_range_fake_part_name); @@ -6835,7 +6818,8 @@ bool StorageReplicatedMergeTree::dropAllPartsInPartition( Coordination::Requests ops; ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/log/log-", entry.toString(), zkutil::CreateMode::PersistentSequential)); - ops.emplace_back(zkutil::makeSetRequest(zookeeper_path + "/log", "", -1)); /// Just update version. 
+ /// Just update version, because merges assignment relies on it + ops.emplace_back(zkutil::makeSetRequest(zookeeper_path + "/log", "", -1)); delimiting_block_lock->getUnlockOps(ops); if (auto txn = query_context->getZooKeeperMetadataTransaction()) txn->moveOpsTo(ops); diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json index bc12fed0c92..5e2d15a10aa 100644 --- a/tests/queries/skip_list.json +++ b/tests/queries/skip_list.json @@ -571,6 +571,7 @@ "01150_ddl_guard_rwr", "01153_attach_mv_uuid", "01152_cross_replication", + "01154_move_partition_long", "01185_create_or_replace_table", "01190_full_attach_syntax", "01191_rename_dictionary", From 26be39f4197e8282c25c73fef5970be92a84a7fe Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Tue, 11 May 2021 21:59:40 +0300 Subject: [PATCH 066/204] CompileExpressions comparison function constant case fix --- src/Interpreters/ExpressionJIT.cpp | 13 ++++++++++++ ...5_jit_comparison_constant_result.reference | 8 +++++++ .../01855_jit_comparison_constant_result.sql | 21 +++++++++++++++++++ 3 files changed, 42 insertions(+) diff --git a/src/Interpreters/ExpressionJIT.cpp b/src/Interpreters/ExpressionJIT.cpp index 4169d95bfa4..3d565dd384b 100644 --- a/src/Interpreters/ExpressionJIT.cpp +++ b/src/Interpreters/ExpressionJIT.cpp @@ -328,6 +328,13 @@ static bool checkIfFunctionIsComparisonEdgeCase(const ActionsDAG::Node & node, c NameGreaterOrEquals::name }; + /** Comparision operator is special case for ActionDAG compilation + * Its result can be constant and we can understand that only during Function execute call. + * It can be a problem if two DAGs with compare function are analyzed, but in first DAG comparison + * function is compiled, in second DAG it is not compiled. + * There will be error because of block headers mismatch. 
+ */ + auto it = comparison_functions.find(impl.getName()); if (it == comparison_functions.end()) return false; @@ -335,6 +342,12 @@ static bool checkIfFunctionIsComparisonEdgeCase(const ActionsDAG::Node & node, c const auto * lhs_node = node.children[0]; const auto * rhs_node = node.children[1]; + while (lhs_node->type == ActionsDAG::ActionType::ALIAS) + lhs_node = lhs_node->children[0]; + + while (rhs_node->type == ActionsDAG::ActionType::ALIAS) + rhs_node = rhs_node->children[0]; + return lhs_node == rhs_node && !isTuple(lhs_node->result_type); } diff --git a/tests/queries/0_stateless/01855_jit_comparison_constant_result.reference b/tests/queries/0_stateless/01855_jit_comparison_constant_result.reference index a9e2f17562a..e97edac16d6 100644 --- a/tests/queries/0_stateless/01855_jit_comparison_constant_result.reference +++ b/tests/queries/0_stateless/01855_jit_comparison_constant_result.reference @@ -1,3 +1,11 @@ +ComparisionOperator column with same column +1 +1 +1 +1 +1 +1 +ComparisionOperator column with alias on same column 1 1 1 diff --git a/tests/queries/0_stateless/01855_jit_comparison_constant_result.sql b/tests/queries/0_stateless/01855_jit_comparison_constant_result.sql index b8d06e218e0..51cf9aa1d17 100644 --- a/tests/queries/0_stateless/01855_jit_comparison_constant_result.sql +++ b/tests/queries/0_stateless/01855_jit_comparison_constant_result.sql @@ -1,6 +1,8 @@ SET compile_expressions = 1; SET min_count_to_compile_expression = 0; +SELECT 'ComparisionOperator column with same column'; + DROP TABLE IF EXISTS test_table; CREATE TABLE test_table (a UInt64) ENGINE = MergeTree() ORDER BY tuple(); INSERT INTO test_table VALUES (1); @@ -13,3 +15,22 @@ SELECT test_table.a FROM test_table ORDER BY (test_table.a <= test_table.a) + 1; SELECT test_table.a FROM test_table ORDER BY (test_table.a == test_table.a) + 1; SELECT test_table.a FROM test_table ORDER BY (test_table.a != test_table.a) + 1; + +DROP TABLE test_table; + +SELECT 'ComparisionOperator column with alias on same column'; + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table (a UInt64, b ALIAS a, c ALIAS b) ENGINE = MergeTree() ORDER BY tuple(); +INSERT INTO test_table VALUES (1); + +SELECT test_table.a FROM test_table ORDER BY (test_table.a > test_table.b) + 1 AND (test_table.a > test_table.c) + 1; +SELECT test_table.a FROM test_table ORDER BY (test_table.a >= test_table.b) + 1 AND (test_table.a >= test_table.c) + 1; + +SELECT test_table.a FROM test_table ORDER BY (test_table.a < test_table.b) + 1 AND (test_table.a < test_table.c) + 1; +SELECT test_table.a FROM test_table ORDER BY (test_table.a <= test_table.b) + 1 AND (test_table.a <= test_table.c) + 1; + +SELECT test_table.a FROM test_table ORDER BY (test_table.a == test_table.b) + 1 AND (test_table.a == test_table.c) + 1; +SELECT test_table.a FROM test_table ORDER BY (test_table.a != test_table.b) + 1 AND (test_table.a != test_table.c) + 1; + +DROP TABLE test_table; From f42b25e65c9d385d4a47b5711fb7678e8edbe6f3 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 12 May 2021 09:52:40 +0300 Subject: [PATCH 067/204] Update ExpressionJIT.cpp --- src/Interpreters/ExpressionJIT.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/ExpressionJIT.cpp b/src/Interpreters/ExpressionJIT.cpp index 3d565dd384b..09b2130e4bb 100644 --- a/src/Interpreters/ExpressionJIT.cpp +++ b/src/Interpreters/ExpressionJIT.cpp @@ -316,7 +316,7 @@ static bool isCompilableConstant(const ActionsDAG::Node & node) return node.column && 
isColumnConst(*node.column) && canBeNativeType(*node.result_type) && node.allow_constant_folding; } -static bool checkIfFunctionIsComparisonEdgeCase(const ActionsDAG::Node & node, const IFunctionBase & impl) +static bool checkIfFunctionIsComparisonEdgeCase(const ActionsDAG::Noomparision de & node, const IFunctionBase & impl) { static std::unordered_set comparison_functions { @@ -328,7 +328,7 @@ static bool checkIfFunctionIsComparisonEdgeCase(const ActionsDAG::Node & node, c NameGreaterOrEquals::name }; - /** Comparision operator is special case for ActionDAG compilation + /** Comparison operator is special case for ActionDAG compilation * Its result can be constant and we can understand that only during Function execute call. * It can be a problem if two DAGs with compare function are analyzed, but in first DAG comparison * function is compiled, in second DAG it is not compiled. From 18894005ac80fb799e789c0f6f5d3765f2c25103 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 12 May 2021 10:36:09 +0300 Subject: [PATCH 068/204] Update ExpressionJIT.cpp --- src/Interpreters/ExpressionJIT.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/ExpressionJIT.cpp b/src/Interpreters/ExpressionJIT.cpp index 09b2130e4bb..b26092154a0 100644 --- a/src/Interpreters/ExpressionJIT.cpp +++ b/src/Interpreters/ExpressionJIT.cpp @@ -316,7 +316,7 @@ static bool isCompilableConstant(const ActionsDAG::Node & node) return node.column && isColumnConst(*node.column) && canBeNativeType(*node.result_type) && node.allow_constant_folding; } -static bool checkIfFunctionIsComparisonEdgeCase(const ActionsDAG::Noomparision de & node, const IFunctionBase & impl) +static bool checkIfFunctionIsComparisonEdgeCase(const ActionsDAG::Node & node, const IFunctionBase & impl) { static std::unordered_set comparison_functions { From 0b4b5eb80883f7100979f03a49064195ccbb7745 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 12 May 2021 17:43:37 +0300 Subject: [PATCH 069/204] FunctionComparison remove constant result for non constant arguments optimization --- src/Functions/FunctionsComparison.h | 11 +++++++-- src/Interpreters/ExpressionJIT.cpp | 38 ----------------------------- 2 files changed, 9 insertions(+), 40 deletions(-) diff --git a/src/Functions/FunctionsComparison.h b/src/Functions/FunctionsComparison.h index f724915b3bc..f7c45558750 100644 --- a/src/Functions/FunctionsComparison.h +++ b/src/Functions/FunctionsComparison.h @@ -1148,17 +1148,24 @@ public: /// NOTE: We consider NaN comparison to be implementation specific (and in our implementation NaNs are sometimes equal sometimes not). 
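
For intuition about the hunk that follows, here is a toy sketch in plain Python (not ClickHouse code). When both arguments of `=`, `<=` or `>=` are literally the same non-constant column, the result is known to be all ones; previously it was returned as a constant column, which could make block headers differ depending on whether the expression was JIT-compiled, so the fix materializes it into a full column whenever the input itself is not constant.

```python
# Toy model of the two column shapes involved: a "constant" column is a single
# value plus a row count, a "full" column stores one value per row.
def self_equality(column, rows):
    is_const_input = isinstance(column, tuple) and column[0] == "const"
    result = ("const", 1, rows)   # always-true comparison folded to a constant
    if not is_const_input:
        result = [1] * rows       # analogous to convertToFullColumnIfConst()
    return result

print(self_equality([10, 20, 30], 3))      # [1, 1, 1]       -- full column
print(self_equality(("const", 42, 3), 3))  # ('const', 1, 3) -- stays constant
```
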
if (left_type->equals(*right_type) && !left_type->isNullable() && !isTuple(left_type) && col_left_untyped == col_right_untyped) { + ColumnPtr result_column; + /// Always true: =, <=, >= if constexpr (IsOperation::equals || IsOperation::less_or_equals || IsOperation::greater_or_equals) { - return DataTypeUInt8().createColumnConst(input_rows_count, 1u); + result_column = DataTypeUInt8().createColumnConst(input_rows_count, 1u); } else { - return DataTypeUInt8().createColumnConst(input_rows_count, 0u); + result_column = DataTypeUInt8().createColumnConst(input_rows_count, 0u); } + + if (!isColumnConst(*col_left_untyped)) + result_column = result_column->convertToFullColumnIfConst(); + + return result_column; } WhichDataType which_left{left_type}; diff --git a/src/Interpreters/ExpressionJIT.cpp b/src/Interpreters/ExpressionJIT.cpp index b26092154a0..3b23e232184 100644 --- a/src/Interpreters/ExpressionJIT.cpp +++ b/src/Interpreters/ExpressionJIT.cpp @@ -316,41 +316,6 @@ static bool isCompilableConstant(const ActionsDAG::Node & node) return node.column && isColumnConst(*node.column) && canBeNativeType(*node.result_type) && node.allow_constant_folding; } -static bool checkIfFunctionIsComparisonEdgeCase(const ActionsDAG::Node & node, const IFunctionBase & impl) -{ - static std::unordered_set comparison_functions - { - NameEquals::name, - NameNotEquals::name, - NameLess::name, - NameGreater::name, - NameLessOrEquals::name, - NameGreaterOrEquals::name - }; - - /** Comparison operator is special case for ActionDAG compilation - * Its result can be constant and we can understand that only during Function execute call. - * It can be a problem if two DAGs with compare function are analyzed, but in first DAG comparison - * function is compiled, in second DAG it is not compiled. - * There will be error because of block headers mismatch. 
- */ - - auto it = comparison_functions.find(impl.getName()); - if (it == comparison_functions.end()) - return false; - - const auto * lhs_node = node.children[0]; - const auto * rhs_node = node.children[1]; - - while (lhs_node->type == ActionsDAG::ActionType::ALIAS) - lhs_node = lhs_node->children[0]; - - while (rhs_node->type == ActionsDAG::ActionType::ALIAS) - rhs_node = rhs_node->children[0]; - - return lhs_node == rhs_node && !isTuple(lhs_node->result_type); -} - static bool isCompilableFunction(const ActionsDAG::Node & node) { if (node.type != ActionsDAG::ActionType::FUNCTION) @@ -367,9 +332,6 @@ static bool isCompilableFunction(const ActionsDAG::Node & node) return false; } - if (checkIfFunctionIsComparisonEdgeCase(node, *node.function_base)) - return false; - return function.isCompilable(); } From 6c334800c36aa8e5b0b18a6d289f10a1b7806033 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Thu, 13 May 2021 17:19:22 +0300 Subject: [PATCH 070/204] Disable broken test --- .../00911_tautological_compare.reference | 8 -------- .../0_stateless/00911_tautological_compare.sql | 16 ++++++++-------- 2 files changed, 8 insertions(+), 16 deletions(-) diff --git a/tests/queries/0_stateless/00911_tautological_compare.reference b/tests/queries/0_stateless/00911_tautological_compare.reference index 405d3348775..e69de29bb2d 100644 --- a/tests/queries/0_stateless/00911_tautological_compare.reference +++ b/tests/queries/0_stateless/00911_tautological_compare.reference @@ -1,8 +0,0 @@ -0 -0 -0 -0 -0 -0 -0 -0 diff --git a/tests/queries/0_stateless/00911_tautological_compare.sql b/tests/queries/0_stateless/00911_tautological_compare.sql index 34c95d73716..5de5fdfda19 100644 --- a/tests/queries/0_stateless/00911_tautological_compare.sql +++ b/tests/queries/0_stateless/00911_tautological_compare.sql @@ -1,10 +1,10 @@ -SELECT count() FROM system.numbers WHERE number != number; -SELECT count() FROM system.numbers WHERE number < number; -SELECT count() FROM system.numbers WHERE number > number; +-- SELECT count() FROM system.numbers WHERE number != number; +-- SELECT count() FROM system.numbers WHERE number < number; +-- SELECT count() FROM system.numbers WHERE number > number; -SELECT count() FROM system.numbers WHERE NOT (number = number); -SELECT count() FROM system.numbers WHERE NOT (number <= number); -SELECT count() FROM system.numbers WHERE NOT (number >= number); +-- SELECT count() FROM system.numbers WHERE NOT (number = number); +-- SELECT count() FROM system.numbers WHERE NOT (number <= number); +-- SELECT count() FROM system.numbers WHERE NOT (number >= number); -SELECT count() FROM system.numbers WHERE SHA256(toString(number)) != SHA256(toString(number)); -SELECT count() FROM system.numbers WHERE SHA256(toString(number)) != SHA256(toString(number)) AND rand() > 10; +-- SELECT count() FROM system.numbers WHERE SHA256(toString(number)) != SHA256(toString(number)); +-- SELECT count() FROM system.numbers WHERE SHA256(toString(number)) != SHA256(toString(number)) AND rand() > 10; From 20ef8e8bf23ef026c0dd7245f4f38746d114884f Mon Sep 17 00:00:00 2001 From: Anna <42538400+adevyatova@users.noreply.github.com> Date: Sat, 15 May 2021 06:39:29 +0300 Subject: [PATCH 071/204] Update docs/ru/operations/settings/merge-tree-settings.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/ru/operations/settings/merge-tree-settings.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/operations/settings/merge-tree-settings.md 
b/docs/ru/operations/settings/merge-tree-settings.md index a89140800eb..bc71caf1bda 100644 --- a/docs/ru/operations/settings/merge-tree-settings.md +++ b/docs/ru/operations/settings/merge-tree-settings.md @@ -162,7 +162,7 @@ Eсли суммарное число активных кусков во все ## replicated_fetches_http_send_timeout {#replicated_fetches_http_send_timeout} -Тайм-аут отправки HTTP (в секундах) для запросов на скачивание кусков. Наследуется от профиля по умолчанию [http_connection_timeout](./settings.md#http_connection_timeout), если не задан явно. +Тайм-аут (в секундах) для отправки HTTP-запросов на скачивание кусков. Наследуется из профиля по умолчанию [http_connection_timeout](./settings.md#http_connection_timeout), если не задан явно. Возможные значения: From 73c7552adde4d68894c7c9918a2be3663a8d67e8 Mon Sep 17 00:00:00 2001 From: Anna <42538400+adevyatova@users.noreply.github.com> Date: Sat, 15 May 2021 06:39:38 +0300 Subject: [PATCH 072/204] Update docs/ru/operations/settings/merge-tree-settings.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/ru/operations/settings/merge-tree-settings.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/operations/settings/merge-tree-settings.md b/docs/ru/operations/settings/merge-tree-settings.md index bc71caf1bda..2ee92e5cd56 100644 --- a/docs/ru/operations/settings/merge-tree-settings.md +++ b/docs/ru/operations/settings/merge-tree-settings.md @@ -151,7 +151,7 @@ Eсли суммарное число активных кусков во все ## replicated_fetches_http_connection_timeout {#replicated_fetches_http_connection_timeout} -Тайм-аут HTTP-соединения (в секундах) для запросов на скачивание кусков. Наследуется от профиля по умолчанию [http_connection_timeout](./settings.md#http_connection_timeout), если не задан явно. +Тайм-аут HTTP-соединения (в секундах) для запросов на скачивание кусков. Наследуется из профиля по умолчанию [http_connection_timeout](./settings.md#http_connection_timeout), если не задан явно. Возможные значения: From aea177ea42095b1a75ab6583bdd4cfe5af2df797 Mon Sep 17 00:00:00 2001 From: Anna <42538400+adevyatova@users.noreply.github.com> Date: Sat, 15 May 2021 06:39:46 +0300 Subject: [PATCH 073/204] Update docs/ru/operations/settings/settings.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/ru/operations/settings/settings.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 26513350b63..b77ba99488e 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -2859,7 +2859,7 @@ SELECT * FROM test LIMIT 10 OFFSET 100; ``` ## http_connection_timeout {#http_connection_timeout} -Тайм-аут HTTP-соединения (в секундах). +Тайм-аут для HTTP-соединения (в секундах). 
Возможные значения: From a0c0723c3db58e758ee58273bac76217332e07a2 Mon Sep 17 00:00:00 2001 From: Anna <42538400+adevyatova@users.noreply.github.com> Date: Sat, 15 May 2021 06:39:58 +0300 Subject: [PATCH 074/204] Update docs/ru/operations/settings/settings.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/ru/operations/settings/settings.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index b77ba99488e..3ee19670061 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -2881,7 +2881,7 @@ SELECT * FROM test LIMIT 10 OFFSET 100; ## http_receive_timeout {#http_receive_timeout} -Тайм-аут приема HTTP (в секундах). +Тайм-аут для получения HTTP-запроса (в секундах). Возможные значения: From 2b05303d7d810678805f26edfabd0cd3bf600a9b Mon Sep 17 00:00:00 2001 From: Anna <42538400+adevyatova@users.noreply.github.com> Date: Sat, 15 May 2021 06:40:08 +0300 Subject: [PATCH 075/204] Update docs/ru/operations/settings/settings.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/ru/operations/settings/settings.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 3ee19670061..c32cf1d8158 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -2870,7 +2870,7 @@ SELECT * FROM test LIMIT 10 OFFSET 100; ## http_send_timeout {#replicated_fetches_http_send_timeout} -Тайм-аут отправки HTTP (в секундах). +Тайм-аут для отправки HTTP-запросов (в секундах). Возможные значения: From b9f332127001a08c66079503cc47c085ac85aafe Mon Sep 17 00:00:00 2001 From: Anna <42538400+adevyatova@users.noreply.github.com> Date: Sat, 15 May 2021 06:40:18 +0300 Subject: [PATCH 076/204] Update docs/ru/operations/settings/merge-tree-settings.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/ru/operations/settings/merge-tree-settings.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/operations/settings/merge-tree-settings.md b/docs/ru/operations/settings/merge-tree-settings.md index 2ee92e5cd56..be154bbd19a 100644 --- a/docs/ru/operations/settings/merge-tree-settings.md +++ b/docs/ru/operations/settings/merge-tree-settings.md @@ -173,7 +173,7 @@ Eсли суммарное число активных кусков во все ## replicated_fetches_http_receive_timeout {#replicated_fetches_http_receive_timeout} -Тайм-аут приёма HTTP (в секундах) для запросов на скачивание кусков. Наследуется от профиля по умолчанию [http_connection_timeout](./settings.md#http_connection_timeout), если не задан явно. +Тайм-аут (в секундах) для получения HTTP-запросов на скачивание кусков. Наследуется из профиля по умолчанию [http_connection_timeout](./settings.md#http_connection_timeout), если не задан явно. 
Возможные значения: From a44ddd2064a2d394cde72ff591009adb82f76489 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 15 May 2021 15:03:13 +0300 Subject: [PATCH 077/204] Update distributed_group_by_no_merge description --- docs/en/operations/settings/settings.md | 4 ++-- src/Core/Settings.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index b0c879af931..1a2f8aba35f 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -1520,8 +1520,8 @@ Do not merge aggregation states from different servers for distributed query pro Possible values: - 0 — Disabled (final query processing is done on the initiator node). -- 1 - Do not merge aggregation states from different servers for distributed query processing (query completelly processed on the shard, initiator only proxy the data). -- 2 - Same as 1 but apply `ORDER BY` and `LIMIT` on the initiator (can be used for queries with `ORDER BY` and/or `LIMIT`). +- 1 - Do not merge aggregation states from different servers for distributed query processing (query completelly processed on the shard, initiator only proxy the data), can be used in case it is for certain that there are different keys on different shards. +- 2 - Same as `1` but applies `ORDER BY` and `LIMIT` (it is not possilbe when the query processed completelly on the remote node, like for `distributed_group_by_no_merge=1`) on the initiator (can be used for queries with `ORDER BY` and/or `LIMIT`). **Example** diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 42a20441a2e..75f6c002fff 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -115,7 +115,7 @@ class IColumn; M(Bool, skip_unavailable_shards, false, "If 1, ClickHouse silently skips unavailable shards and nodes unresolvable through DNS. Shard is marked as unavailable when none of the replicas can be reached.", 0) \ \ M(UInt64, parallel_distributed_insert_select, 0, "Process distributed INSERT SELECT query in the same cluster on local tables on every shard, if 1 SELECT is executed on each shard, if 2 SELECT and INSERT is executed on each shard", 0) \ - M(UInt64, distributed_group_by_no_merge, 0, "If 1, Do not merge aggregation states from different servers for distributed query processing - in case it is for certain that there are different keys on different shards. If 2 - same as 1 but also apply ORDER BY and LIMIT stages", 0) \ + M(UInt64, distributed_group_by_no_merge, 0, "If 1, Do not merge aggregation states from different servers for distributed queries (shards will process query up to the Complete stage, initiator just proxies the data from the shards). If 2 the initiator will apply ORDER BY and LIMIT stages (it is not in case when shard process query up to the Complete stage)", 0) \ M(Bool, optimize_distributed_group_by_sharding_key, false, "Optimize GROUP BY sharding_key queries (by avoiding costly aggregation on the initiator server).", 0) \ M(UInt64, optimize_skip_unused_shards_limit, 1000, "Limit for number of sharding key values, turns off optimize_skip_unused_shards if the limit is reached", 0) \ M(Bool, optimize_skip_unused_shards, false, "Assumes that data is distributed by sharding_key. 
Optimization to skip unused shards if SELECT query filters by sharding_key.", 0) \ From 33e9f1bcf12c0a5f9fa838a7117104f2f15cb6cb Mon Sep 17 00:00:00 2001 From: alesapin Date: Sat, 15 May 2021 15:33:01 +0300 Subject: [PATCH 078/204] Better tests --- tests/integration/helpers/test_tools.py | 13 ++++++ .../test_polymorphic_parts/test.py | 19 +++++---- tests/integration/test_ttl_replicated/test.py | 26 ++---------- .../test.py | 40 ++----------------- 4 files changed, 31 insertions(+), 67 deletions(-) diff --git a/tests/integration/helpers/test_tools.py b/tests/integration/helpers/test_tools.py index 5fedadd3380..93478c4dd49 100644 --- a/tests/integration/helpers/test_tools.py +++ b/tests/integration/helpers/test_tools.py @@ -80,3 +80,16 @@ def assert_logs_contain_with_retry(instance, substring, retry_count=20, sleep_ti time.sleep(sleep_time) else: raise AssertionError("'{}' not found in logs".format(substring)) + +def exec_query_with_retry(instance, query, retry_count=40, sleep_time=0.5, settings={}): + exception = None + for _ in range(retry_count): + try: + instance.query(query, timeout=30, settings=settings) + break + except Exception as ex: + exception = ex + print("Failed to execute query '", query, "' on instance", instance.name, "will retry") + time.sleep(sleep_time) + else: + raise exception diff --git a/tests/integration/test_polymorphic_parts/test.py b/tests/integration/test_polymorphic_parts/test.py index dc16bab0ca4..9fe3ef77da8 100644 --- a/tests/integration/test_polymorphic_parts/test.py +++ b/tests/integration/test_polymorphic_parts/test.py @@ -7,7 +7,7 @@ import pytest from helpers.cluster import ClickHouseCluster from helpers.network import PartitionManager from helpers.test_tools import TSV -from helpers.test_tools import assert_eq_with_retry +from helpers.test_tools import assert_eq_with_retry, exec_query_with_retry cluster = ClickHouseCluster(__file__) @@ -408,8 +408,9 @@ def test_in_memory_wal_rotate(start_cluster): def test_in_memory_deduplication(start_cluster): for i in range(3): - node9.query("INSERT INTO deduplication_table (date, id, s) VALUES (toDate('2020-03-03'), 1, 'foo')") - node10.query("INSERT INTO deduplication_table (date, id, s) VALUES (toDate('2020-03-03'), 1, 'foo')") + # table can be in readonly node + exec_query_with_retry(node9, "INSERT INTO deduplication_table (date, id, s) VALUES (toDate('2020-03-03'), 1, 'foo')") + exec_query_with_retry(node10, "INSERT INTO deduplication_table (date, id, s) VALUES (toDate('2020-03-03'), 1, 'foo')") node9.query("SYSTEM SYNC REPLICA deduplication_table", timeout=20) node10.query("SYSTEM SYNC REPLICA deduplication_table", timeout=20) @@ -430,10 +431,10 @@ def test_in_memory_alters(start_cluster): node9.restart_clickhouse(kill=True) expected = "1\tab\t0\n2\tcd\t0\n" - assert node9.query("SELECT id, s, col1 FROM alters_table") == expected + assert node9.query("SELECT id, s, col1 FROM alters_table ORDER BY id") == expected check_parts_type(1) - - node9.query("INSERT INTO alters_table (date, id, col1) VALUES (toDate('2020-10-10'), 3, 100)") + # After hard restart table can be in readonly mode + exec_query_with_retry(node9, "INSERT INTO alters_table (date, id, col1) VALUES (toDate('2020-10-10'), 3, 100)") node9.query("ALTER TABLE alters_table MODIFY COLUMN col1 String") node9.query("ALTER TABLE alters_table DROP COLUMN s") node9.restart_clickhouse(kill=True) @@ -442,8 +443,10 @@ def test_in_memory_alters(start_cluster): with pytest.raises(Exception): node9.query("SELECT id, s, col1 FROM alters_table") - expected = expected 
= "1\t0_foo\n2\t0_foo\n3\t100_foo\n" - assert node9.query("SELECT id, col1 || '_foo' FROM alters_table") + # Values of col1 was not materialized as integers, so they have + # default string values after alter + expected = "1\t_foo\n2\t_foo\n3\t100_foo\n" + assert node9.query("SELECT id, col1 || '_foo' FROM alters_table ORDER BY id") == expected def test_polymorphic_parts_index(start_cluster): diff --git a/tests/integration/test_ttl_replicated/test.py b/tests/integration/test_ttl_replicated/test.py index c18831e5d9d..f32edc36a71 100644 --- a/tests/integration/test_ttl_replicated/test.py +++ b/tests/integration/test_ttl_replicated/test.py @@ -3,7 +3,7 @@ import time import helpers.client as client import pytest from helpers.cluster import ClickHouseCluster -from helpers.test_tools import TSV +from helpers.test_tools import TSV, exec_query_with_retry cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance('node1', with_zookeeper=True) @@ -393,31 +393,11 @@ def test_ttl_compatibility(started_cluster, node_left, node_right, num_run): time.sleep(5) # Wait for TTL # after restart table can be in readonly mode - exception = None - for _ in range(40): - try: - node_right.query("OPTIMIZE TABLE test_ttl_delete FINAL") - break - except Exception as ex: - print("Cannot optimaze table on node", node_right.name, "exception", ex) - time.sleep(0.5) - exception = ex - else: - raise ex + exec_query_with_retry(node_right, "OPTIMIZE TABLE test_ttl_delete FINAL") node_right.query("OPTIMIZE TABLE test_ttl_group_by FINAL") node_right.query("OPTIMIZE TABLE test_ttl_where FINAL") - - for _ in range(40): - try: - node_left.query("SYSTEM SYNC REPLICA test_ttl_delete", timeout=20) - break - except Exception as ex: - print("Cannot sync replica table on node", node_left.name, "exception", ex) - time.sleep(0.5) - exception = ex - else: - raise ex + exec_query_with_retry(node_left, "SYSTEM SYNC REPLICA test_ttl_delete") node_left.query("SYSTEM SYNC REPLICA test_ttl_group_by", timeout=20) node_left.query("SYSTEM SYNC REPLICA test_ttl_where", timeout=20) diff --git a/tests/integration/test_version_update_after_mutation/test.py b/tests/integration/test_version_update_after_mutation/test.py index 2549dc0a8cf..1ef65512959 100644 --- a/tests/integration/test_version_update_after_mutation/test.py +++ b/tests/integration/test_version_update_after_mutation/test.py @@ -2,7 +2,7 @@ import pytest import time from helpers.cluster import ClickHouseCluster -from helpers.test_tools import assert_eq_with_retry +from helpers.test_tools import assert_eq_with_retry, exec_query_with_retry cluster = ClickHouseCluster(__file__) @@ -38,29 +38,9 @@ def test_mutate_and_upgrade(start_cluster): node1.restart_with_latest_version(signal=9) node2.restart_with_latest_version(signal=9) - exception = None # After hard restart table can be in readonly mode - for _ in range(40): - try: - node2.query("INSERT INTO mt VALUES ('2020-02-13', 3);") - break - except Exception as ex: - print("Cannot insert into node2 with error {}", ex) - time.sleep(0.5) - exception = ex - else: - raise exception - - for _ in range(40): - try: - node1.query("SYSTEM SYNC REPLICA mt", timeout=5) - break - except Exception as ex: - print("Cannot sync node1 with error {}", ex) - time.sleep(0.5) - exception = ex - else: - raise exception + exec_query_with_retry(node2, "INSERT INTO mt VALUES ('2020-02-13', 3)") + exec_query_with_retry(node1, "SYSTEM SYNC REPLICA mt") assert node1.query("SELECT COUNT() FROM mt") == "2\n" assert node2.query("SELECT COUNT() FROM mt") == "2\n" @@ 
-99,19 +79,7 @@ def test_upgrade_while_mutation(start_cluster): node3.restart_with_latest_version(signal=9) - # After hard restart table can be in readonly mode - exception = None - for _ in range(40): - try: - node3.query("ALTER TABLE mt1 DELETE WHERE id > 100000", settings={"mutations_sync": "2"}) - break - except Exception as ex: - print("Cannot alter node3 with error {}", ex) - time.sleep(0.5) - exception = ex - else: - raise exception - + exec_query_with_retry(node3, "ALTER TABLE mt1 DELETE WHERE id > 100000", settings={"mutations_sync": "2"}) # will delete nothing, but previous async mutation will finish with this query assert_eq_with_retry(node3, "SELECT COUNT() from mt1", "50000\n") From 852608c9372d6b11a476abc05262e4d6709569cd Mon Sep 17 00:00:00 2001 From: alesapin Date: Sat, 15 May 2021 18:01:00 +0300 Subject: [PATCH 079/204] Don't abort on Auth request --- src/Coordination/KeeperStorage.cpp | 13 ++++++++++++- tests/integration/helpers/keeper_config1.xml | 2 ++ tests/integration/helpers/keeper_config2.xml | 2 ++ tests/integration/helpers/keeper_config3.xml | 2 ++ 4 files changed, 18 insertions(+), 1 deletion(-) diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index 5f2d6141be9..a449a106576 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -547,6 +547,17 @@ struct KeeperStorageCloseRequest final : public KeeperStorageRequest } }; +/// Dummy implementation TODO: impelement simple ACL +struct KeeperStorageAuthRequest final : public KeeperStorageRequest +{ + using KeeperStorageRequest::KeeperStorageRequest; + std::pair process(KeeperStorage::Container &, KeeperStorage::Ephemerals &, int64_t, int64_t) const override + { + Coordination::ZooKeeperResponsePtr response_ptr = zk_request->makeResponse(); + return { response_ptr, {} }; + } +}; + void KeeperStorage::finalize() { if (finalized) @@ -611,7 +622,7 @@ KeeperWrapperFactory::KeeperWrapperFactory() { registerKeeperRequestWrapper(*this); registerKeeperRequestWrapper(*this); - //registerKeeperRequestWrapper(*this); + registerKeeperRequestWrapper(*this); registerKeeperRequestWrapper(*this); registerKeeperRequestWrapper(*this); registerKeeperRequestWrapper(*this); diff --git a/tests/integration/helpers/keeper_config1.xml b/tests/integration/helpers/keeper_config1.xml index 2d2f66cbc39..687bcff0f54 100644 --- a/tests/integration/helpers/keeper_config1.xml +++ b/tests/integration/helpers/keeper_config1.xml @@ -1,4 +1,6 @@ + true + :: 0.0.0.0 diff --git a/tests/integration/helpers/keeper_config2.xml b/tests/integration/helpers/keeper_config2.xml index 81976015df1..71f4f3552f8 100644 --- a/tests/integration/helpers/keeper_config2.xml +++ b/tests/integration/helpers/keeper_config2.xml @@ -1,4 +1,6 @@ + true + :: 0.0.0.0 diff --git a/tests/integration/helpers/keeper_config3.xml b/tests/integration/helpers/keeper_config3.xml index 07a9e5fc6b7..e235bc448a5 100644 --- a/tests/integration/helpers/keeper_config3.xml +++ b/tests/integration/helpers/keeper_config3.xml @@ -1,4 +1,6 @@ + true + :: 0.0.0.0 From bae419be36f4f913f7307c0a6834dc326e0dbb4f Mon Sep 17 00:00:00 2001 From: alesapin Date: Sat, 15 May 2021 18:29:07 +0300 Subject: [PATCH 080/204] Fix typo --- src/Coordination/KeeperStorage.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index a449a106576..9e8d2a124e9 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -547,7 +547,7 @@ 
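The "Don't abort on Auth request" change above registers a dummy KeeperStorageAuthRequest handler, so a ZooKeeper auth packet now receives an ordinary response instead of reaching the unhandled-request abort path. A hypothetical client-side illustration using the kazoo client that the integration tests already rely on (the address and port are placeholders):

```python
from kazoo.client import KazooClient

client = KazooClient(hosts="127.0.0.1:9181")  # placeholder address of a test keeper instance
client.start()
client.add_auth("digest", "user:password")    # previously an Auth request aborted the server
client.create("/some_node", b"value")
client.stop()
```

Until a real ACL implementation lands (the TODO in the handler), the supplied credentials are effectively ignored.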
struct KeeperStorageCloseRequest final : public KeeperStorageRequest } }; -/// Dummy implementation TODO: impelement simple ACL +/// Dummy implementation TODO: implement simple ACL struct KeeperStorageAuthRequest final : public KeeperStorageRequest { using KeeperStorageRequest::KeeperStorageRequest; From 947f28d43038dc2740ed1e963138e767c61a91ca Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Sat, 15 May 2021 20:33:15 +0300 Subject: [PATCH 081/204] IFunction refactoring --- src/CMakeLists.txt | 4 +- src/Common/ya.make | 1 - src/Functions/CMakeLists.txt | 4 +- src/Functions/CustomWeekTransforms.h | 2 +- src/Functions/DateTimeTransforms.h | 2 +- src/Functions/FunctionBinaryArithmetic.h | 14 +- src/Functions/FunctionBitTestMany.h | 2 +- src/Functions/FunctionCustomWeekToSomething.h | 2 +- .../FunctionDateOrDateTimeAddInterval.h | 2 +- .../FunctionDateOrDateTimeToSomething.h | 2 +- src/Functions/FunctionFQDN.cpp | 2 +- src/Functions/FunctionFactory.cpp | 11 +- src/Functions/FunctionFactory.h | 19 +- src/Functions/FunctionHelpers.cpp | 73 ++- src/Functions/FunctionHelpers.h | 13 + src/Functions/FunctionIfBase.h | 2 +- src/Functions/FunctionJoinGet.cpp | 6 +- src/Functions/FunctionJoinGet.h | 18 +- src/Functions/FunctionMathBinaryFloat64.h | 2 +- src/Functions/FunctionMathConstFloat64.h | 2 +- src/Functions/FunctionMathUnary.h | 2 +- src/Functions/FunctionNumericPredicate.h | 2 +- src/Functions/FunctionStartsEndsWith.h | 2 +- src/Functions/FunctionStringOrArrayToT.h | 2 +- src/Functions/FunctionStringToString.h | 2 +- src/Functions/FunctionUnaryArithmetic.h | 2 +- src/Functions/FunctionUnixTimestamp64.h | 2 +- src/Functions/FunctionsBitmap.h | 2 +- src/Functions/FunctionsCoding.h | 2 +- src/Functions/FunctionsComparison.h | 5 +- src/Functions/FunctionsConsistentHashing.h | 2 +- src/Functions/FunctionsConversion.h | 44 +- src/Functions/FunctionsEmbeddedDictionaries.h | 2 +- src/Functions/FunctionsExternalDictionaries.h | 2 +- src/Functions/FunctionsExternalModels.h | 2 +- src/Functions/FunctionsHashing.h | 2 +- src/Functions/FunctionsJSON.h | 2 +- src/Functions/FunctionsLogical.h | 2 +- src/Functions/FunctionsMiscellaneous.h | 26 +- .../FunctionsMultiStringFuzzySearch.h | 2 +- src/Functions/FunctionsMultiStringPosition.h | 2 +- src/Functions/FunctionsMultiStringSearch.h | 2 +- src/Functions/FunctionsRandom.h | 2 +- src/Functions/FunctionsRound.h | 2 +- src/Functions/FunctionsStringArray.h | 2 +- src/Functions/FunctionsStringHash.h | 2 +- src/Functions/FunctionsStringSearch.h | 2 +- src/Functions/FunctionsStringSearchToString.h | 2 +- src/Functions/FunctionsStringSimilarity.h | 2 +- src/Functions/FunctionsVisitParam.h | 2 +- src/Functions/IFunction.cpp | 578 +++++------------- src/Functions/IFunction.h | 172 ++++-- src/Functions/IFunctionAdaptors.h | 179 +----- src/Functions/IFunctionImpl.h | 307 ---------- src/Functions/IFunctionOld.cpp | 91 +++ src/Functions/IFunctionOld.h | 129 ++++ src/Functions/LeastGreatestGeneric.h | 14 +- src/Functions/PerformanceAdaptors.h | 2 +- src/Functions/URL/port.cpp | 2 +- src/Functions/addressToLine.cpp | 2 +- src/Functions/addressToSymbol.cpp | 2 +- src/Functions/appendTrailingCharIfAbsent.cpp | 2 +- src/Functions/array/FunctionArrayMapped.h | 2 +- src/Functions/array/array.cpp | 2 +- src/Functions/array/arrayConcat.cpp | 2 +- src/Functions/array/arrayDistinct.cpp | 2 +- src/Functions/array/arrayElement.cpp | 2 +- src/Functions/array/arrayEnumerate.cpp | 2 +- src/Functions/array/arrayEnumerateExtended.h | 2 +- src/Functions/array/arrayEnumerateRanked.h | 2 +- 
src/Functions/array/arrayFlatten.cpp | 2 +- src/Functions/array/arrayIndex.h | 2 +- src/Functions/array/arrayIntersect.cpp | 2 +- src/Functions/array/arrayJoin.cpp | 2 +- src/Functions/array/arrayPop.h | 2 +- src/Functions/array/arrayPush.h | 2 +- src/Functions/array/arrayReduce.cpp | 2 +- src/Functions/array/arrayReduceInRanges.cpp | 2 +- src/Functions/array/arrayResize.cpp | 2 +- src/Functions/array/arrayReverse.cpp | 2 +- src/Functions/array/arraySlice.cpp | 2 +- src/Functions/array/arrayUniq.cpp | 2 +- src/Functions/array/arrayWithConstant.cpp | 2 +- src/Functions/array/emptyArray.cpp | 2 +- src/Functions/array/emptyArrayToSingle.cpp | 2 +- src/Functions/array/hasAllAny.h | 2 +- src/Functions/array/range.cpp | 2 +- src/Functions/assumeNotNull.cpp | 2 +- src/Functions/bar.cpp | 2 +- src/Functions/bitmaskToList.cpp | 2 +- src/Functions/blockNumber.cpp | 2 +- src/Functions/blockSerializedSize.cpp | 2 +- src/Functions/blockSize.cpp | 2 +- src/Functions/buildId.cpp | 2 +- src/Functions/byteSize.cpp | 2 +- src/Functions/coalesce.cpp | 2 +- src/Functions/concat.cpp | 14 +- src/Functions/connectionId.cpp | 2 +- src/Functions/countDigits.cpp | 2 +- src/Functions/countMatches.h | 2 +- src/Functions/currentDatabase.cpp | 2 +- src/Functions/currentUser.cpp | 2 +- src/Functions/dateDiff.cpp | 2 +- src/Functions/defaultValueOfArgumentType.cpp | 2 +- src/Functions/defaultValueOfTypeName.cpp | 2 +- src/Functions/dumpColumnStructure.cpp | 2 +- src/Functions/errorCodeToName.cpp | 2 +- src/Functions/evalMLMethod.cpp | 2 +- src/Functions/extractAllGroups.h | 2 +- src/Functions/extractTextFromHTML.cpp | 2 +- src/Functions/filesystem.cpp | 2 +- src/Functions/finalizeAggregation.cpp | 2 +- src/Functions/formatDateTime.cpp | 2 +- src/Functions/formatReadable.h | 2 +- src/Functions/formatReadableTimeDelta.cpp | 2 +- src/Functions/formatRow.cpp | 12 +- src/Functions/formatString.cpp | 2 +- src/Functions/fromModifiedJulianDay.cpp | 21 +- src/Functions/fromUnixTimestamp64Micro.cpp | 2 +- src/Functions/fromUnixTimestamp64Milli.cpp | 2 +- src/Functions/fromUnixTimestamp64Nano.cpp | 2 +- src/Functions/fuzzBits.cpp | 2 +- src/Functions/geoToH3.cpp | 2 +- src/Functions/geohashesInBox.cpp | 2 +- src/Functions/getMacro.cpp | 2 +- src/Functions/getScalar.cpp | 2 +- src/Functions/getSetting.cpp | 2 +- src/Functions/getSizeOfEnumType.cpp | 2 +- src/Functions/globalVariable.cpp | 2 +- src/Functions/greatCircleDistance.cpp | 2 +- src/Functions/hasColumnInTable.cpp | 2 +- src/Functions/hostName.cpp | 2 +- src/Functions/identity.cpp | 2 +- src/Functions/if.cpp | 2 +- src/Functions/ifNotFinite.cpp | 2 +- src/Functions/ifNull.cpp | 2 +- src/Functions/ignore.cpp | 2 +- src/Functions/in.cpp | 2 +- src/Functions/indexHint.cpp | 2 +- src/Functions/initializeAggregation.cpp | 2 +- src/Functions/isConstant.cpp | 2 +- src/Functions/isDecimalOverflow.cpp | 2 +- src/Functions/isIPAddressContainedIn.cpp | 2 +- src/Functions/isNotNull.cpp | 2 +- src/Functions/isNull.cpp | 2 +- src/Functions/isZeroOrNull.cpp | 2 +- src/Functions/logTrace.cpp | 2 +- src/Functions/lowCardinalityIndices.cpp | 2 +- src/Functions/lowCardinalityKeys.cpp | 2 +- src/Functions/map.cpp | 2 +- src/Functions/materialize.h | 2 +- src/Functions/neighbor.cpp | 2 +- src/Functions/now.cpp | 18 +- src/Functions/now64.cpp | 16 +- src/Functions/nullIf.cpp | 2 +- src/Functions/partitionId.cpp | 2 +- src/Functions/pointInEllipses.cpp | 2 +- src/Functions/randConstant.cpp | 16 +- src/Functions/randomFixedString.cpp | 2 +- src/Functions/randomPrintableASCII.cpp | 2 +- 
src/Functions/randomString.cpp | 2 +- src/Functions/randomStringUTF8.cpp | 2 +- src/Functions/repeat.cpp | 2 +- src/Functions/replicate.cpp | 2 +- src/Functions/replicate.h | 2 +- src/Functions/reverse.cpp | 12 +- src/Functions/rowNumberInAllBlocks.cpp | 2 +- src/Functions/rowNumberInBlock.cpp | 2 +- src/Functions/runningAccumulate.cpp | 2 +- src/Functions/runningConcurrency.cpp | 20 +- src/Functions/runningDifference.h | 2 +- src/Functions/sleep.h | 2 +- src/Functions/substring.cpp | 2 +- src/Functions/subtractDays.cpp | 2 +- src/Functions/subtractHours.cpp | 2 +- src/Functions/subtractMinutes.cpp | 2 +- src/Functions/subtractMonths.cpp | 2 +- src/Functions/subtractQuarters.cpp | 2 +- src/Functions/subtractSeconds.cpp | 2 +- src/Functions/subtractWeeks.cpp | 2 +- src/Functions/subtractYears.cpp | 2 +- src/Functions/throwIf.cpp | 2 +- src/Functions/tid.cpp | 2 +- src/Functions/timeSlot.cpp | 2 +- src/Functions/timeSlots.cpp | 2 +- src/Functions/timezone.cpp | 2 +- src/Functions/timezoneOf.cpp | 18 +- src/Functions/toColumnTypeName.cpp | 2 +- src/Functions/toCustomWeek.cpp | 2 +- src/Functions/toFixedString.h | 2 +- src/Functions/toLowCardinality.cpp | 2 +- src/Functions/toModifiedJulianDay.cpp | 18 +- src/Functions/toNullable.cpp | 2 +- src/Functions/toStartOfInterval.cpp | 2 +- src/Functions/toTimezone.cpp | 2 +- src/Functions/toTypeName.cpp | 18 +- src/Functions/toUnixTimestamp64Micro.cpp | 2 +- src/Functions/toUnixTimestamp64Milli.cpp | 2 +- src/Functions/toUnixTimestamp64Nano.cpp | 2 +- src/Functions/today.cpp | 18 +- src/Functions/transform.cpp | 5 +- src/Functions/tuple.cpp | 2 +- src/Functions/tupleElement.cpp | 2 +- src/Functions/uptime.cpp | 2 +- src/Functions/version.cpp | 2 +- src/Functions/visibleWidth.cpp | 2 +- src/Functions/ya.make | 1 + src/Functions/yesterday.cpp | 18 +- src/Interpreters/ActionsDAG.cpp | 29 +- src/Interpreters/ActionsVisitor.cpp | 5 +- src/Interpreters/ExpressionJIT.cpp | 13 +- src/Interpreters/ExpressionJIT.h | 2 +- src/Interpreters/JIT/CompileDAG.h | 2 +- src/Interpreters/JIT/compileFunction.cpp | 4 +- src/Interpreters/JIT/compileFunction.h | 4 +- src/Interpreters/castColumn.cpp | 3 +- src/Storages/MergeTree/KeyCondition.cpp | 3 +- 217 files changed, 1022 insertions(+), 1322 deletions(-) delete mode 100644 src/Functions/IFunctionImpl.h create mode 100644 src/Functions/IFunctionOld.cpp create mode 100644 src/Functions/IFunctionOld.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 292adc0e124..d22a69c211c 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -106,8 +106,8 @@ endif() list (APPEND clickhouse_common_io_sources ${CONFIG_BUILD}) list (APPEND clickhouse_common_io_headers ${CONFIG_VERSION} ${CONFIG_COMMON}) -list (APPEND dbms_sources Functions/IFunction.cpp Functions/FunctionFactory.cpp Functions/FunctionHelpers.cpp Functions/extractTimeZoneFromFunctionArguments.cpp Functions/replicate.cpp Functions/FunctionsLogical.cpp) -list (APPEND dbms_headers Functions/IFunctionImpl.h Functions/FunctionFactory.h Functions/FunctionHelpers.h Functions/extractTimeZoneFromFunctionArguments.h Functions/replicate.h Functions/FunctionsLogical.h) +list (APPEND dbms_sources Functions/IFunction.cpp Functions/IFunctionOld.cpp Functions/FunctionFactory.cpp Functions/FunctionHelpers.cpp Functions/extractTimeZoneFromFunctionArguments.cpp Functions/replicate.cpp Functions/FunctionsLogical.cpp) +list (APPEND dbms_headers Functions/IFunctionOld.h Functions/FunctionFactory.h Functions/FunctionHelpers.h Functions/extractTimeZoneFromFunctionArguments.h 
Functions/replicate.h Functions/FunctionsLogical.h) list (APPEND dbms_sources AggregateFunctions/AggregateFunctionFactory.cpp diff --git a/src/Common/ya.make b/src/Common/ya.make index dde1e6ae013..f12b17827f7 100644 --- a/src/Common/ya.make +++ b/src/Common/ya.make @@ -18,7 +18,6 @@ PEERDIR( contrib/libs/openssl contrib/libs/poco/NetSSL_OpenSSL contrib/libs/re2 - contrib/libs/cxxsupp/libcxxabi-parts contrib/restricted/dragonbox ) diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt index 7e9e953eabe..ba876e02424 100644 --- a/src/Functions/CMakeLists.txt +++ b/src/Functions/CMakeLists.txt @@ -5,8 +5,8 @@ add_subdirectory(divide) include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") add_headers_and_sources(clickhouse_functions .) -list(REMOVE_ITEM clickhouse_functions_sources IFunctionImpl.cpp FunctionFactory.cpp FunctionHelpers.cpp) -list(REMOVE_ITEM clickhouse_functions_headers IFunctionImpl.h FunctionFactory.h FunctionHelpers.h) +list(REMOVE_ITEM clickhouse_functions_sources IFunctionOld.cpp FunctionFactory.cpp FunctionHelpers.cpp) +list(REMOVE_ITEM clickhouse_functions_headers IFunctionOld.h FunctionFactory.h FunctionHelpers.h) add_library(clickhouse_functions ${clickhouse_functions_sources}) diff --git a/src/Functions/CustomWeekTransforms.h b/src/Functions/CustomWeekTransforms.h index 98b7c38f266..f07f2777cec 100644 --- a/src/Functions/CustomWeekTransforms.h +++ b/src/Functions/CustomWeekTransforms.h @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index 86cac4f4222..aa8f52b335e 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Functions/FunctionBinaryArithmetic.h b/src/Functions/FunctionBinaryArithmetic.h index 508c598b0ed..0d90eece3e2 100644 --- a/src/Functions/FunctionBinaryArithmetic.h +++ b/src/Functions/FunctionBinaryArithmetic.h @@ -23,7 +23,7 @@ #include #include #include "Core/DecimalFunctions.h" -#include "IFunctionImpl.h" +#include "IFunctionOld.h" #include "FunctionHelpers.h" #include "IsOperation.h" #include "DivisionUtils.h" @@ -1532,11 +1532,11 @@ private: }; template