From 3ebc3852f404a1ce392e9b66b8356fa9da701097 Mon Sep 17 00:00:00 2001 From: MikhailBurdukov Date: Mon, 22 Jul 2024 15:28:29 +0000 Subject: [PATCH 01/26] Allow filtering ip addresses by ip family in DNS resolver --- programs/server/Server.cpp | 3 + src/Common/DNSResolver.cpp | 93 ++++++-- src/Common/DNSResolver.h | 9 + src/Core/ServerSettings.h | 2 + src/Core/SettingsChangesHistory.cpp | 261 +++++++++++++++++++++++ tests/integration/test_dns_cache/test.py | 67 +++++- 6 files changed, 418 insertions(+), 17 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 7800ee9ff00..3126f65ef09 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1768,6 +1768,9 @@ try new_server_settings.http_connections_store_limit, }); + DNSResolver::instance().setFilterSettings(new_server_settings.dns_allow_resolve_names_to_ipv4, new_server_settings.dns_allow_resolve_names_to_ipv6); + + if (global_context->isServerCompletelyStarted()) CannotAllocateThreadFaultInjector::setFaultProbability(new_server_settings.cannot_allocate_thread_fault_injection_probability); diff --git a/src/Common/DNSResolver.cpp b/src/Common/DNSResolver.cpp index 4b577a251af..051e6e63091 100644 --- a/src/Common/DNSResolver.cpp +++ b/src/Common/DNSResolver.cpp @@ -12,6 +12,8 @@ #include #include #include +#include +#include #include #include "DNSPTRResolverProvider.h" @@ -139,12 +141,6 @@ DNSResolver::IPAddresses resolveIPAddressImpl(const std::string & host) return addresses; } -DNSResolver::IPAddresses resolveIPAddressWithCache(CacheBase & cache, const std::string & host) -{ - auto [result, _ ] = cache.getOrSet(host, [&host]() {return std::make_shared(resolveIPAddressImpl(host), std::chrono::system_clock::now());}); - return result->addresses; -} - std::unordered_set reverseResolveImpl(const Poco::Net::IPAddress & address) { auto ptr_resolver = DB::DNSPTRResolverProvider::get(); @@ -198,21 +194,90 @@ struct DNSResolver::Impl std::atomic disable_cache{false}; }; +struct DNSResolver::AddressFilter +{ + struct DNSFilterSettings + { + std::atomic dns_allow_resolve_names_to_ipv4{true}; + std::atomic dns_allow_resolve_names_to_ipv6{true}; + }; -DNSResolver::DNSResolver() : impl(std::make_unique()), log(getLogger("DNSResolver")) {} + void performAddressFiltering(DNSResolver::IPAddresses & addresses) + { + bool dns_resolve_ipv4 = settings.dns_allow_resolve_names_to_ipv4; + bool dns_resolve_ipv6 = settings.dns_allow_resolve_names_to_ipv6; + + if (dns_resolve_ipv4 && dns_resolve_ipv6) + { + return; + } + if (!dns_resolve_ipv4 && !dns_resolve_ipv6) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "DNS can't resolve any address, because dns_resolve_ipv6_interfaces and dns_resolve_ipv4_interfaces both are disabled"); + } + addresses.erase( + std::remove_if(addresses.begin(), addresses.end(), + [dns_resolve_ipv6, dns_resolve_ipv4](const Poco::Net::IPAddress& address) + { + return (address.family() == Poco::Net::IPAddress::IPv6 && !dns_resolve_ipv6) + || (address.family() == Poco::Net::IPAddress::IPv4 && !dns_resolve_ipv4); + }), + addresses.end() + ); + } + + void setSettings(bool dns_allow_resolve_names_to_ipv4_, bool dns_allow_resolve_names_to_ipv6_) + { + settings.dns_allow_resolve_names_to_ipv4 = dns_allow_resolve_names_to_ipv4_; + settings.dns_allow_resolve_names_to_ipv6 = dns_allow_resolve_names_to_ipv6_; + } + + DNSFilterSettings settings; +}; + + +DNSResolver::DNSResolver() + : impl(std::make_unique()) + , addressFilter(std::make_unique()) + , log(getLogger("DNSResolver")) {} + + +DNSResolver::IPAddresses DNSResolver::getResolvedIPAdressessWithFiltering(const std::string & host) +{ + auto addresses = resolveIPAddressImpl(host); + addressFilter->performAddressFiltering(addresses); + + if (addresses.empty()) + { + ProfileEvents::increment(ProfileEvents::DNSError); + throw DB::NetException(ErrorCodes::DNS_ERROR, "After filtering there are no resolved address for host({}).", host); + } + return addresses; +} + +DNSResolver::IPAddresses DNSResolver::resolveIPAddressWithCache(const std::string & host) +{ + auto [result, _ ] = impl->cache_host.getOrSet(host, [&host, this]() {return std::make_shared(getResolvedIPAdressessWithFiltering(host), std::chrono::system_clock::now());}); + return result->addresses; +} Poco::Net::IPAddress DNSResolver::resolveHost(const std::string & host) { return pickAddress(resolveHostAll(host)); // random order -> random pick } +void DNSResolver::setFilterSettings(bool dns_allow_resolve_names_to_ipv4_, bool dns_allow_resolve_names_to_ipv6_) +{ + addressFilter->setSettings(dns_allow_resolve_names_to_ipv4_, dns_allow_resolve_names_to_ipv6_); +} + DNSResolver::IPAddresses DNSResolver::resolveHostAllInOriginOrder(const std::string & host) { if (impl->disable_cache) - return resolveIPAddressImpl(host); + return getResolvedIPAdressessWithFiltering(host); addToNewHosts(host); - return resolveIPAddressWithCache(impl->cache_host, host); + return resolveIPAddressWithCache(host); } DNSResolver::IPAddresses DNSResolver::resolveHostAll(const std::string & host) @@ -232,7 +297,7 @@ Poco::Net::SocketAddress DNSResolver::resolveAddress(const std::string & host_an splitHostAndPort(host_and_port, host, port); addToNewHosts(host); - return Poco::Net::SocketAddress(pickAddress(resolveIPAddressWithCache(impl->cache_host, host)), port); + return Poco::Net::SocketAddress(pickAddress(resolveIPAddressWithCache(host)), port); } Poco::Net::SocketAddress DNSResolver::resolveAddress(const std::string & host, UInt16 port) @@ -241,7 +306,7 @@ Poco::Net::SocketAddress DNSResolver::resolveAddress(const std::string & host, U return Poco::Net::SocketAddress(host, port); addToNewHosts(host); - return Poco::Net::SocketAddress(pickAddress(resolveIPAddressWithCache(impl->cache_host, host)), port); + return Poco::Net::SocketAddress(pickAddress(resolveIPAddressWithCache(host)), port); } std::vector DNSResolver::resolveAddressList(const std::string & host, UInt16 port) @@ -254,7 +319,7 @@ std::vector DNSResolver::resolveAddressList(const std: if (!impl->disable_cache) addToNewHosts(host); - std::vector ips = impl->disable_cache ? hostByName(host) : resolveIPAddressWithCache(impl->cache_host, host); + std::vector ips = impl->disable_cache ? hostByName(host) : resolveIPAddressWithCache(host); auto ips_end = std::unique(ips.begin(), ips.end()); addresses.reserve(ips_end - ips.begin()); @@ -419,8 +484,8 @@ bool DNSResolver::updateCache(UInt32 max_consecutive_failures) bool DNSResolver::updateHost(const String & host) { - const auto old_value = resolveIPAddressWithCache(impl->cache_host, host); - auto new_value = resolveIPAddressImpl(host); + const auto old_value = resolveIPAddressWithCache(host); + auto new_value = getResolvedIPAdressessWithFiltering(host); const bool result = old_value != new_value; impl->cache_host.set(host, std::make_shared(std::move(new_value), std::chrono::system_clock::now())); return result; diff --git a/src/Common/DNSResolver.h b/src/Common/DNSResolver.h index 1ddd9d3b991..b35f55dfcd2 100644 --- a/src/Common/DNSResolver.h +++ b/src/Common/DNSResolver.h @@ -68,6 +68,8 @@ public: /// Returns true if IP of any host has been changed or an element was dropped (too many failures) bool updateCache(UInt32 max_consecutive_failures); + void setFilterSettings(bool dns_allow_resolve_names_to_ipv4, bool dns_allow_resolve_names_to_ipv6); + /// Returns a copy of cache entries std::vector> cacheEntries() const; @@ -86,6 +88,10 @@ private: struct Impl; std::unique_ptr impl; + + struct AddressFilter; + std::unique_ptr addressFilter; + LoggerPtr log; /// Updates cached value and returns true it has been changed. @@ -94,6 +100,9 @@ private: void addToNewHosts(const String & host); void addToNewAddresses(const Poco::Net::IPAddress & address); + + IPAddresses resolveIPAddressWithCache(const std::string & host); + IPAddresses getResolvedIPAdressessWithFiltering(const std::string & host); }; } diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index d13e6251ca9..6c23e3b95f6 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -106,6 +106,8 @@ namespace DB M(UInt64, dns_cache_max_entries, 10000, "Internal DNS cache max entries.", 0) \ M(Int32, dns_cache_update_period, 15, "Internal DNS cache update period in seconds.", 0) \ M(UInt32, dns_max_consecutive_failures, 10, "Max DNS resolve failures of a hostname before dropping the hostname from ClickHouse DNS cache.", 0) \ + M(Bool, dns_allow_resolve_names_to_ipv4, true, "Allows resolve names to ipv4 addresses.", 0) \ + M(Bool, dns_allow_resolve_names_to_ipv6, true, "Allows resolve names to ipv6 addresses.", 0) \ \ M(UInt64, max_table_size_to_drop, 50000000000lu, "If size of a table is greater than this value (in bytes) than table could not be dropped with any DROP query.", 0) \ M(UInt64, max_partition_size_to_drop, 50000000000lu, "Same as max_table_size_to_drop, but for the partitions.", 0) \ diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 0ccbd874a3d..a76e214b1fc 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -500,6 +500,267 @@ static std::initializer_list col >= '2023-01-01' AND col <= '2023-12-31')"}, + {"extract_key_value_pairs_max_pairs_per_row", 0, 0, "Max number of pairs that can be produced by the `extractKeyValuePairs` function. Used as a safeguard against consuming too much memory."}, + {"default_view_definer", "CURRENT_USER", "CURRENT_USER", "Allows to set default `DEFINER` option while creating a view"}, + {"default_materialized_view_sql_security", "DEFINER", "DEFINER", "Allows to set a default value for SQL SECURITY option when creating a materialized view"}, + {"default_normal_view_sql_security", "INVOKER", "INVOKER", "Allows to set default `SQL SECURITY` option while creating a normal view"}, + {"mysql_map_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, + {"mysql_map_fixed_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, + }}, + {"24.1", {{"print_pretty_type_names", false, true, "Better user experience."}, + {"input_format_json_read_bools_as_strings", false, true, "Allow to read bools as strings in JSON formats by default"}, + {"output_format_arrow_use_signed_indexes_for_dictionary", false, true, "Use signed indexes type for Arrow dictionaries by default as it's recommended"}, + {"allow_experimental_variant_type", false, false, "Add new experimental Variant type"}, + {"use_variant_as_common_type", false, false, "Allow to use Variant in if/multiIf if there is no common type"}, + {"output_format_arrow_use_64_bit_indexes_for_dictionary", false, false, "Allow to use 64 bit indexes type in Arrow dictionaries"}, + {"parallel_replicas_mark_segment_size", 128, 128, "Add new setting to control segment size in new parallel replicas coordinator implementation"}, + {"ignore_materialized_views_with_dropped_target_table", false, false, "Add new setting to allow to ignore materialized views with dropped target table"}, + {"output_format_compression_level", 3, 3, "Allow to change compression level in the query output"}, + {"output_format_compression_zstd_window_log", 0, 0, "Allow to change zstd window log in the query output when zstd compression is used"}, + {"enable_zstd_qat_codec", false, false, "Add new ZSTD_QAT codec"}, + {"enable_vertical_final", false, true, "Use vertical final by default"}, + {"output_format_arrow_use_64_bit_indexes_for_dictionary", false, false, "Allow to use 64 bit indexes type in Arrow dictionaries"}, + {"max_rows_in_set_to_optimize_join", 100000, 0, "Disable join optimization as it prevents from read in order optimization"}, + {"output_format_pretty_color", true, "auto", "Setting is changed to allow also for auto value, disabling ANSI escapes if output is not a tty"}, + {"function_visible_width_behavior", 0, 1, "We changed the default behavior of `visibleWidth` to be more precise"}, + {"max_estimated_execution_time", 0, 0, "Separate max_execution_time and max_estimated_execution_time"}, + {"iceberg_engine_ignore_schema_evolution", false, false, "Allow to ignore schema evolution in Iceberg table engine"}, + {"optimize_injective_functions_in_group_by", false, true, "Replace injective functions by it's arguments in GROUP BY section in analyzer"}, + {"update_insert_deduplication_token_in_dependent_materialized_views", false, false, "Allow to update insert deduplication token with table identifier during insert in dependent materialized views"}, + {"azure_max_unexpected_write_error_retries", 4, 4, "The maximum number of retries in case of unexpected errors during Azure blob storage write"}, + {"split_parts_ranges_into_intersecting_and_non_intersecting_final", false, true, "Allow to split parts ranges into intersecting and non intersecting during FINAL optimization"}, + {"split_intersecting_parts_ranges_into_layers_final", true, true, "Allow to split intersecting parts ranges into layers during FINAL optimization"}}}, + {"23.12", {{"allow_suspicious_ttl_expressions", true, false, "It is a new setting, and in previous versions the behavior was equivalent to allowing."}, + {"input_format_parquet_allow_missing_columns", false, true, "Allow missing columns in Parquet files by default"}, + {"input_format_orc_allow_missing_columns", false, true, "Allow missing columns in ORC files by default"}, + {"input_format_arrow_allow_missing_columns", false, true, "Allow missing columns in Arrow files by default"}}}, + {"23.11", {{"parsedatetime_parse_without_leading_zeros", false, true, "Improved compatibility with MySQL DATE_FORMAT/STR_TO_DATE"}}}, + {"23.9", {{"optimize_group_by_constant_keys", false, true, "Optimize group by constant keys by default"}, + {"input_format_json_try_infer_named_tuples_from_objects", false, true, "Try to infer named Tuples from JSON objects by default"}, + {"input_format_json_read_numbers_as_strings", false, true, "Allow to read numbers as strings in JSON formats by default"}, + {"input_format_json_read_arrays_as_strings", false, true, "Allow to read arrays as strings in JSON formats by default"}, + {"input_format_json_infer_incomplete_types_as_strings", false, true, "Allow to infer incomplete types as Strings in JSON formats by default"}, + {"input_format_json_try_infer_numbers_from_strings", true, false, "Don't infer numbers from strings in JSON formats by default to prevent possible parsing errors"}, + {"http_write_exception_in_output_format", false, true, "Output valid JSON/XML on exception in HTTP streaming."}}}, + {"23.8", {{"rewrite_count_distinct_if_with_count_distinct_implementation", false, true, "Rewrite countDistinctIf with count_distinct_implementation configuration"}}}, + {"23.7", {{"function_sleep_max_microseconds_per_block", 0, 3000000, "In previous versions, the maximum sleep time of 3 seconds was applied only for `sleep`, but not for `sleepEachRow` function. In the new version, we introduce this setting. If you set compatibility with the previous versions, we will disable the limit altogether."}}}, + {"23.6", {{"http_send_timeout", 180, 30, "3 minutes seems crazy long. Note that this is timeout for a single network write call, not for the whole upload operation."}, + {"http_receive_timeout", 180, 30, "See http_send_timeout."}}}, + {"23.5", {{"input_format_parquet_preserve_order", true, false, "Allow Parquet reader to reorder rows for better parallelism."}, + {"parallelize_output_from_storages", false, true, "Allow parallelism when executing queries that read from file/url/s3/etc. This may reorder rows."}, + {"use_with_fill_by_sorting_prefix", false, true, "Columns preceding WITH FILL columns in ORDER BY clause form sorting prefix. Rows with different values in sorting prefix are filled independently"}, + {"output_format_parquet_compliant_nested_types", false, true, "Change an internal field name in output Parquet file schema."}}}, + {"23.4", {{"allow_suspicious_indices", true, false, "If true, index can defined with identical expressions"}, + {"allow_nonconst_timezone_arguments", true, false, "Allow non-const timezone arguments in certain time-related functions like toTimeZone(), fromUnixTimestamp*(), snowflakeToDateTime*()."}, + {"connect_timeout_with_failover_ms", 50, 1000, "Increase default connect timeout because of async connect"}, + {"connect_timeout_with_failover_secure_ms", 100, 1000, "Increase default secure connect timeout because of async connect"}, + {"hedged_connection_timeout_ms", 100, 50, "Start new connection in hedged requests after 50 ms instead of 100 to correspond with previous connect timeout"}, + {"formatdatetime_f_prints_single_zero", true, false, "Improved compatibility with MySQL DATE_FORMAT()/STR_TO_DATE()"}, + {"formatdatetime_parsedatetime_m_is_month_name", false, true, "Improved compatibility with MySQL DATE_FORMAT/STR_TO_DATE"}}}, + {"23.3", {{"output_format_parquet_version", "1.0", "2.latest", "Use latest Parquet format version for output format"}, + {"input_format_json_ignore_unknown_keys_in_named_tuple", false, true, "Improve parsing JSON objects as named tuples"}, + {"input_format_native_allow_types_conversion", false, true, "Allow types conversion in Native input forma"}, + {"output_format_arrow_compression_method", "none", "lz4_frame", "Use lz4 compression in Arrow output format by default"}, + {"output_format_parquet_compression_method", "snappy", "lz4", "Use lz4 compression in Parquet output format by default"}, + {"output_format_orc_compression_method", "none", "lz4_frame", "Use lz4 compression in ORC output format by default"}, + {"async_query_sending_for_remote", false, true, "Create connections and send query async across shards"}}}, + {"23.2", {{"output_format_parquet_fixed_string_as_fixed_byte_array", false, true, "Use Parquet FIXED_LENGTH_BYTE_ARRAY type for FixedString by default"}, + {"output_format_arrow_fixed_string_as_fixed_byte_array", false, true, "Use Arrow FIXED_SIZE_BINARY type for FixedString by default"}, + {"query_plan_remove_redundant_distinct", false, true, "Remove redundant Distinct step in query plan"}, + {"optimize_duplicate_order_by_and_distinct", true, false, "Remove duplicate ORDER BY and DISTINCT if it's possible"}, + {"insert_keeper_max_retries", 0, 20, "Enable reconnections to Keeper on INSERT, improve reliability"}}}, + {"23.1", {{"input_format_json_read_objects_as_strings", 0, 1, "Enable reading nested json objects as strings while object type is experimental"}, + {"input_format_json_defaults_for_missing_elements_in_named_tuple", false, true, "Allow missing elements in JSON objects while reading named tuples by default"}, + {"input_format_csv_detect_header", false, true, "Detect header in CSV format by default"}, + {"input_format_tsv_detect_header", false, true, "Detect header in TSV format by default"}, + {"input_format_custom_detect_header", false, true, "Detect header in CustomSeparated format by default"}, + {"query_plan_remove_redundant_sorting", false, true, "Remove redundant sorting in query plan. For example, sorting steps related to ORDER BY clauses in subqueries"}}}, + {"22.12", {{"max_size_to_preallocate_for_aggregation", 10'000'000, 100'000'000, "This optimizes performance"}, + {"query_plan_aggregation_in_order", 0, 1, "Enable some refactoring around query plan"}, + {"format_binary_max_string_size", 0, 1_GiB, "Prevent allocating large amount of memory"}}}, + {"22.11", {{"use_structure_from_insertion_table_in_table_functions", 0, 2, "Improve using structure from insertion table in table functions"}}}, + {"22.9", {{"force_grouping_standard_compatibility", false, true, "Make GROUPING function output the same as in SQL standard and other DBMS"}}}, + {"22.7", {{"cross_to_inner_join_rewrite", 1, 2, "Force rewrite comma join to inner"}, + {"enable_positional_arguments", false, true, "Enable positional arguments feature by default"}, + {"format_csv_allow_single_quotes", true, false, "Most tools don't treat single quote in CSV specially, don't do it by default too"}}}, + {"22.6", {{"output_format_json_named_tuples_as_objects", false, true, "Allow to serialize named tuples as JSON objects in JSON formats by default"}, + {"input_format_skip_unknown_fields", false, true, "Optimize reading subset of columns for some input formats"}}}, + {"22.5", {{"memory_overcommit_ratio_denominator", 0, 1073741824, "Enable memory overcommit feature by default"}, + {"memory_overcommit_ratio_denominator_for_user", 0, 1073741824, "Enable memory overcommit feature by default"}}}, + {"22.4", {{"allow_settings_after_format_in_insert", true, false, "Do not allow SETTINGS after FORMAT for INSERT queries because ClickHouse interpret SETTINGS as some values, which is misleading"}}}, + {"22.3", {{"cast_ipv4_ipv6_default_on_conversion_error", true, false, "Make functions cast(value, 'IPv4') and cast(value, 'IPv6') behave same as toIPv4 and toIPv6 functions"}}}, + {"21.12", {{"stream_like_engine_allow_direct_select", true, false, "Do not allow direct select for Kafka/RabbitMQ/FileLog by default"}}}, + {"21.9", {{"output_format_decimal_trailing_zeros", true, false, "Do not output trailing zeros in text representation of Decimal types by default for better looking output"}, + {"use_hedged_requests", false, true, "Enable Hedged Requests feature by default"}}}, + {"21.7", {{"legacy_column_name_of_tuple_literal", true, false, "Add this setting only for compatibility reasons. It makes sense to set to 'true', while doing rolling update of cluster from version lower than 21.7 to higher"}}}, + {"21.5", {{"async_socket_for_remote", false, true, "Fix all problems and turn on asynchronous reads from socket for remote queries by default again"}}}, + {"21.3", {{"async_socket_for_remote", true, false, "Turn off asynchronous reads from socket for remote queries because of some problems"}, + {"optimize_normalize_count_variants", false, true, "Rewrite aggregate functions that semantically equals to count() as count() by default"}, + {"normalize_function_names", false, true, "Normalize function names to their canonical names, this was needed for projection query routing"}}}, + {"21.2", {{"enable_global_with_statement", false, true, "Propagate WITH statements to UNION queries and all subqueries by default"}}}, + {"21.1", {{"insert_quorum_parallel", false, true, "Use parallel quorum inserts by default. It is significantly more convenient to use than sequential quorum inserts"}, + {"input_format_null_as_default", false, true, "Allow to insert NULL as default for input formats by default"}, + {"optimize_on_insert", false, true, "Enable data optimization on INSERT by default for better user experience"}, + {"use_compact_format_in_distributed_parts_names", false, true, "Use compact format for async INSERT into Distributed tables by default"}}}, + {"20.10", {{"format_regexp_escaping_rule", "Escaped", "Raw", "Use Raw as default escaping rule for Regexp format to male the behaviour more like to what users expect"}}}, + {"20.7", {{"show_table_uuid_in_table_create_query_if_not_nil", true, false, "Stop showing UID of the table in its CREATE query for Engine=Atomic"}}}, + {"20.5", {{"input_format_with_names_use_header", false, true, "Enable using header with names for formats with WithNames/WithNamesAndTypes suffixes"}, + {"allow_suspicious_codecs", true, false, "Don't allow to specify meaningless compression codecs"}}}, + {"20.4", {{"validate_polygons", false, true, "Throw exception if polygon is invalid in function pointInPolygon by default instead of returning possibly wrong results"}}}, + {"19.18", {{"enable_scalar_subquery_optimization", false, true, "Prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once"}}}, + {"19.14", {{"any_join_distinct_right_table_keys", true, false, "Disable ANY RIGHT and ANY FULL JOINs by default to avoid inconsistency"}}}, + {"19.12", {{"input_format_defaults_for_omitted_fields", false, true, "Enable calculation of complex default expressions for omitted fields for some input formats, because it should be the expected behaviour"}}}, + {"19.5", {{"max_partitions_per_insert_block", 0, 100, "Add a limit for the number of partitions in one block"}}}, + {"18.12.17", {{"enable_optimize_predicate_expression", 0, 1, "Optimize predicates to subqueries by default"}}}, }; diff --git a/tests/integration/test_dns_cache/test.py b/tests/integration/test_dns_cache/test.py index a6db26c8575..5e120dc42aa 100644 --- a/tests/integration/test_dns_cache/test.py +++ b/tests/integration/test_dns_cache/test.py @@ -32,6 +32,7 @@ node2 = cluster.add_instance( main_configs=["configs/listen_host.xml", "configs/dns_update_long.xml"], with_zookeeper=True, ipv6_address="2001:3984:3989::1:1112", + ipv4_address="10.5.95.11", ) @@ -39,9 +40,6 @@ node2 = cluster.add_instance( def cluster_without_dns_cache_update(): try: cluster.start() - - _fill_nodes([node1, node2], "test_table_drop") - yield cluster except Exception as ex: @@ -59,6 +57,8 @@ def test_ip_change_drop_dns_cache(cluster_without_dns_cache_update): # In this case we should manually set up the static DNS entries on the source host # to exclude resplving addresses automatically added by docker. # We use ipv6 for hosts, but resolved DNS entries may contain an unexpected ipv4 address. + _fill_nodes([node1, node2], "test_table_drop") + node2.set_hosts([("2001:3984:3989::1:1111", "node1")]) # drop DNS cache node2.query("SYSTEM DROP DNS CACHE") @@ -98,6 +98,67 @@ def test_ip_change_drop_dns_cache(cluster_without_dns_cache_update): assert_eq_with_retry(node2, "SELECT count(*) from test_table_drop", "7") +def _render_filter_config(allow_ipv4, allow_ipv6): + config = f""" + + {int(allow_ipv4)} + {int(allow_ipv6)} + + """ + return config + + +@pytest.mark.parametrize( + "allow_ipv4, allow_ipv6", + [ + (True, False), + (False, True), + (False, False), + ], +) +def test_dns_resolver_filter(cluster_without_dns_cache_update, allow_ipv4, allow_ipv6): + host_ipv6 = node2.ipv6_address + host_ipv4 = node2.ipv4_address + + node2.set_hosts( + [ + (host_ipv6, "test_host"), + (host_ipv4, "test_host"), + ] + ) + node2.replace_config( + "/etc/clickhouse-server/config.d/dns_filter.xml", + _render_filter_config(allow_ipv4, allow_ipv6), + ) + + node2.query("SYSTEM DROP DNS CACHE") + node2.query("SYSTEM DROP CONNECTIONS CACHE") + node2.query("SYSTEM RELOAD CONFIG") + + if not allow_ipv4 and not allow_ipv6: + with pytest.raises(QueryRuntimeException): + node4.query("SELECT * FROM remote('lost_host', 'system', 'one')") + else: + node2.query("SELECT * FROM remote('test_host', system, one)") + assert ( + node2.query( + "SELECT ip_address FROM system.dns_cache WHERE hostname='test_host'" + ) + == f"{host_ipv4 if allow_ipv4 else host_ipv6}\n" + ) + + node2.exec_in_container( + [ + "bash", + "-c", + "rm /etc/clickhouse-server/config.d/dns_filter.xml", + ], + privileged=True, + user="root", + ) + node2.query("SYSTEM RELOAD CONFIG") + + node3 = cluster.add_instance( "node3", main_configs=["configs/listen_host.xml"], From 0d5cb9f75a527e90aed18860efbd5ed1f9dcd775 Mon Sep 17 00:00:00 2001 From: MikhailBurdukov Date: Tue, 23 Jul 2024 09:25:02 +0000 Subject: [PATCH 02/26] Review fixes --- src/Common/DNSResolver.cpp | 34 ++++++++++++++--------------- src/Core/SettingsChangesHistory.cpp | 2 +- 2 files changed, 17 insertions(+), 19 deletions(-) diff --git a/src/Common/DNSResolver.cpp b/src/Common/DNSResolver.cpp index 051e6e63091..08111d7f2af 100644 --- a/src/Common/DNSResolver.cpp +++ b/src/Common/DNSResolver.cpp @@ -12,8 +12,7 @@ #include #include #include -#include -#include +#include "Common/MultiVersion.h" #include #include "DNSPTRResolverProvider.h" @@ -198,14 +197,17 @@ struct DNSResolver::AddressFilter { struct DNSFilterSettings { - std::atomic dns_allow_resolve_names_to_ipv4{true}; - std::atomic dns_allow_resolve_names_to_ipv6{true}; + bool dns_allow_resolve_names_to_ipv4{true}; + bool dns_allow_resolve_names_to_ipv6{true}; }; + AddressFilter() : settings(std::make_unique()) {} + void performAddressFiltering(DNSResolver::IPAddresses & addresses) { - bool dns_resolve_ipv4 = settings.dns_allow_resolve_names_to_ipv4; - bool dns_resolve_ipv6 = settings.dns_allow_resolve_names_to_ipv6; + const auto current_settings = settings.get(); + bool dns_resolve_ipv4 = current_settings->dns_allow_resolve_names_to_ipv4; + bool dns_resolve_ipv6 = current_settings->dns_allow_resolve_names_to_ipv6; if (dns_resolve_ipv4 && dns_resolve_ipv6) { @@ -215,24 +217,20 @@ struct DNSResolver::AddressFilter { throw Exception(ErrorCodes::BAD_ARGUMENTS, "DNS can't resolve any address, because dns_resolve_ipv6_interfaces and dns_resolve_ipv4_interfaces both are disabled"); } - addresses.erase( - std::remove_if(addresses.begin(), addresses.end(), - [dns_resolve_ipv6, dns_resolve_ipv4](const Poco::Net::IPAddress& address) - { - return (address.family() == Poco::Net::IPAddress::IPv6 && !dns_resolve_ipv6) - || (address.family() == Poco::Net::IPAddress::IPv4 && !dns_resolve_ipv4); - }), - addresses.end() - ); + + std::erase_if(addresses, [dns_resolve_ipv6, dns_resolve_ipv4](const Poco::Net::IPAddress& address) + { + return (address.family() == Poco::Net::IPAddress::IPv6 && !dns_resolve_ipv6) + || (address.family() == Poco::Net::IPAddress::IPv4 && !dns_resolve_ipv4); + }); } void setSettings(bool dns_allow_resolve_names_to_ipv4_, bool dns_allow_resolve_names_to_ipv6_) { - settings.dns_allow_resolve_names_to_ipv4 = dns_allow_resolve_names_to_ipv4_; - settings.dns_allow_resolve_names_to_ipv6 = dns_allow_resolve_names_to_ipv6_; + settings.set(std::make_unique(dns_allow_resolve_names_to_ipv4_, dns_allow_resolve_names_to_ipv6_)); } - DNSFilterSettings settings; + MultiVersion settings; }; diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index a76e214b1fc..01b9bca795f 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -523,7 +523,7 @@ static std::initializer_list Date: Tue, 23 Jul 2024 12:58:50 +0000 Subject: [PATCH 03/26] Fix test --- src/Core/SettingsChangesHistory.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 01b9bca795f..ac427e2e03e 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -523,9 +523,7 @@ static std::initializer_list Date: Tue, 23 Jul 2024 16:56:29 +0000 Subject: [PATCH 04/26] Fix tidy build --- src/Common/DNSResolver.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/DNSResolver.cpp b/src/Common/DNSResolver.cpp index 08111d7f2af..bbee7d259f0 100644 --- a/src/Common/DNSResolver.cpp +++ b/src/Common/DNSResolver.cpp @@ -203,7 +203,7 @@ struct DNSResolver::AddressFilter AddressFilter() : settings(std::make_unique()) {} - void performAddressFiltering(DNSResolver::IPAddresses & addresses) + void performAddressFiltering(DNSResolver::IPAddresses & addresses) const { const auto current_settings = settings.get(); bool dns_resolve_ipv4 = current_settings->dns_allow_resolve_names_to_ipv4; From 304d01e2c36ae11cd1c703135c493ee885bc3062 Mon Sep 17 00:00:00 2001 From: MikhailBurdukov Date: Mon, 29 Jul 2024 19:18:14 +0000 Subject: [PATCH 05/26] Review fix --- programs/server/Server.cpp | 1 - src/Common/DNSResolver.cpp | 8 ++++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 3126f65ef09..aa7c3d75163 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1770,7 +1770,6 @@ try DNSResolver::instance().setFilterSettings(new_server_settings.dns_allow_resolve_names_to_ipv4, new_server_settings.dns_allow_resolve_names_to_ipv6); - if (global_context->isServerCompletelyStarted()) CannotAllocateThreadFaultInjector::setFaultProbability(new_server_settings.cannot_allocate_thread_fault_injection_probability); diff --git a/src/Common/DNSResolver.cpp b/src/Common/DNSResolver.cpp index bbee7d259f0..68a8fa7d74c 100644 --- a/src/Common/DNSResolver.cpp +++ b/src/Common/DNSResolver.cpp @@ -225,9 +225,9 @@ struct DNSResolver::AddressFilter }); } - void setSettings(bool dns_allow_resolve_names_to_ipv4_, bool dns_allow_resolve_names_to_ipv6_) + void setSettings(bool dns_allow_resolve_names_to_ipv4, bool dns_allow_resolve_names_to_ipv6) { - settings.set(std::make_unique(dns_allow_resolve_names_to_ipv4_, dns_allow_resolve_names_to_ipv6_)); + settings.set(std::make_unique(dns_allow_resolve_names_to_ipv4, dns_allow_resolve_names_to_ipv6)); } MultiVersion settings; @@ -264,9 +264,9 @@ Poco::Net::IPAddress DNSResolver::resolveHost(const std::string & host) return pickAddress(resolveHostAll(host)); // random order -> random pick } -void DNSResolver::setFilterSettings(bool dns_allow_resolve_names_to_ipv4_, bool dns_allow_resolve_names_to_ipv6_) +void DNSResolver::setFilterSettings(bool dns_allow_resolve_names_to_ipv4, bool dns_allow_resolve_names_to_ipv6) { - addressFilter->setSettings(dns_allow_resolve_names_to_ipv4_, dns_allow_resolve_names_to_ipv6_); + addressFilter->setSettings(dns_allow_resolve_names_to_ipv4, dns_allow_resolve_names_to_ipv6); } DNSResolver::IPAddresses DNSResolver::resolveHostAllInOriginOrder(const std::string & host) From 4143eea587b808086cceb98025906646fa78c96a Mon Sep 17 00:00:00 2001 From: MikhailBurdukov Date: Wed, 31 Jul 2024 08:35:38 +0000 Subject: [PATCH 06/26] Add test to skip parallel --- tests/integration/parallel_skip.json | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/parallel_skip.json b/tests/integration/parallel_skip.json index 99fa626bd1e..6689572aeb7 100644 --- a/tests/integration/parallel_skip.json +++ b/tests/integration/parallel_skip.json @@ -1,6 +1,7 @@ [ "test_dns_cache/test.py::test_dns_cache_update", "test_dns_cache/test.py::test_ip_change_drop_dns_cache", + "test_dns_cache/test.py::test_dns_resolver_filter", "test_dns_cache/test.py::test_ip_change_update_dns_cache", "test_dns_cache/test.py::test_user_access_ip_change[node0]", "test_dns_cache/test.py::test_user_access_ip_change[node1]", From 012ea3cc6d09c50f29fe4d7964aa18ee038c35b2 Mon Sep 17 00:00:00 2001 From: MikhailBurdukov Date: Wed, 31 Jul 2024 13:29:36 +0000 Subject: [PATCH 07/26] Rebase --- src/Core/SettingsChangesHistory.cpp | 259 ----------------------- tests/integration/test_dns_cache/test.py | 138 ++++++------ 2 files changed, 74 insertions(+), 323 deletions(-) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index ac427e2e03e..0ccbd874a3d 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -500,265 +500,6 @@ static std::initializer_list col >= '2023-01-01' AND col <= '2023-12-31')"}, - {"extract_key_value_pairs_max_pairs_per_row", 0, 0, "Max number of pairs that can be produced by the `extractKeyValuePairs` function. Used as a safeguard against consuming too much memory."}, - {"default_view_definer", "CURRENT_USER", "CURRENT_USER", "Allows to set default `DEFINER` option while creating a view"}, - {"default_materialized_view_sql_security", "DEFINER", "DEFINER", "Allows to set a default value for SQL SECURITY option when creating a materialized view"}, - {"default_normal_view_sql_security", "INVOKER", "INVOKER", "Allows to set default `SQL SECURITY` option while creating a normal view"}, - {"mysql_map_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, - {"mysql_map_fixed_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, - }}, - {"24.1", {{"print_pretty_type_names", false, true, "Better user experience."}, - {"input_format_json_read_bools_as_strings", false, true, "Allow to read bools as strings in JSON formats by default"}, - {"output_format_arrow_use_signed_indexes_for_dictionary", false, true, "Use signed indexes type for Arrow dictionaries by default as it's recommended"}, - {"allow_experimental_variant_type", false, false, "Add new experimental Variant type"}, - {"use_variant_as_common_type", false, false, "Allow to use Variant in if/multiIf if there is no common type"}, - {"output_format_arrow_use_64_bit_indexes_for_dictionary", false, false, "Allow to use 64 bit indexes type in Arrow dictionaries"}, - {"parallel_replicas_mark_segment_size", 128, 128, "Add new setting to control segment size in new parallel replicas coordinator implementation"}, - {"ignore_materialized_views_with_dropped_target_table", false, false, "Add new setting to allow to ignore materialized views with dropped target table"}, - {"output_format_compression_level", 3, 3, "Allow to change compression level in the query output"}, - {"output_format_compression_zstd_window_log", 0, 0, "Allow to change zstd window log in the query output when zstd compression is used"}, - {"enable_zstd_qat_codec", false, false, "Add new ZSTD_QAT codec"}, - {"enable_vertical_final", false, true, "Use vertical final by default"}, - {"output_format_arrow_use_64_bit_indexes_for_dictionary", false, false, "Allow to use 64 bit indexes type in Arrow dictionaries"}, - {"max_rows_in_set_to_optimize_join", 100000, 0, "Disable join optimization as it prevents from read in order optimization"}, - {"output_format_pretty_color", true, "auto", "Setting is changed to allow also for auto value, disabling ANSI escapes if output is not a tty"}, - {"function_visible_width_behavior", 0, 1, "We changed the default behavior of `visibleWidth` to be more precise"}, - {"max_estimated_execution_time", 0, 0, "Separate max_execution_time and max_estimated_execution_time"}, - {"iceberg_engine_ignore_schema_evolution", false, false, "Allow to ignore schema evolution in Iceberg table engine"}, - {"optimize_injective_functions_in_group_by", false, true, "Replace injective functions by it's arguments in GROUP BY section in analyzer"}, - {"update_insert_deduplication_token_in_dependent_materialized_views", false, false, "Allow to update insert deduplication token with table identifier during insert in dependent materialized views"}, - {"azure_max_unexpected_write_error_retries", 4, 4, "The maximum number of retries in case of unexpected errors during Azure blob storage write"}, - {"split_parts_ranges_into_intersecting_and_non_intersecting_final", false, true, "Allow to split parts ranges into intersecting and non intersecting during FINAL optimization"}, - {"split_intersecting_parts_ranges_into_layers_final", true, true, "Allow to split intersecting parts ranges into layers during FINAL optimization"}}}, - {"23.12", {{"allow_suspicious_ttl_expressions", true, false, "It is a new setting, and in previous versions the behavior was equivalent to allowing."}, - {"input_format_parquet_allow_missing_columns", false, true, "Allow missing columns in Parquet files by default"}, - {"input_format_orc_allow_missing_columns", false, true, "Allow missing columns in ORC files by default"}, - {"input_format_arrow_allow_missing_columns", false, true, "Allow missing columns in Arrow files by default"}}}, - {"23.11", {{"parsedatetime_parse_without_leading_zeros", false, true, "Improved compatibility with MySQL DATE_FORMAT/STR_TO_DATE"}}}, - {"23.9", {{"optimize_group_by_constant_keys", false, true, "Optimize group by constant keys by default"}, - {"input_format_json_try_infer_named_tuples_from_objects", false, true, "Try to infer named Tuples from JSON objects by default"}, - {"input_format_json_read_numbers_as_strings", false, true, "Allow to read numbers as strings in JSON formats by default"}, - {"input_format_json_read_arrays_as_strings", false, true, "Allow to read arrays as strings in JSON formats by default"}, - {"input_format_json_infer_incomplete_types_as_strings", false, true, "Allow to infer incomplete types as Strings in JSON formats by default"}, - {"input_format_json_try_infer_numbers_from_strings", true, false, "Don't infer numbers from strings in JSON formats by default to prevent possible parsing errors"}, - {"http_write_exception_in_output_format", false, true, "Output valid JSON/XML on exception in HTTP streaming."}}}, - {"23.8", {{"rewrite_count_distinct_if_with_count_distinct_implementation", false, true, "Rewrite countDistinctIf with count_distinct_implementation configuration"}}}, - {"23.7", {{"function_sleep_max_microseconds_per_block", 0, 3000000, "In previous versions, the maximum sleep time of 3 seconds was applied only for `sleep`, but not for `sleepEachRow` function. In the new version, we introduce this setting. If you set compatibility with the previous versions, we will disable the limit altogether."}}}, - {"23.6", {{"http_send_timeout", 180, 30, "3 minutes seems crazy long. Note that this is timeout for a single network write call, not for the whole upload operation."}, - {"http_receive_timeout", 180, 30, "See http_send_timeout."}}}, - {"23.5", {{"input_format_parquet_preserve_order", true, false, "Allow Parquet reader to reorder rows for better parallelism."}, - {"parallelize_output_from_storages", false, true, "Allow parallelism when executing queries that read from file/url/s3/etc. This may reorder rows."}, - {"use_with_fill_by_sorting_prefix", false, true, "Columns preceding WITH FILL columns in ORDER BY clause form sorting prefix. Rows with different values in sorting prefix are filled independently"}, - {"output_format_parquet_compliant_nested_types", false, true, "Change an internal field name in output Parquet file schema."}}}, - {"23.4", {{"allow_suspicious_indices", true, false, "If true, index can defined with identical expressions"}, - {"allow_nonconst_timezone_arguments", true, false, "Allow non-const timezone arguments in certain time-related functions like toTimeZone(), fromUnixTimestamp*(), snowflakeToDateTime*()."}, - {"connect_timeout_with_failover_ms", 50, 1000, "Increase default connect timeout because of async connect"}, - {"connect_timeout_with_failover_secure_ms", 100, 1000, "Increase default secure connect timeout because of async connect"}, - {"hedged_connection_timeout_ms", 100, 50, "Start new connection in hedged requests after 50 ms instead of 100 to correspond with previous connect timeout"}, - {"formatdatetime_f_prints_single_zero", true, false, "Improved compatibility with MySQL DATE_FORMAT()/STR_TO_DATE()"}, - {"formatdatetime_parsedatetime_m_is_month_name", false, true, "Improved compatibility with MySQL DATE_FORMAT/STR_TO_DATE"}}}, - {"23.3", {{"output_format_parquet_version", "1.0", "2.latest", "Use latest Parquet format version for output format"}, - {"input_format_json_ignore_unknown_keys_in_named_tuple", false, true, "Improve parsing JSON objects as named tuples"}, - {"input_format_native_allow_types_conversion", false, true, "Allow types conversion in Native input forma"}, - {"output_format_arrow_compression_method", "none", "lz4_frame", "Use lz4 compression in Arrow output format by default"}, - {"output_format_parquet_compression_method", "snappy", "lz4", "Use lz4 compression in Parquet output format by default"}, - {"output_format_orc_compression_method", "none", "lz4_frame", "Use lz4 compression in ORC output format by default"}, - {"async_query_sending_for_remote", false, true, "Create connections and send query async across shards"}}}, - {"23.2", {{"output_format_parquet_fixed_string_as_fixed_byte_array", false, true, "Use Parquet FIXED_LENGTH_BYTE_ARRAY type for FixedString by default"}, - {"output_format_arrow_fixed_string_as_fixed_byte_array", false, true, "Use Arrow FIXED_SIZE_BINARY type for FixedString by default"}, - {"query_plan_remove_redundant_distinct", false, true, "Remove redundant Distinct step in query plan"}, - {"optimize_duplicate_order_by_and_distinct", true, false, "Remove duplicate ORDER BY and DISTINCT if it's possible"}, - {"insert_keeper_max_retries", 0, 20, "Enable reconnections to Keeper on INSERT, improve reliability"}}}, - {"23.1", {{"input_format_json_read_objects_as_strings", 0, 1, "Enable reading nested json objects as strings while object type is experimental"}, - {"input_format_json_defaults_for_missing_elements_in_named_tuple", false, true, "Allow missing elements in JSON objects while reading named tuples by default"}, - {"input_format_csv_detect_header", false, true, "Detect header in CSV format by default"}, - {"input_format_tsv_detect_header", false, true, "Detect header in TSV format by default"}, - {"input_format_custom_detect_header", false, true, "Detect header in CustomSeparated format by default"}, - {"query_plan_remove_redundant_sorting", false, true, "Remove redundant sorting in query plan. For example, sorting steps related to ORDER BY clauses in subqueries"}}}, - {"22.12", {{"max_size_to_preallocate_for_aggregation", 10'000'000, 100'000'000, "This optimizes performance"}, - {"query_plan_aggregation_in_order", 0, 1, "Enable some refactoring around query plan"}, - {"format_binary_max_string_size", 0, 1_GiB, "Prevent allocating large amount of memory"}}}, - {"22.11", {{"use_structure_from_insertion_table_in_table_functions", 0, 2, "Improve using structure from insertion table in table functions"}}}, - {"22.9", {{"force_grouping_standard_compatibility", false, true, "Make GROUPING function output the same as in SQL standard and other DBMS"}}}, - {"22.7", {{"cross_to_inner_join_rewrite", 1, 2, "Force rewrite comma join to inner"}, - {"enable_positional_arguments", false, true, "Enable positional arguments feature by default"}, - {"format_csv_allow_single_quotes", true, false, "Most tools don't treat single quote in CSV specially, don't do it by default too"}}}, - {"22.6", {{"output_format_json_named_tuples_as_objects", false, true, "Allow to serialize named tuples as JSON objects in JSON formats by default"}, - {"input_format_skip_unknown_fields", false, true, "Optimize reading subset of columns for some input formats"}}}, - {"22.5", {{"memory_overcommit_ratio_denominator", 0, 1073741824, "Enable memory overcommit feature by default"}, - {"memory_overcommit_ratio_denominator_for_user", 0, 1073741824, "Enable memory overcommit feature by default"}}}, - {"22.4", {{"allow_settings_after_format_in_insert", true, false, "Do not allow SETTINGS after FORMAT for INSERT queries because ClickHouse interpret SETTINGS as some values, which is misleading"}}}, - {"22.3", {{"cast_ipv4_ipv6_default_on_conversion_error", true, false, "Make functions cast(value, 'IPv4') and cast(value, 'IPv6') behave same as toIPv4 and toIPv6 functions"}}}, - {"21.12", {{"stream_like_engine_allow_direct_select", true, false, "Do not allow direct select for Kafka/RabbitMQ/FileLog by default"}}}, - {"21.9", {{"output_format_decimal_trailing_zeros", true, false, "Do not output trailing zeros in text representation of Decimal types by default for better looking output"}, - {"use_hedged_requests", false, true, "Enable Hedged Requests feature by default"}}}, - {"21.7", {{"legacy_column_name_of_tuple_literal", true, false, "Add this setting only for compatibility reasons. It makes sense to set to 'true', while doing rolling update of cluster from version lower than 21.7 to higher"}}}, - {"21.5", {{"async_socket_for_remote", false, true, "Fix all problems and turn on asynchronous reads from socket for remote queries by default again"}}}, - {"21.3", {{"async_socket_for_remote", true, false, "Turn off asynchronous reads from socket for remote queries because of some problems"}, - {"optimize_normalize_count_variants", false, true, "Rewrite aggregate functions that semantically equals to count() as count() by default"}, - {"normalize_function_names", false, true, "Normalize function names to their canonical names, this was needed for projection query routing"}}}, - {"21.2", {{"enable_global_with_statement", false, true, "Propagate WITH statements to UNION queries and all subqueries by default"}}}, - {"21.1", {{"insert_quorum_parallel", false, true, "Use parallel quorum inserts by default. It is significantly more convenient to use than sequential quorum inserts"}, - {"input_format_null_as_default", false, true, "Allow to insert NULL as default for input formats by default"}, - {"optimize_on_insert", false, true, "Enable data optimization on INSERT by default for better user experience"}, - {"use_compact_format_in_distributed_parts_names", false, true, "Use compact format for async INSERT into Distributed tables by default"}}}, - {"20.10", {{"format_regexp_escaping_rule", "Escaped", "Raw", "Use Raw as default escaping rule for Regexp format to male the behaviour more like to what users expect"}}}, - {"20.7", {{"show_table_uuid_in_table_create_query_if_not_nil", true, false, "Stop showing UID of the table in its CREATE query for Engine=Atomic"}}}, - {"20.5", {{"input_format_with_names_use_header", false, true, "Enable using header with names for formats with WithNames/WithNamesAndTypes suffixes"}, - {"allow_suspicious_codecs", true, false, "Don't allow to specify meaningless compression codecs"}}}, - {"20.4", {{"validate_polygons", false, true, "Throw exception if polygon is invalid in function pointInPolygon by default instead of returning possibly wrong results"}}}, - {"19.18", {{"enable_scalar_subquery_optimization", false, true, "Prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once"}}}, - {"19.14", {{"any_join_distinct_right_table_keys", true, false, "Disable ANY RIGHT and ANY FULL JOINs by default to avoid inconsistency"}}}, - {"19.12", {{"input_format_defaults_for_omitted_fields", false, true, "Enable calculation of complex default expressions for omitted fields for some input formats, because it should be the expected behaviour"}}}, - {"19.5", {{"max_partitions_per_insert_block", 0, 100, "Add a limit for the number of partitions in one block"}}}, - {"18.12.17", {{"enable_optimize_predicate_expression", 0, 1, "Optimize predicates to subqueries by default"}}}, }; diff --git a/tests/integration/test_dns_cache/test.py b/tests/integration/test_dns_cache/test.py index 5e120dc42aa..36401517429 100644 --- a/tests/integration/test_dns_cache/test.py +++ b/tests/integration/test_dns_cache/test.py @@ -32,7 +32,6 @@ node2 = cluster.add_instance( main_configs=["configs/listen_host.xml", "configs/dns_update_long.xml"], with_zookeeper=True, ipv6_address="2001:3984:3989::1:1112", - ipv4_address="10.5.95.11", ) @@ -40,6 +39,9 @@ node2 = cluster.add_instance( def cluster_without_dns_cache_update(): try: cluster.start() + + _fill_nodes([node1, node2], "test_table_drop") + yield cluster except Exception as ex: @@ -57,8 +59,6 @@ def test_ip_change_drop_dns_cache(cluster_without_dns_cache_update): # In this case we should manually set up the static DNS entries on the source host # to exclude resplving addresses automatically added by docker. # We use ipv6 for hosts, but resolved DNS entries may contain an unexpected ipv4 address. - _fill_nodes([node1, node2], "test_table_drop") - node2.set_hosts([("2001:3984:3989::1:1111", "node1")]) # drop DNS cache node2.query("SYSTEM DROP DNS CACHE") @@ -98,67 +98,6 @@ def test_ip_change_drop_dns_cache(cluster_without_dns_cache_update): assert_eq_with_retry(node2, "SELECT count(*) from test_table_drop", "7") -def _render_filter_config(allow_ipv4, allow_ipv6): - config = f""" - - {int(allow_ipv4)} - {int(allow_ipv6)} - - """ - return config - - -@pytest.mark.parametrize( - "allow_ipv4, allow_ipv6", - [ - (True, False), - (False, True), - (False, False), - ], -) -def test_dns_resolver_filter(cluster_without_dns_cache_update, allow_ipv4, allow_ipv6): - host_ipv6 = node2.ipv6_address - host_ipv4 = node2.ipv4_address - - node2.set_hosts( - [ - (host_ipv6, "test_host"), - (host_ipv4, "test_host"), - ] - ) - node2.replace_config( - "/etc/clickhouse-server/config.d/dns_filter.xml", - _render_filter_config(allow_ipv4, allow_ipv6), - ) - - node2.query("SYSTEM DROP DNS CACHE") - node2.query("SYSTEM DROP CONNECTIONS CACHE") - node2.query("SYSTEM RELOAD CONFIG") - - if not allow_ipv4 and not allow_ipv6: - with pytest.raises(QueryRuntimeException): - node4.query("SELECT * FROM remote('lost_host', 'system', 'one')") - else: - node2.query("SELECT * FROM remote('test_host', system, one)") - assert ( - node2.query( - "SELECT ip_address FROM system.dns_cache WHERE hostname='test_host'" - ) - == f"{host_ipv4 if allow_ipv4 else host_ipv6}\n" - ) - - node2.exec_in_container( - [ - "bash", - "-c", - "rm /etc/clickhouse-server/config.d/dns_filter.xml", - ], - privileged=True, - user="root", - ) - node2.query("SYSTEM RELOAD CONFIG") - - node3 = cluster.add_instance( "node3", main_configs=["configs/listen_host.xml"], @@ -378,3 +317,74 @@ def test_host_is_drop_from_cache_after_consecutive_failures( assert node4.wait_for_log_line( "Cached hosts dropped:.*InvalidHostThatDoesNotExist.*" ) + + +node7 = cluster.add_instance( + "node7", + main_configs=["configs/listen_host.xml", "configs/dns_update_long.xml"], + with_zookeeper=True, + ipv6_address="2001:3984:3989::1:1117", + ipv4_address="10.5.95.17", +) + + +def _render_filter_config(allow_ipv4, allow_ipv6): + config = f""" + + {int(allow_ipv4)} + {int(allow_ipv6)} + + """ + return config + + +@pytest.mark.parametrize( + "allow_ipv4, allow_ipv6", + [ + (True, False), + (False, True), + (False, False), + ], +) +def test_dns_resolver_filter(cluster_without_dns_cache_update, allow_ipv4, allow_ipv6): + node = node7 + host_ipv6 = node.ipv6_address + host_ipv4 = node.ipv4_address + + node.set_hosts( + [ + (host_ipv6, "test_host"), + (host_ipv4, "test_host"), + ] + ) + node.replace_config( + "/etc/clickhouse-server/config.d/dns_filter.xml", + _render_filter_config(allow_ipv4, allow_ipv6), + ) + + node.query("SYSTEM RELOAD CONFIG") + node.query("SYSTEM DROP DNS CACHE") + node.query("SYSTEM DROP CONNECTIONS CACHE") + + if not allow_ipv4 and not allow_ipv6: + with pytest.raises(QueryRuntimeException): + node.query("SELECT * FROM remote('lost_host', 'system', 'one')") + else: + node.query("SELECT * FROM remote('test_host', system, one)") + assert ( + node.query( + "SELECT ip_address FROM system.dns_cache WHERE hostname='test_host'" + ) + == f"{host_ipv4 if allow_ipv4 else host_ipv6}\n" + ) + + node.exec_in_container( + [ + "bash", + "-c", + "rm /etc/clickhouse-server/config.d/dns_filter.xml", + ], + privileged=True, + user="root", + ) + node.query("SYSTEM RELOAD CONFIG") From 7d01c3131265d0dfae24fbf6ab91c71073573765 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 15 Aug 2024 16:01:13 +0200 Subject: [PATCH 08/26] Delete old code of named collections --- src/Core/PostgreSQL/PoolWithFailover.cpp | 2 +- src/Core/PostgreSQL/PoolWithFailover.h | 9 +- src/Dictionaries/HTTPDictionarySource.cpp | 26 +- src/Dictionaries/MongoDBDictionarySource.cpp | 61 ++-- .../PostgreSQLDictionarySource.cpp | 131 ++++++-- .../ExternalDataSourceConfiguration.cpp | 288 ------------------ .../ExternalDataSourceConfiguration.h | 92 ------ src/Storages/NamedCollectionsHelpers.h | 2 +- src/Storages/StorageExternalDistributed.h | 2 - src/TableFunctions/TableFunctionMongoDB.cpp | 1 - src/TableFunctions/TableFunctionRedis.cpp | 1 - 11 files changed, 170 insertions(+), 445 deletions(-) delete mode 100644 src/Storages/ExternalDataSourceConfiguration.cpp delete mode 100644 src/Storages/ExternalDataSourceConfiguration.h diff --git a/src/Core/PostgreSQL/PoolWithFailover.cpp b/src/Core/PostgreSQL/PoolWithFailover.cpp index 5014564dbe0..054fc3b2226 100644 --- a/src/Core/PostgreSQL/PoolWithFailover.cpp +++ b/src/Core/PostgreSQL/PoolWithFailover.cpp @@ -23,7 +23,7 @@ namespace postgres { PoolWithFailover::PoolWithFailover( - const DB::ExternalDataSourcesConfigurationByPriority & configurations_by_priority, + const ReplicasConfigurationByPriority & configurations_by_priority, size_t pool_size, size_t pool_wait_timeout_, size_t max_tries_, diff --git a/src/Core/PostgreSQL/PoolWithFailover.h b/src/Core/PostgreSQL/PoolWithFailover.h index 502a9a9b7d7..2237c752367 100644 --- a/src/Core/PostgreSQL/PoolWithFailover.h +++ b/src/Core/PostgreSQL/PoolWithFailover.h @@ -8,7 +8,6 @@ #include "ConnectionHolder.h" #include #include -#include #include @@ -20,12 +19,12 @@ namespace postgres class PoolWithFailover { - -using RemoteDescription = std::vector>; - public: + using ReplicasConfigurationByPriority = std::map>; + using RemoteDescription = std::vector>; + PoolWithFailover( - const DB::ExternalDataSourcesConfigurationByPriority & configurations_by_priority, + const ReplicasConfigurationByPriority & configurations_by_priority, size_t pool_size, size_t pool_wait_timeout, size_t max_tries_, diff --git a/src/Dictionaries/HTTPDictionarySource.cpp b/src/Dictionaries/HTTPDictionarySource.cpp index 663c63dd6c6..d6df03b39df 100644 --- a/src/Dictionaries/HTTPDictionarySource.cpp +++ b/src/Dictionaries/HTTPDictionarySource.cpp @@ -8,12 +8,12 @@ #include #include #include -#include #include #include #include "DictionarySourceFactory.h" #include "DictionarySourceHelpers.h" #include "DictionaryStructure.h" +#include #include "registerDictionaries.h" @@ -223,21 +223,23 @@ void registerDictionarySourceHTTP(DictionarySourceFactory & factory) String endpoint; String format; - auto named_collection = created_from_ddl - ? getURLBasedDataSourceConfiguration(config, settings_config_prefix, global_context) - : std::nullopt; + auto named_collection = created_from_ddl ? tryGetNamedCollectionWithOverrides(config, settings_config_prefix, global_context) : nullptr; if (named_collection) { - url = named_collection->configuration.url; - endpoint = named_collection->configuration.endpoint; - format = named_collection->configuration.format; + validateNamedCollection( + *named_collection, + /* required_keys */{}, + /* optional_keys */ValidateKeysMultiset{ + "url", "endpoint", "user", "credentials.user", "password", "credentials.password", "format", "compression_method", "structure", "name"}); - credentials.setUsername(named_collection->configuration.user); - credentials.setPassword(named_collection->configuration.password); + url = named_collection->getOrDefault("url", ""); + endpoint = named_collection->getOrDefault("endpoint", ""); + format = named_collection->getOrDefault("format", ""); - header_entries.reserve(named_collection->configuration.headers.size()); - for (const auto & [key, value] : named_collection->configuration.headers) - header_entries.emplace_back(key, value); + credentials.setUsername(named_collection->getAnyOrDefault({"user", "credentials.user"}, "")); + credentials.setPassword(named_collection->getAnyOrDefault({"password", "credentials.password"}, "")); + + header_entries = getHeadersFromNamedCollection(*named_collection); } else { diff --git a/src/Dictionaries/MongoDBDictionarySource.cpp b/src/Dictionaries/MongoDBDictionarySource.cpp index c30a6f90e44..7bacfdab3d2 100644 --- a/src/Dictionaries/MongoDBDictionarySource.cpp +++ b/src/Dictionaries/MongoDBDictionarySource.cpp @@ -1,15 +1,12 @@ #include "MongoDBDictionarySource.h" #include "DictionarySourceFactory.h" #include "DictionaryStructure.h" -#include #include +#include namespace DB { -static const std::unordered_set dictionary_allowed_keys = { - "host", "port", "user", "password", "db", "database", "uri", "collection", "name", "method", "options"}; - void registerDictionarySourceMongoDB(DictionarySourceFactory & factory) { auto create_mongo_db_dictionary = []( @@ -22,35 +19,53 @@ void registerDictionarySourceMongoDB(DictionarySourceFactory & factory) bool created_from_ddl) { const auto config_prefix = root_config_prefix + ".mongodb"; - ExternalDataSourceConfiguration configuration; - auto has_config_key = [](const String & key) { return dictionary_allowed_keys.contains(key); }; - auto named_collection = getExternalDataSourceConfiguration(config, config_prefix, context, has_config_key); + auto named_collection = created_from_ddl ? tryGetNamedCollectionWithOverrides(config, config_prefix, context) : nullptr; + + String host, username, password, database, method, options, collection; + UInt16 port; if (named_collection) { - configuration = named_collection->configuration; + validateNamedCollection( + *named_collection, + /* required_keys */{"collection"}, + /* optional_keys */ValidateKeysMultiset{ + "host", "port", "user", "password", "db", "database", "uri", "name", "method", "options"}); + + host = named_collection->getOrDefault("host", ""); + port = static_cast(named_collection->getOrDefault("port", 0)); + username = named_collection->getOrDefault("user", ""); + password = named_collection->getOrDefault("password", ""); + database = named_collection->getAnyOrDefault({"db", "database"}, ""); + method = named_collection->getOrDefault("method", ""); + collection = named_collection->getOrDefault("collection", ""); + options = named_collection->getOrDefault("options", ""); } else { - configuration.host = config.getString(config_prefix + ".host", ""); - configuration.port = config.getUInt(config_prefix + ".port", 0); - configuration.username = config.getString(config_prefix + ".user", ""); - configuration.password = config.getString(config_prefix + ".password", ""); - configuration.database = config.getString(config_prefix + ".db", ""); + host = config.getString(config_prefix + ".host", ""); + port = config.getUInt(config_prefix + ".port", 0); + username = config.getString(config_prefix + ".user", ""); + password = config.getString(config_prefix + ".password", ""); + database = config.getString(config_prefix + ".db", ""); + method = config.getString(config_prefix + ".method", ""); + collection = config.getString(config_prefix + ".collection"); + options = config.getString(config_prefix + ".options", ""); } if (created_from_ddl) - context->getRemoteHostFilter().checkHostAndPort(configuration.host, toString(configuration.port)); + context->getRemoteHostFilter().checkHostAndPort(host, toString(port)); - return std::make_unique(dict_struct, + return std::make_unique( + dict_struct, config.getString(config_prefix + ".uri", ""), - configuration.host, - configuration.port, - configuration.username, - configuration.password, - config.getString(config_prefix + ".method", ""), - configuration.database, - config.getString(config_prefix + ".collection"), - config.getString(config_prefix + ".options", ""), + host, + port, + username, + password, + method, + database, + collection, + options, sample_block); }; diff --git a/src/Dictionaries/PostgreSQLDictionarySource.cpp b/src/Dictionaries/PostgreSQLDictionarySource.cpp index f62a9a009d8..fd026a97cd4 100644 --- a/src/Dictionaries/PostgreSQLDictionarySource.cpp +++ b/src/Dictionaries/PostgreSQLDictionarySource.cpp @@ -13,7 +13,7 @@ #include "readInvalidateQuery.h" #include #include -#include +#include #include #endif @@ -30,7 +30,7 @@ namespace ErrorCodes static const UInt64 max_block_size = 8192; -static const std::unordered_set dictionary_allowed_keys = { +static const ValidateKeysMultiset dictionary_allowed_keys = { "host", "port", "user", "password", "db", "database", "table", "schema", "update_field", "update_lag", "invalidate_query", "query", "where", "name", "priority"}; @@ -179,6 +179,19 @@ std::string PostgreSQLDictionarySource::toString() const #endif +static void validateConfigKeys( + const Poco::Util::AbstractConfiguration & dict_config, const String & config_prefix) +{ + Poco::Util::AbstractConfiguration::Keys config_keys; + dict_config.keys(config_prefix, config_keys); + for (const auto & config_key : config_keys) + { + if (dictionary_allowed_keys.contains(config_key) || startsWith(config_key, "replica")) + continue; + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected key `{}` in dictionary source configuration", config_key); + } +} + void registerDictionarySourcePostgreSQL(DictionarySourceFactory & factory) { auto create_table_source = [=](const DictionaryStructure & dict_struct, @@ -191,38 +204,118 @@ void registerDictionarySourcePostgreSQL(DictionarySourceFactory & factory) { #if USE_LIBPQXX const auto settings_config_prefix = config_prefix + ".postgresql"; - auto has_config_key = [](const String & key) { return dictionary_allowed_keys.contains(key) || key.starts_with("replica"); }; - auto configuration = getExternalDataSourceConfigurationByPriority(config, settings_config_prefix, context, has_config_key); const auto & settings = context->getSettingsRef(); + std::optional dictionary_configuration; + String database, schema, table; + postgres::PoolWithFailover::ReplicasConfigurationByPriority replicas_by_priority; + + auto named_collection = created_from_ddl ? tryGetNamedCollectionWithOverrides(config, settings_config_prefix, context) : nullptr; + if (named_collection) + { + validateNamedCollection>(*named_collection, {}, dictionary_allowed_keys); + + StoragePostgreSQL::Configuration common_configuration; + common_configuration.host = named_collection->getOrDefault("host", ""); + common_configuration.port = named_collection->getOrDefault("port", 0); + common_configuration.username = named_collection->getOrDefault("user", ""); + common_configuration.password = named_collection->getOrDefault("password", ""); + common_configuration.database = named_collection->getAnyOrDefault({"database", "db"}, ""); + common_configuration.schema = named_collection->getOrDefault("schema", ""); + common_configuration.table = named_collection->getOrDefault("table", ""); + + dictionary_configuration.emplace(PostgreSQLDictionarySource::Configuration{ + .db = common_configuration.database, + .schema = common_configuration.schema, + .table = common_configuration.table, + .query = named_collection->getOrDefault("query", ""), + .where = named_collection->getOrDefault("where", ""), + .invalidate_query = named_collection->getOrDefault("invalidate_query", ""), + .update_field = named_collection->getOrDefault("update_field", ""), + .update_lag = named_collection->getOrDefault("update_lag", 1), + }); + + replicas_by_priority[0].emplace_back(common_configuration); + } + else + { + validateConfigKeys(config, settings_config_prefix); + + StoragePostgreSQL::Configuration common_configuration; + common_configuration.host = config.getString(settings_config_prefix + ".host", ""); + common_configuration.port = config.getUInt(settings_config_prefix + ".port", 0); + common_configuration.username = config.getString(settings_config_prefix + ".user", ""); + common_configuration.password = config.getString(settings_config_prefix + ".password", ""); + common_configuration.database = config.getString(fmt::format("{}.database", settings_config_prefix), config.getString(fmt::format("{}.db", settings_config_prefix), "")); + common_configuration.schema = config.getString(fmt::format("{}.schema", settings_config_prefix), ""); + common_configuration.table = config.getString(fmt::format("{}.table", settings_config_prefix), ""); + + dictionary_configuration.emplace(PostgreSQLDictionarySource::Configuration + { + .db = common_configuration.database, + .schema = common_configuration.schema, + .table = common_configuration.table, + .query = config.getString(fmt::format("{}.query", settings_config_prefix), ""), + .where = config.getString(fmt::format("{}.where", settings_config_prefix), ""), + .invalidate_query = config.getString(fmt::format("{}.invalidate_query", settings_config_prefix), ""), + .update_field = config.getString(fmt::format("{}.update_field", settings_config_prefix), ""), + .update_lag = config.getUInt64(fmt::format("{}.update_lag", settings_config_prefix), 1) + }); + + + if (config.has(settings_config_prefix + ".replica")) + { + Poco::Util::AbstractConfiguration::Keys config_keys; + config.keys(settings_config_prefix, config_keys); + + for (const auto & config_key : config_keys) + { + if (config_key.starts_with("replica")) + { + String replica_name = settings_config_prefix + "." + config_key; + StoragePostgreSQL::Configuration replica_configuration{common_configuration}; + + size_t priority = config.getInt(replica_name + ".priority", 0); + replica_configuration.host = config.getString(replica_name + ".host", common_configuration.host); + replica_configuration.port = config.getUInt(replica_name + ".port", common_configuration.port); + replica_configuration.username = config.getString(replica_name + ".user", common_configuration.username); + replica_configuration.password = config.getString(replica_name + ".password", common_configuration.password); + + if (replica_configuration.host.empty() || replica_configuration.port == 0 + || replica_configuration.username.empty() || replica_configuration.password.empty()) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Named collection of connection parameters is missing some " + "of the parameters and no other dictionary parameters are added"); + } + + replicas_by_priority[priority].emplace_back(replica_configuration); + } + } + } + else + { + replicas_by_priority[0].emplace_back(common_configuration); + } + } if (created_from_ddl) { - for (const auto & replicas : configuration.replicas_configurations) - for (const auto & replica : replicas.second) + for (const auto & [_, replicas] : replicas_by_priority) + for (const auto & replica : replicas) context->getRemoteHostFilter().checkHostAndPort(replica.host, toString(replica.port)); } + auto pool = std::make_shared( - configuration.replicas_configurations, + replicas_by_priority, settings.postgresql_connection_pool_size, settings.postgresql_connection_pool_wait_timeout, settings.postgresql_connection_pool_retries, settings.postgresql_connection_pool_auto_close_connection, settings.postgresql_connection_attempt_timeout); - PostgreSQLDictionarySource::Configuration dictionary_configuration - { - .db = configuration.database, - .schema = configuration.schema, - .table = configuration.table, - .query = config.getString(fmt::format("{}.query", settings_config_prefix), ""), - .where = config.getString(fmt::format("{}.where", settings_config_prefix), ""), - .invalidate_query = config.getString(fmt::format("{}.invalidate_query", settings_config_prefix), ""), - .update_field = config.getString(fmt::format("{}.update_field", settings_config_prefix), ""), - .update_lag = config.getUInt64(fmt::format("{}.update_lag", settings_config_prefix), 1) - }; - return std::make_unique(dict_struct, dictionary_configuration, pool, sample_block); + return std::make_unique(dict_struct, dictionary_configuration.value(), pool, sample_block); #else (void)dict_struct; (void)config; diff --git a/src/Storages/ExternalDataSourceConfiguration.cpp b/src/Storages/ExternalDataSourceConfiguration.cpp deleted file mode 100644 index 41979f8d91c..00000000000 --- a/src/Storages/ExternalDataSourceConfiguration.cpp +++ /dev/null @@ -1,288 +0,0 @@ -#include "ExternalDataSourceConfiguration.h" - -#include -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - -IMPLEMENT_SETTINGS_TRAITS(EmptySettingsTraits, EMPTY_SETTINGS) - -static const std::unordered_set dictionary_allowed_keys = { - "host", "port", "user", "password", "quota_key", "db", - "database", "table", "schema", "replica", - "update_field", "update_lag", "invalidate_query", "query", - "where", "name", "secure", "uri", "collection"}; - - -template -SettingsChanges getSettingsChangesFromConfig( - const BaseSettings & settings, const Poco::Util::AbstractConfiguration & config, const String & config_prefix) -{ - SettingsChanges config_settings; - for (const auto & setting : settings.all()) - { - const auto & setting_name = setting.getName(); - auto setting_value = config.getString(config_prefix + '.' + setting_name, ""); - if (!setting_value.empty()) - config_settings.emplace_back(setting_name, setting_value); - } - return config_settings; -} - - -String ExternalDataSourceConfiguration::toString() const -{ - WriteBufferFromOwnString configuration_info; - configuration_info << "username: " << username << "\t"; - if (addresses.empty()) - { - configuration_info << "host: " << host << "\t"; - configuration_info << "port: " << port << "\t"; - } - else - { - for (const auto & [replica_host, replica_port] : addresses) - { - configuration_info << "host: " << replica_host << "\t"; - configuration_info << "port: " << replica_port << "\t"; - } - } - return configuration_info.str(); -} - - -void ExternalDataSourceConfiguration::set(const ExternalDataSourceConfiguration & conf) -{ - host = conf.host; - port = conf.port; - username = conf.username; - password = conf.password; - quota_key = conf.quota_key; - database = conf.database; - table = conf.table; - schema = conf.schema; - addresses = conf.addresses; - addresses_expr = conf.addresses_expr; -} - - -static void validateConfigKeys( - const Poco::Util::AbstractConfiguration & dict_config, const String & config_prefix, HasConfigKeyFunc has_config_key_func) -{ - Poco::Util::AbstractConfiguration::Keys config_keys; - dict_config.keys(config_prefix, config_keys); - for (const auto & config_key : config_keys) - { - if (!has_config_key_func(config_key)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected key `{}` in dictionary source configuration", config_key); - } -} - -template -std::optional getExternalDataSourceConfiguration( - const Poco::Util::AbstractConfiguration & dict_config, const String & dict_config_prefix, - ContextPtr context, HasConfigKeyFunc has_config_key, const BaseSettings & settings) -{ - validateConfigKeys(dict_config, dict_config_prefix, has_config_key); - ExternalDataSourceConfiguration configuration; - - auto collection_name = dict_config.getString(dict_config_prefix + ".name", ""); - if (!collection_name.empty()) - { - const auto & config = context->getConfigRef(); - const auto & collection_prefix = fmt::format("named_collections.{}", collection_name); - validateConfigKeys(dict_config, collection_prefix, has_config_key); - auto config_settings = getSettingsChangesFromConfig(settings, config, collection_prefix); - auto dict_settings = getSettingsChangesFromConfig(settings, dict_config, dict_config_prefix); - /// dictionary config settings override collection settings. - config_settings.insert(config_settings.end(), dict_settings.begin(), dict_settings.end()); - - if (!config.has(collection_prefix)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no collection named `{}` in config", collection_name); - - configuration.host = dict_config.getString(dict_config_prefix + ".host", config.getString(collection_prefix + ".host", "")); - configuration.port = dict_config.getInt(dict_config_prefix + ".port", config.getUInt(collection_prefix + ".port", 0)); - configuration.username = dict_config.getString(dict_config_prefix + ".user", config.getString(collection_prefix + ".user", "")); - configuration.password = dict_config.getString(dict_config_prefix + ".password", config.getString(collection_prefix + ".password", "")); - configuration.quota_key = dict_config.getString(dict_config_prefix + ".quota_key", config.getString(collection_prefix + ".quota_key", "")); - configuration.database = dict_config.getString(dict_config_prefix + ".db", config.getString(dict_config_prefix + ".database", - config.getString(collection_prefix + ".db", config.getString(collection_prefix + ".database", "")))); - configuration.table = dict_config.getString(dict_config_prefix + ".table", config.getString(collection_prefix + ".table", "")); - configuration.schema = dict_config.getString(dict_config_prefix + ".schema", config.getString(collection_prefix + ".schema", "")); - - if (configuration.host.empty() || configuration.port == 0 || configuration.username.empty() || configuration.table.empty()) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Named collection of connection parameters is missing some " - "of the parameters and dictionary parameters are not added"); - } - return ExternalDataSourceInfo{.configuration = configuration, .settings_changes = config_settings}; - } - return std::nullopt; -} - -std::optional getURLBasedDataSourceConfiguration( - const Poco::Util::AbstractConfiguration & dict_config, const String & dict_config_prefix, ContextPtr context) -{ - URLBasedDataSourceConfiguration configuration; - auto collection_name = dict_config.getString(dict_config_prefix + ".name", ""); - if (!collection_name.empty()) - { - const auto & config = context->getConfigRef(); - const auto & collection_prefix = fmt::format("named_collections.{}", collection_name); - - if (!config.has(collection_prefix)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no collection named `{}` in config", collection_name); - - configuration.url = - dict_config.getString(dict_config_prefix + ".url", config.getString(collection_prefix + ".url", "")); - configuration.endpoint = - dict_config.getString(dict_config_prefix + ".endpoint", config.getString(collection_prefix + ".endpoint", "")); - configuration.format = - dict_config.getString(dict_config_prefix + ".format", config.getString(collection_prefix + ".format", "")); - configuration.compression_method = - dict_config.getString(dict_config_prefix + ".compression", config.getString(collection_prefix + ".compression_method", "")); - configuration.structure = - dict_config.getString(dict_config_prefix + ".structure", config.getString(collection_prefix + ".structure", "")); - configuration.user = - dict_config.getString(dict_config_prefix + ".credentials.user", config.getString(collection_prefix + ".credentials.user", "")); - configuration.password = - dict_config.getString(dict_config_prefix + ".credentials.password", config.getString(collection_prefix + ".credentials.password", "")); - - String headers_prefix; - const Poco::Util::AbstractConfiguration *headers_config = nullptr; - if (dict_config.has(dict_config_prefix + ".headers")) - { - headers_prefix = dict_config_prefix + ".headers"; - headers_config = &dict_config; - } - else - { - headers_prefix = collection_prefix + ".headers"; - headers_config = &config; - } - - if (headers_config) - { - Poco::Util::AbstractConfiguration::Keys header_keys; - headers_config->keys(headers_prefix, header_keys); - headers_prefix += "."; - for (const auto & header : header_keys) - { - const auto header_prefix = headers_prefix + header; - configuration.headers.emplace_back( - headers_config->getString(header_prefix + ".name"), - headers_config->getString(header_prefix + ".value")); - } - } - - return URLBasedDataSourceConfig{ .configuration = configuration }; - } - - return std::nullopt; -} - -ExternalDataSourcesByPriority getExternalDataSourceConfigurationByPriority( - const Poco::Util::AbstractConfiguration & dict_config, const String & dict_config_prefix, ContextPtr context, HasConfigKeyFunc has_config_key) -{ - validateConfigKeys(dict_config, dict_config_prefix, has_config_key); - ExternalDataSourceConfiguration common_configuration; - - auto named_collection = getExternalDataSourceConfiguration(dict_config, dict_config_prefix, context, has_config_key); - if (named_collection) - { - common_configuration = named_collection->configuration; - } - else - { - common_configuration.host = dict_config.getString(dict_config_prefix + ".host", ""); - common_configuration.port = dict_config.getUInt(dict_config_prefix + ".port", 0); - common_configuration.username = dict_config.getString(dict_config_prefix + ".user", ""); - common_configuration.password = dict_config.getString(dict_config_prefix + ".password", ""); - common_configuration.quota_key = dict_config.getString(dict_config_prefix + ".quota_key", ""); - common_configuration.database = dict_config.getString(dict_config_prefix + ".db", dict_config.getString(dict_config_prefix + ".database", "")); - common_configuration.table = dict_config.getString(fmt::format("{}.table", dict_config_prefix), ""); - common_configuration.schema = dict_config.getString(fmt::format("{}.schema", dict_config_prefix), ""); - } - - ExternalDataSourcesByPriority configuration - { - .database = common_configuration.database, - .table = common_configuration.table, - .schema = common_configuration.schema, - .replicas_configurations = {} - }; - - if (dict_config.has(dict_config_prefix + ".replica")) - { - Poco::Util::AbstractConfiguration::Keys config_keys; - dict_config.keys(dict_config_prefix, config_keys); - - for (const auto & config_key : config_keys) - { - if (config_key.starts_with("replica")) - { - ExternalDataSourceConfiguration replica_configuration(common_configuration); - String replica_name = dict_config_prefix + "." + config_key; - validateConfigKeys(dict_config, replica_name, has_config_key); - - size_t priority = dict_config.getInt(replica_name + ".priority", 0); - replica_configuration.host = dict_config.getString(replica_name + ".host", common_configuration.host); - replica_configuration.port = dict_config.getUInt(replica_name + ".port", common_configuration.port); - replica_configuration.username = dict_config.getString(replica_name + ".user", common_configuration.username); - replica_configuration.password = dict_config.getString(replica_name + ".password", common_configuration.password); - replica_configuration.quota_key = dict_config.getString(replica_name + ".quota_key", common_configuration.quota_key); - - if (replica_configuration.host.empty() || replica_configuration.port == 0 - || replica_configuration.username.empty() || replica_configuration.password.empty()) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Named collection of connection parameters is missing some " - "of the parameters and no other dictionary parameters are added"); - } - - configuration.replicas_configurations[priority].emplace_back(replica_configuration); - } - } - } - else - { - configuration.replicas_configurations[0].emplace_back(common_configuration); - } - - return configuration; -} - - -void URLBasedDataSourceConfiguration::set(const URLBasedDataSourceConfiguration & conf) -{ - url = conf.url; - format = conf.format; - compression_method = conf.compression_method; - structure = conf.structure; - http_method = conf.http_method; - headers = conf.headers; -} - -template -std::optional getExternalDataSourceConfiguration( - const Poco::Util::AbstractConfiguration & dict_config, const String & dict_config_prefix, - ContextPtr context, HasConfigKeyFunc has_config_key, const BaseSettings & settings); - -template -SettingsChanges getSettingsChangesFromConfig( - const BaseSettings & settings, const Poco::Util::AbstractConfiguration & config, const String & config_prefix); - -} diff --git a/src/Storages/ExternalDataSourceConfiguration.h b/src/Storages/ExternalDataSourceConfiguration.h deleted file mode 100644 index c703c9ce999..00000000000 --- a/src/Storages/ExternalDataSourceConfiguration.h +++ /dev/null @@ -1,92 +0,0 @@ -#pragma once - -#include -#include -#include -#include - - -namespace DB -{ - -#define EMPTY_SETTINGS(M, ALIAS) -DECLARE_SETTINGS_TRAITS(EmptySettingsTraits, EMPTY_SETTINGS) - -struct EmptySettings : public BaseSettings {}; - -struct ExternalDataSourceConfiguration -{ - String host; - UInt16 port = 0; - String username = "default"; - String password; - String quota_key; - String database; - String table; - String schema; - - std::vector> addresses; /// Failover replicas. - String addresses_expr; - - String toString() const; - - void set(const ExternalDataSourceConfiguration & conf); -}; - - -using StorageSpecificArgs = std::vector>; - -struct ExternalDataSourceInfo -{ - ExternalDataSourceConfiguration configuration; - SettingsChanges settings_changes; -}; - -using HasConfigKeyFunc = std::function; - -template -std::optional getExternalDataSourceConfiguration( - const Poco::Util::AbstractConfiguration & dict_config, const String & dict_config_prefix, - ContextPtr context, HasConfigKeyFunc has_config_key, const BaseSettings & settings = {}); - - -/// Highest priority is 0, the bigger the number in map, the less the priority. -using ExternalDataSourcesConfigurationByPriority = std::map>; - -struct ExternalDataSourcesByPriority -{ - String database; - String table; - String schema; - ExternalDataSourcesConfigurationByPriority replicas_configurations; -}; - -ExternalDataSourcesByPriority -getExternalDataSourceConfigurationByPriority(const Poco::Util::AbstractConfiguration & dict_config, const String & dict_config_prefix, ContextPtr context, HasConfigKeyFunc has_config_key); - -struct URLBasedDataSourceConfiguration -{ - String url; - String endpoint; - String format = "auto"; - String compression_method = "auto"; - String structure = "auto"; - - String user; - String password; - - HTTPHeaderEntries headers; - String http_method; - - void set(const URLBasedDataSourceConfiguration & conf); -}; - -struct URLBasedDataSourceConfig -{ - URLBasedDataSourceConfiguration configuration; -}; - -std::optional getURLBasedDataSourceConfiguration( - const Poco::Util::AbstractConfiguration & dict_config, const String & dict_config_prefix, ContextPtr context); - -} diff --git a/src/Storages/NamedCollectionsHelpers.h b/src/Storages/NamedCollectionsHelpers.h index f444a581eb6..bf2da7235a2 100644 --- a/src/Storages/NamedCollectionsHelpers.h +++ b/src/Storages/NamedCollectionsHelpers.h @@ -133,7 +133,7 @@ void validateNamedCollection( { throw Exception( ErrorCodes::BAD_ARGUMENTS, - "Unexpected key {} in named collection. Required keys: {}, optional keys: {}", + "Unexpected key `{}` in named collection. Required keys: {}, optional keys: {}", backQuoteIfNeed(key), fmt::join(required_keys, ", "), fmt::join(optional_keys, ", ")); } } diff --git a/src/Storages/StorageExternalDistributed.h b/src/Storages/StorageExternalDistributed.h index c4d37c3e5cc..56c7fe86f34 100644 --- a/src/Storages/StorageExternalDistributed.h +++ b/src/Storages/StorageExternalDistributed.h @@ -8,8 +8,6 @@ namespace DB { -struct ExternalDataSourceConfiguration; - /// Storages MySQL and PostgreSQL use ConnectionPoolWithFailover and support multiple replicas. /// This class unites multiple storages with replicas into multiple shards with replicas. /// A query to external database is passed to one replica on each shard, the result is united. diff --git a/src/TableFunctions/TableFunctionMongoDB.cpp b/src/TableFunctions/TableFunctionMongoDB.cpp index b2cf1b4675e..94279d1bf6d 100644 --- a/src/TableFunctions/TableFunctionMongoDB.cpp +++ b/src/TableFunctions/TableFunctionMongoDB.cpp @@ -1,5 +1,4 @@ #include -#include #include diff --git a/src/TableFunctions/TableFunctionRedis.cpp b/src/TableFunctions/TableFunctionRedis.cpp index f87ba6d1c6d..aca751c2840 100644 --- a/src/TableFunctions/TableFunctionRedis.cpp +++ b/src/TableFunctions/TableFunctionRedis.cpp @@ -15,7 +15,6 @@ #include #include -#include namespace DB From f6e1eb1643888c2b8bbc179899cbc4bacaee5b78 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 15 Aug 2024 16:31:48 +0200 Subject: [PATCH 09/26] Fix style check --- src/Dictionaries/HTTPDictionarySource.cpp | 2 +- src/Dictionaries/PostgreSQLDictionarySource.cpp | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Dictionaries/HTTPDictionarySource.cpp b/src/Dictionaries/HTTPDictionarySource.cpp index d6df03b39df..bf19f912723 100644 --- a/src/Dictionaries/HTTPDictionarySource.cpp +++ b/src/Dictionaries/HTTPDictionarySource.cpp @@ -230,7 +230,7 @@ void registerDictionarySourceHTTP(DictionarySourceFactory & factory) *named_collection, /* required_keys */{}, /* optional_keys */ValidateKeysMultiset{ - "url", "endpoint", "user", "credentials.user", "password", "credentials.password", "format", "compression_method", "structure", "name"}); + "url", "endpoint", "user", "credentials.user", "password", "credentials.password", "format", "compression_method", "structure", "name"}); url = named_collection->getOrDefault("url", ""); endpoint = named_collection->getOrDefault("endpoint", ""); diff --git a/src/Dictionaries/PostgreSQLDictionarySource.cpp b/src/Dictionaries/PostgreSQLDictionarySource.cpp index fd026a97cd4..fd426de126d 100644 --- a/src/Dictionaries/PostgreSQLDictionarySource.cpp +++ b/src/Dictionaries/PostgreSQLDictionarySource.cpp @@ -24,6 +24,7 @@ namespace DB namespace ErrorCodes { extern const int SUPPORT_IS_DISABLED; + extern const int BAD_ARGUMENTS; } #if USE_LIBPQXX From 077f10a4ada2a561111207d8e99e22d2c8e48f40 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 15 Aug 2024 18:26:48 +0200 Subject: [PATCH 10/26] Fix build --- src/Dictionaries/PostgreSQLDictionarySource.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/Dictionaries/PostgreSQLDictionarySource.cpp b/src/Dictionaries/PostgreSQLDictionarySource.cpp index fd426de126d..8e472f85a6e 100644 --- a/src/Dictionaries/PostgreSQLDictionarySource.cpp +++ b/src/Dictionaries/PostgreSQLDictionarySource.cpp @@ -4,6 +4,7 @@ #include #include #include "DictionarySourceFactory.h" +#include #include "registerDictionaries.h" #if USE_LIBPQXX @@ -13,7 +14,6 @@ #include "readInvalidateQuery.h" #include #include -#include #include #endif @@ -27,14 +27,14 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -#if USE_LIBPQXX - -static const UInt64 max_block_size = 8192; - static const ValidateKeysMultiset dictionary_allowed_keys = { "host", "port", "user", "password", "db", "database", "table", "schema", "update_field", "update_lag", "invalidate_query", "query", "where", "name", "priority"}; +#if USE_LIBPQXX + +static const UInt64 max_block_size = 8192; + namespace { ExternalQueryBuilder makeExternalQueryBuilder(const DictionaryStructure & dict_struct, const String & schema, const String & table, const String & query, const String & where) @@ -178,8 +178,6 @@ std::string PostgreSQLDictionarySource::toString() const return "PostgreSQL: " + configuration.db + '.' + configuration.table + (where.empty() ? "" : ", where: " + where); } -#endif - static void validateConfigKeys( const Poco::Util::AbstractConfiguration & dict_config, const String & config_prefix) { @@ -193,6 +191,8 @@ static void validateConfigKeys( } } +#endif + void registerDictionarySourcePostgreSQL(DictionarySourceFactory & factory) { auto create_table_source = [=](const DictionaryStructure & dict_struct, From 1b49e2492521c54b0e6240d412af847d3fa21221 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Fri, 16 Aug 2024 11:26:31 +0200 Subject: [PATCH 11/26] Fix clang-tidy --- src/Dictionaries/PostgreSQLDictionarySource.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Dictionaries/PostgreSQLDictionarySource.cpp b/src/Dictionaries/PostgreSQLDictionarySource.cpp index 8e472f85a6e..b1bab17e2e9 100644 --- a/src/Dictionaries/PostgreSQLDictionarySource.cpp +++ b/src/Dictionaries/PostgreSQLDictionarySource.cpp @@ -208,7 +208,6 @@ void registerDictionarySourcePostgreSQL(DictionarySourceFactory & factory) const auto & settings = context->getSettingsRef(); std::optional dictionary_configuration; - String database, schema, table; postgres::PoolWithFailover::ReplicasConfigurationByPriority replicas_by_priority; auto named_collection = created_from_ddl ? tryGetNamedCollectionWithOverrides(config, settings_config_prefix, context) : nullptr; From 9dee9ecfb4adee4cff099f13afe61bdc2c38170e Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Sat, 17 Aug 2024 10:33:35 -0600 Subject: [PATCH 12/26] Fix incorrect default value for postgresql_connection_pool_auto_close_connection in docs --- docs/en/operations/settings/settings.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index de601fe02dc..5bf1fe197ae 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -1381,7 +1381,7 @@ Default value: `2`. Close connection before returning connection to the pool. -Default value: true. +Default value: false. ## odbc_bridge_connection_pool_size {#odbc-bridge-connection-pool-size} From 427016a450cad536e8cbaf4de04d07313456aa4b Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Tue, 6 Aug 2024 21:39:41 +0200 Subject: [PATCH 13/26] CI: Functional tests to store artifacts on timeout --- docker/test/fasttest/run.sh | 18 +--------- docker/test/sqllogic/Dockerfile | 1 - docker/test/sqllogic/run.sh | 2 +- docker/test/sqltest/Dockerfile | 1 - docker/test/stateful/run.sh | 25 +------------- docker/test/stateless/Dockerfile | 1 - docker/test/stateless/run.sh | 21 ++---------- docker/test/stateless/utils.lib | 16 --------- tests/ci/ci.py | 30 ++++++++-------- tests/ci/ci_definitions.py | 3 +- tests/ci/functional_test_check.py | 37 +++++++++++++++----- tests/ci/report.py | 11 +++--- tests/ci/tee_popen.py | 57 +++++++++++++++++++++++++++---- 13 files changed, 107 insertions(+), 116 deletions(-) diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 394d31addb1..9920326b11c 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -256,22 +256,6 @@ function configure rm -f "$FASTTEST_DATA/config.d/secure_ports.xml" } -function timeout_with_logging() { - local exit_code=0 - - timeout -s TERM --preserve-status "${@}" || exit_code="${?}" - - echo "Checking if it is a timeout. The code 124 will indicate a timeout." - if [[ "${exit_code}" -eq "124" ]] - then - echo "The command 'timeout ${*}' has been killed by timeout." - else - echo "No, it isn't a timeout." - fi - - return $exit_code -} - function run_tests { clickhouse-server --version @@ -340,7 +324,7 @@ case "$stage" in configure 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/install_log.txt" ;& "run_tests") - timeout_with_logging 35m bash -c run_tests ||: + run_tests ||: /process_functional_tests_result.py --in-results-dir "$FASTTEST_OUTPUT/" \ --out-results-file "$FASTTEST_OUTPUT/test_results.tsv" \ --out-status-file "$FASTTEST_OUTPUT/check_status.tsv" || echo -e "failure\tCannot parse results" > "$FASTTEST_OUTPUT/check_status.tsv" diff --git a/docker/test/sqllogic/Dockerfile b/docker/test/sqllogic/Dockerfile index 1425e12cd84..6397526388e 100644 --- a/docker/test/sqllogic/Dockerfile +++ b/docker/test/sqllogic/Dockerfile @@ -35,7 +35,6 @@ RUN mkdir -p /tmp/clickhouse-odbc-tmp \ ENV TZ=Europe/Amsterdam -ENV MAX_RUN_TIME=9000 RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone ARG sqllogic_test_repo="https://github.com/gregrahn/sqllogictest.git" diff --git a/docker/test/sqllogic/run.sh b/docker/test/sqllogic/run.sh index ccba344035e..32368980f9b 100755 --- a/docker/test/sqllogic/run.sh +++ b/docker/test/sqllogic/run.sh @@ -94,7 +94,7 @@ function run_tests() export -f run_tests -timeout "${MAX_RUN_TIME:-9000}" bash -c run_tests || echo "timeout reached" >&2 +run_tests #/process_functional_tests_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv diff --git a/docker/test/sqltest/Dockerfile b/docker/test/sqltest/Dockerfile index 71d915b0c7a..b805bb03c2b 100644 --- a/docker/test/sqltest/Dockerfile +++ b/docker/test/sqltest/Dockerfile @@ -22,7 +22,6 @@ ARG sqltest_repo="https://github.com/elliotchance/sqltest/" RUN git clone ${sqltest_repo} ENV TZ=UTC -ENV MAX_RUN_TIME=900 RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone COPY run.sh / diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh index 3a4f0d97993..c072eeb0fa8 100755 --- a/docker/test/stateful/run.sh +++ b/docker/test/stateful/run.sh @@ -4,9 +4,6 @@ source /setup_export_logs.sh set -e -x -MAX_RUN_TIME=${MAX_RUN_TIME:-3600} -MAX_RUN_TIME=$((MAX_RUN_TIME == 0 ? 3600 : MAX_RUN_TIME)) - # Choose random timezone for this test run TZ="$(rg -v '#' /usr/share/zoneinfo/zone.tab | awk '{print $3}' | shuf | head -n1)" echo "Choosen random timezone $TZ" @@ -123,9 +120,6 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]] clickhouse-client --query "DROP TABLE datasets.hits_v1" clickhouse-client --query "DROP TABLE datasets.visits_v1" - - MAX_RUN_TIME=$((MAX_RUN_TIME < 9000 ? MAX_RUN_TIME : 9000)) # min(MAX_RUN_TIME, 2.5 hours) - MAX_RUN_TIME=$((MAX_RUN_TIME != 0 ? MAX_RUN_TIME : 9000)) # set to 2.5 hours if 0 (unlimited) else clickhouse-client --query "CREATE DATABASE test" clickhouse-client --query "SHOW TABLES FROM test" @@ -257,24 +251,7 @@ function run_tests() export -f run_tests -function timeout_with_logging() { - local exit_code=0 - - timeout -s TERM --preserve-status "${@}" || exit_code="${?}" - - echo "Checking if it is a timeout. The code 124 will indicate a timeout." - if [[ "${exit_code}" -eq "124" ]] - then - echo "The command 'timeout ${*}' has been killed by timeout." - else - echo "No, it isn't a timeout." - fi - - return $exit_code -} - -TIMEOUT=$((MAX_RUN_TIME - 700)) -timeout_with_logging "$TIMEOUT" bash -c run_tests ||: +run_tests ||: echo "Files in current directory" ls -la ./ diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index d8eb072328f..b0c4914a4e8 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -65,7 +65,6 @@ ENV TZ=Europe/Amsterdam RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone ENV NUM_TRIES=1 -ENV MAX_RUN_TIME=0 # Unrelated to vars in setup_minio.sh, but should be the same there # to have the same binaries for local running scenario diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index c70cbe1fe45..ad0cd321cc5 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -12,9 +12,6 @@ dmesg --clear # fail on errors, verbose and export all env variables set -e -x -a -MAX_RUN_TIME=${MAX_RUN_TIME:-9000} -MAX_RUN_TIME=$((MAX_RUN_TIME == 0 ? 9000 : MAX_RUN_TIME)) - USE_DATABASE_REPLICATED=${USE_DATABASE_REPLICATED:=0} USE_SHARED_CATALOG=${USE_SHARED_CATALOG:=0} @@ -308,8 +305,6 @@ function run_tests() try_run_with_retry 10 clickhouse-client -q "insert into system.zookeeper (name, path, value) values ('auxiliary_zookeeper2', '/test/chroot/', '')" - TIMEOUT=$((MAX_RUN_TIME - 800 > 8400 ? 8400 : MAX_RUN_TIME - 800)) - START_TIME=${SECONDS} set +e TEST_ARGS=( @@ -324,32 +319,22 @@ function run_tests() --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" ) - timeout --preserve-status --signal TERM --kill-after 60m ${TIMEOUT}s clickhouse-test "${TEST_ARGS[@]}" 2>&1 \ + clickhouse-test "${TEST_ARGS[@]}" 2>&1 \ | ts '%Y-%m-%d %H:%M:%S' \ | tee -a test_output/test_result.txt set -e - DURATION=$((SECONDS - START_TIME)) - - echo "Elapsed ${DURATION} seconds." - if [[ $DURATION -ge $TIMEOUT ]] - then - echo "It looks like the command is terminated by the timeout, which is ${TIMEOUT} seconds." - fi } export -f run_tests - -# This should be enough to setup job and collect artifacts -TIMEOUT=$((MAX_RUN_TIME - 700)) if [ "$NUM_TRIES" -gt "1" ]; then # We don't run tests with Ordinary database in PRs, only in master. # So run new/changed tests with Ordinary at least once in flaky check. - timeout_with_logging "$TIMEOUT" bash -c 'NUM_TRIES=1; USE_DATABASE_ORDINARY=1; run_tests' \ + NUM_TRIES=1; USE_DATABASE_ORDINARY=1; run_tests \ | sed 's/All tests have finished/Redacted: a message about tests finish is deleted/' | sed 's/No tests were run/Redacted: a message about no tests run is deleted/' ||: fi -timeout_with_logging "$TIMEOUT" bash -c run_tests ||: +run_tests ||: echo "Files in current directory" ls -la ./ diff --git a/docker/test/stateless/utils.lib b/docker/test/stateless/utils.lib index cb257536c36..31cd67254b4 100644 --- a/docker/test/stateless/utils.lib +++ b/docker/test/stateless/utils.lib @@ -40,22 +40,6 @@ function fn_exists() { declare -F "$1" > /dev/null; } -function timeout_with_logging() { - local exit_code=0 - - timeout -s TERM --preserve-status "${@}" || exit_code="${?}" - - echo "Checking if it is a timeout. The code 124 will indicate a timeout." - if [[ "${exit_code}" -eq "124" ]] - then - echo "The command 'timeout ${*}' has been killed by timeout." - else - echo "No, it isn't a timeout." - fi - - return $exit_code -} - function collect_core_dumps() { find . -type f -maxdepth 1 -name 'core.*' | while read -r core; do diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 49b597333dc..1208d8642ad 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -50,7 +50,6 @@ from github_helper import GitHub from pr_info import PRInfo from report import ( ERROR, - FAILURE, PENDING, SUCCESS, BuildResult, @@ -62,11 +61,11 @@ from report import ( FAIL, ) from s3_helper import S3Helper -from stopwatch import Stopwatch from tee_popen import TeePopen from ci_cache import CiCache from ci_settings import CiSettings from ci_buddy import CIBuddy +from stopwatch import Stopwatch from version_helper import get_version_from_repo # pylint: disable=too-many-lines @@ -370,8 +369,8 @@ def _pre_action(s3, job_name, batch, indata, pr_info): # skip_status = SUCCESS already there GH.print_in_group("Commit Status Data", job_status) - # create pre report - jr = JobReport.create_pre_report(status=skip_status, job_skipped=to_be_skipped) + # create dummy report + jr = JobReport.create_dummy(status=skip_status, job_skipped=to_be_skipped) jr.dump() if not to_be_skipped: @@ -990,19 +989,20 @@ def _run_test(job_name: str, run_command: str) -> int: stopwatch = Stopwatch() job_log = Path(TEMP_PATH) / "job_log.txt" with TeePopen(run_command, job_log, env, timeout) as process: + print(f"Job process started, pid [{process.process.pid}]") retcode = process.wait() if retcode != 0: print(f"Run action failed for: [{job_name}] with exit code [{retcode}]") - if timeout and process.timeout_exceeded: - print(f"Timeout {timeout} exceeded, dumping the job report") - JobReport( - status=FAILURE, - description=f"Timeout {timeout} exceeded", - test_results=[TestResult.create_check_timeout_expired(timeout)], - start_time=stopwatch.start_time_str, - duration=stopwatch.duration_seconds, - additional_files=[job_log], - ).dump() + if process.timeout_exceeded: + print(f"Job timed out: [{job_name}] exit code [{retcode}]") + assert JobReport.exist(), "JobReport real or dummy must be present" + jr = JobReport.load() + if jr.dummy: + print( + f"ERROR: Run action failed with timeout and did not generate JobReport - update dummy report with execution time" + ) + jr.test_results = [TestResult.create_check_timeout_expired()] + jr.duration = stopwatch.duration_seconds print(f"Run action done for: [{job_name}]") return retcode @@ -1205,7 +1205,7 @@ def main() -> int: job_report ), "BUG. There must be job report either real report, or pre-report if job was killed" error_description = "" - if not job_report.pre_report: + if not job_report.dummy: # it's a real job report ch_helper = ClickHouseHelper() check_url = "" diff --git a/tests/ci/ci_definitions.py b/tests/ci/ci_definitions.py index 1bed9db06f2..1d1c39f913d 100644 --- a/tests/ci/ci_definitions.py +++ b/tests/ci/ci_definitions.py @@ -332,7 +332,7 @@ class JobConfig: # will be triggered for the job if omitted in CI workflow yml run_command: str = "" # job timeout, seconds - timeout: Optional[int] = None + timeout: int = 7200 # sets number of batches for a multi-batch job num_batches: int = 1 # label that enables job in CI, if set digest isn't used @@ -421,7 +421,6 @@ class CommonJobConfigs: ), run_command='functional_test_check.py "$CHECK_NAME"', runner_type=Runners.FUNC_TESTER, - timeout=9000, ) STATEFUL_TEST = JobConfig( job_name_keyword="stateful", diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index b7391eff01b..d08f98fa05f 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -5,10 +5,11 @@ import csv import logging import os import re +import signal import subprocess import sys from pathlib import Path -from typing import List, Tuple +from typing import List, Tuple, Optional from build_download_helper import download_all_deb_packages from clickhouse_helper import CiLogsCredentials @@ -25,11 +26,12 @@ from report import ( TestResults, read_test_results, FAILURE, + TestResult, ) from stopwatch import Stopwatch from tee_popen import TeePopen from ci_config import CI -from ci_utils import Utils +from ci_utils import Utils, Shell NO_CHANGES_MSG = "Nothing to run" @@ -113,10 +115,6 @@ def get_run_command( if flaky_check: envs.append("-e NUM_TRIES=50") - envs.append("-e MAX_RUN_TIME=2800") - else: - max_run_time = os.getenv("MAX_RUN_TIME", "0") - envs.append(f"-e MAX_RUN_TIME={max_run_time}") envs += [f"-e {e}" for e in additional_envs] @@ -128,7 +126,7 @@ def get_run_command( ) return ( - f"docker run --volume={builds_path}:/package_folder " + f"docker run --rm --name func-tester --volume={builds_path}:/package_folder " # For dmesg and sysctl "--privileged " f"{ci_logs_args}" @@ -198,7 +196,7 @@ def process_results( state, description = status[0][0], status[0][1] if ret_code != 0: state = ERROR - description += " (but script exited with an error)" + description = f"Job failed, exit code: {ret_code}. " + description try: results_path = result_directory / "test_results.tsv" @@ -240,7 +238,19 @@ def parse_args(): return parser.parse_args() +test_process = None # type: Optional[TeePopen] +timeout_expired = False + + +def handle_sigterm(signum, _frame): + print(f"WARNING: Received signal {signum}") + global timeout_expired + timeout_expired = True + Shell.check(f"docker exec func-tester pkill -f clickhouse-test", verbose=True) + + def main(): + signal.signal(signal.SIGTERM, handle_sigterm) logging.basicConfig(level=logging.INFO) for handler in logging.root.handlers: # pylint: disable=protected-access @@ -328,11 +338,13 @@ def main(): logging.info("Going to run func tests: %s", run_command) with TeePopen(run_command, run_log_path) as process: + global test_process + test_process = process retcode = process.wait() if retcode == 0: logging.info("Run successfully") else: - logging.info("Run failed") + logging.info("Run failed, exit code %s", retcode) try: subprocess.check_call( @@ -348,6 +360,13 @@ def main(): state, description, test_results, additional_logs = process_results( retcode, result_path, server_log_path ) + if timeout_expired: + description = "Timeout expired" + state = FAILURE + test_results.insert( + 0, TestResult.create_check_timeout_expired(stopwatch.duration_seconds) + ) + else: print( "This is validate bugfix or flaky check run, but no changes test to run - skip with success" diff --git a/tests/ci/report.py b/tests/ci/report.py index 6779a6dae96..c2632719aef 100644 --- a/tests/ci/report.py +++ b/tests/ci/report.py @@ -249,6 +249,7 @@ JOB_REPORT_FILE = Path(GITHUB_WORKSPACE) / "job_report.json" JOB_STARTED_TEST_NAME = "STARTED" JOB_FINISHED_TEST_NAME = "COMPLETED" +JOB_TIMEOUT_TEST_NAME = "Job Timeout Expired" @dataclass @@ -277,8 +278,8 @@ class TestResult: self.log_files.append(log_path) @staticmethod - def create_check_timeout_expired(timeout: float) -> "TestResult": - return TestResult("Check timeout expired", "FAIL", timeout) + def create_check_timeout_expired(duration: Optional[float] = None) -> "TestResult": + return TestResult(JOB_TIMEOUT_TEST_NAME, "FAIL", time=duration) TestResults = List[TestResult] @@ -303,7 +304,7 @@ class JobReport: # indicates that this is not real job report but report for the job that was skipped by rerun check job_skipped: bool = False # indicates that report generated by CI script in order to check later if job was killed before real report is generated - pre_report: bool = False + dummy: bool = False exit_code: int = -1 @staticmethod @@ -311,7 +312,7 @@ class JobReport: return datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S") @classmethod - def create_pre_report(cls, status: str, job_skipped: bool) -> "JobReport": + def create_dummy(cls, status: str, job_skipped: bool) -> "JobReport": return JobReport( status=status, description="", @@ -320,7 +321,7 @@ class JobReport: duration=0.0, additional_files=[], job_skipped=job_skipped, - pre_report=True, + dummy=True, ) def update_duration(self): diff --git a/tests/ci/tee_popen.py b/tests/ci/tee_popen.py index 13db50df53f..ad3e62dab9c 100644 --- a/tests/ci/tee_popen.py +++ b/tests/ci/tee_popen.py @@ -2,6 +2,8 @@ import logging import os +import signal +import subprocess import sys from io import TextIOWrapper from pathlib import Path @@ -30,20 +32,35 @@ class TeePopen: self._process = None # type: Optional[Popen] self.timeout = timeout self.timeout_exceeded = False + self.terminated_by_sigterm = False + self.terminated_by_sigkill = False + self.pid = 0 def _check_timeout(self) -> None: if self.timeout is None: return sleep(self.timeout) + logging.warning( + "Timeout exceeded. Send SIGTERM to process %s, timeout %s", + self.process.pid, + self.timeout, + ) + self.send_signal(signal.SIGTERM) + time_wait = 0 + self.terminated_by_sigterm = True self.timeout_exceeded = True + while self.process.poll() is None and time_wait < 100: + print("wait...") + wait = 5 + sleep(wait) + time_wait += wait while self.process.poll() is None: - logging.warning( - "Killing process %s, timeout %s exceeded", - self.process.pid, - self.timeout, + logging.error( + "Process is still running. Send SIGKILL", ) - os.killpg(self.process.pid, 9) - sleep(10) + self.send_signal(signal.SIGKILL) + self.terminated_by_sigkill = True + sleep(5) def __enter__(self) -> "TeePopen": self.process = Popen( @@ -57,6 +74,9 @@ class TeePopen: bufsize=1, errors="backslashreplace", ) + sleep(1) + self.pid = self._get_child_pid() + print(f"Subprocess started, pid [{self.process.pid}], child pid [{self.pid}]") if self.timeout is not None and self.timeout > 0: t = Thread(target=self._check_timeout) t.daemon = True # does not block the program from exit @@ -77,6 +97,22 @@ class TeePopen: self.log_file.close() + def _get_child_pid(self): + # linux only + ps_command = f"ps --ppid {self.process.pid} -o pid=" + res = "NA" + try: + result = subprocess.run( + ps_command, shell=True, capture_output=True, text=True + ) + res = result.stdout.strip() + pid = int(res) + return pid + except Exception as e: + print(f"Failed to get child's pid, command [{ps_command}], result [{res}]") + print(f"ERROR: getting Python subprocess PID: {e}") + return self.process.pid + def wait(self) -> int: if self.process.stdout is not None: for line in self.process.stdout: @@ -85,6 +121,15 @@ class TeePopen: return self.process.wait() + def poll(self): + return self.process.poll() + + def send_signal(self, signal_num): + if self.pid: + os.kill(self.pid, signal_num) + else: + print("ERROR: no process to send signal") + @property def process(self) -> Popen: if self._process is not None: From 8e35b082b2e3315655110bdce4238217dfe85914 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Sat, 10 Aug 2024 10:01:16 +0200 Subject: [PATCH 14/26] teepopen fix --- tests/ci/tee_popen.py | 26 ++------------------------ 1 file changed, 2 insertions(+), 24 deletions(-) diff --git a/tests/ci/tee_popen.py b/tests/ci/tee_popen.py index ad3e62dab9c..53b0a0f6c2c 100644 --- a/tests/ci/tee_popen.py +++ b/tests/ci/tee_popen.py @@ -3,7 +3,6 @@ import logging import os import signal -import subprocess import sys from io import TextIOWrapper from pathlib import Path @@ -34,7 +33,6 @@ class TeePopen: self.timeout_exceeded = False self.terminated_by_sigterm = False self.terminated_by_sigkill = False - self.pid = 0 def _check_timeout(self) -> None: if self.timeout is None: @@ -75,8 +73,7 @@ class TeePopen: errors="backslashreplace", ) sleep(1) - self.pid = self._get_child_pid() - print(f"Subprocess started, pid [{self.process.pid}], child pid [{self.pid}]") + print(f"Subprocess started, pid [{self.process.pid}]") if self.timeout is not None and self.timeout > 0: t = Thread(target=self._check_timeout) t.daemon = True # does not block the program from exit @@ -97,22 +94,6 @@ class TeePopen: self.log_file.close() - def _get_child_pid(self): - # linux only - ps_command = f"ps --ppid {self.process.pid} -o pid=" - res = "NA" - try: - result = subprocess.run( - ps_command, shell=True, capture_output=True, text=True - ) - res = result.stdout.strip() - pid = int(res) - return pid - except Exception as e: - print(f"Failed to get child's pid, command [{ps_command}], result [{res}]") - print(f"ERROR: getting Python subprocess PID: {e}") - return self.process.pid - def wait(self) -> int: if self.process.stdout is not None: for line in self.process.stdout: @@ -125,10 +106,7 @@ class TeePopen: return self.process.poll() def send_signal(self, signal_num): - if self.pid: - os.kill(self.pid, signal_num) - else: - print("ERROR: no process to send signal") + os.killpg(self.process.pid, signal_num) @property def process(self) -> Popen: From 66fa5a154a8895f481a598616df93f7cb83e42cd Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Mon, 12 Aug 2024 02:34:22 +0200 Subject: [PATCH 15/26] tune timeouts, batches --- tests/ci/ci_config.py | 7 ++++--- tests/ci/ci_definitions.py | 1 + 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 173c6c9c931..99f4ed38475 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -316,6 +316,7 @@ class CI: JobNames.STATEFUL_TEST_PARALLEL_REPL_TSAN: CommonJobConfigs.STATEFUL_TEST.with_properties( required_builds=[BuildNames.PACKAGE_TSAN], random_bucket="parrepl_with_sanitizer", + timeout=3600, ), JobNames.STATELESS_TEST_ASAN: CommonJobConfigs.STATELESS_TEST.with_properties( required_builds=[BuildNames.PACKAGE_ASAN], num_batches=2 @@ -346,7 +347,7 @@ class CI: required_builds=[BuildNames.PACKAGE_RELEASE], num_batches=4 ), JobNames.STATELESS_TEST_S3_DEBUG: CommonJobConfigs.STATELESS_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_DEBUG], num_batches=2 + required_builds=[BuildNames.PACKAGE_DEBUG], num_batches=1 ), JobNames.STATELESS_TEST_AZURE_ASAN: CommonJobConfigs.STATELESS_TEST.with_properties( required_builds=[BuildNames.PACKAGE_ASAN], num_batches=3, release_only=True @@ -401,14 +402,14 @@ class CI: required_builds=[BuildNames.PACKAGE_ASAN], release_only=True, num_batches=4 ), JobNames.INTEGRATION_TEST_ASAN_OLD_ANALYZER: CommonJobConfigs.INTEGRATION_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_ASAN], num_batches=6 + required_builds=[BuildNames.PACKAGE_ASAN], num_batches=4 ), JobNames.INTEGRATION_TEST_TSAN: CommonJobConfigs.INTEGRATION_TEST.with_properties( required_builds=[BuildNames.PACKAGE_TSAN], num_batches=6 ), JobNames.INTEGRATION_TEST_ARM: CommonJobConfigs.INTEGRATION_TEST.with_properties( required_builds=[BuildNames.PACKAGE_AARCH64], - num_batches=6, + num_batches=3, runner_type=Runners.FUNC_TESTER_ARM, ), JobNames.INTEGRATION_TEST: CommonJobConfigs.INTEGRATION_TEST.with_properties( diff --git a/tests/ci/ci_definitions.py b/tests/ci/ci_definitions.py index 1d1c39f913d..13c222b10b9 100644 --- a/tests/ci/ci_definitions.py +++ b/tests/ci/ci_definitions.py @@ -465,6 +465,7 @@ class CommonJobConfigs: ), run_command="upgrade_check.py", runner_type=Runners.STRESS_TESTER, + timeout=3600, ) INTEGRATION_TEST = JobConfig( job_name_keyword="integration", From 1deeca40dbbbc14373e51d830b851b54b82e5efa Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Thu, 15 Aug 2024 13:11:10 +0200 Subject: [PATCH 16/26] Handling timeout in integration tests --- tests/ci/ci.py | 13 ++++++++- tests/ci/ci_config.py | 3 ++- tests/ci/integration_test_check.py | 2 +- tests/ci/integration_tests_runner.py | 40 +++++++++++++++++++++++++++- 4 files changed, 54 insertions(+), 4 deletions(-) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 1208d8642ad..a9ae078b449 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -1003,6 +1003,7 @@ def _run_test(job_name: str, run_command: str) -> int: ) jr.test_results = [TestResult.create_check_timeout_expired()] jr.duration = stopwatch.duration_seconds + jr.additional_files += [job_log] print(f"Run action done for: [{job_name}]") return retcode @@ -1329,10 +1330,20 @@ def main() -> int: if CI.is_test_job(args.job_name): gh = GitHub(get_best_robot_token(), per_page=100) commit = get_commit(gh, pr_info.sha) + check_url = "" + if job_report.test_results or job_report.additional_files: + check_url = upload_result_helper.upload_results( + s3, + pr_info.number, + pr_info.sha, + job_report.test_results, + job_report.additional_files, + job_report.check_name or _get_ext_check_name(args.job_name), + ) post_commit_status( commit, ERROR, - "", + check_url, "Error: " + error_description, _get_ext_check_name(args.job_name), pr_info, diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 99f4ed38475..b5e424c2b3f 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -402,7 +402,8 @@ class CI: required_builds=[BuildNames.PACKAGE_ASAN], release_only=True, num_batches=4 ), JobNames.INTEGRATION_TEST_ASAN_OLD_ANALYZER: CommonJobConfigs.INTEGRATION_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_ASAN], num_batches=4 + required_builds=[BuildNames.PACKAGE_ASAN], + num_batches=3, ), JobNames.INTEGRATION_TEST_TSAN: CommonJobConfigs.INTEGRATION_TEST.with_properties( required_builds=[BuildNames.PACKAGE_TSAN], num_batches=6 diff --git a/tests/ci/integration_test_check.py b/tests/ci/integration_test_check.py index 6245f0490fc..7232ca375a1 100644 --- a/tests/ci/integration_test_check.py +++ b/tests/ci/integration_test_check.py @@ -29,7 +29,7 @@ from stopwatch import Stopwatch import integration_tests_runner as runner from ci_config import CI -from ci_utils import Utils +from ci_utils import Utils, Shell def get_json_params_dict( diff --git a/tests/ci/integration_tests_runner.py b/tests/ci/integration_tests_runner.py index f5dbef4f6db..d3cd3d16de1 100755 --- a/tests/ci/integration_tests_runner.py +++ b/tests/ci/integration_tests_runner.py @@ -9,6 +9,7 @@ import random import re import shlex import shutil +import signal import string import subprocess import sys @@ -16,11 +17,13 @@ import time import zlib # for crc32 from collections import defaultdict from itertools import chain -from typing import Any, Dict +from typing import Any, Dict, Optional from env_helper import IS_CI from integration_test_images import IMAGES from tee_popen import TeePopen +from report import JOB_TIMEOUT_TEST_NAME +from stopwatch import Stopwatch MAX_RETRY = 1 NUM_WORKERS = 5 @@ -621,6 +624,9 @@ class ClickhouseIntegrationTestsRunner: test_data_dirs = {} for i in range(num_tries): + if timeout_expired: + print("Timeout expired - break test group execution") + break logging.info("Running test group %s for the %s retry", test_group, i) clear_ip_tables_and_restart_daemons() @@ -657,6 +663,8 @@ class ClickhouseIntegrationTestsRunner: logging.info("Executing cmd: %s", cmd) # ignore retcode, since it meaningful due to pipe to tee with subprocess.Popen(cmd, shell=True, stderr=log, stdout=log) as proc: + global runner_subprocess + runner_subprocess = proc proc.wait() extra_logs_names = [log_basename] @@ -780,6 +788,9 @@ class ClickhouseIntegrationTestsRunner: logs = [] tries_num = 1 if should_fail else FLAKY_TRIES_COUNT for i in range(tries_num): + if timeout_expired: + print("Timeout expired - break flaky check execution") + break final_retry += 1 logging.info("Running tests for the %s time", i) counters, tests_times, log_paths = self.try_run_test_group( @@ -839,6 +850,7 @@ class ClickhouseIntegrationTestsRunner: return result_state, status_text, test_result, logs def run_impl(self, repo_path, build_path): + stopwatch = Stopwatch() if self.flaky_check or self.bugfix_validate_check: return self.run_flaky_check( repo_path, build_path, should_fail=self.bugfix_validate_check @@ -921,6 +933,9 @@ class ClickhouseIntegrationTestsRunner: random.shuffle(items_to_run) for group, tests in items_to_run: + if timeout_expired: + print("Timeout expired - break tests execution") + break logging.info("Running test group %s containing %s tests", group, len(tests)) group_counters, group_test_times, log_paths = self.try_run_test_group( repo_path, group, tests, MAX_RETRY, NUM_WORKERS, 0 @@ -981,6 +996,17 @@ class ClickhouseIntegrationTestsRunner: status_text = "Timeout, " + status_text result_state = "failure" + if timeout_expired: + logging.error( + "Job killed by external timeout signal - setting status to failure!" + ) + status_text = "Job timeout expired, " + status_text + result_state = "failure" + # add mock test case to make timeout visible in job report and in ci db + test_result.insert( + 0, (JOB_TIMEOUT_TEST_NAME, "FAIL", f"{stopwatch.duration_seconds}", "") + ) + if not counters or sum(len(counter) for counter in counters.values()) == 0: status_text = "No tests found for some reason! It's a bug" result_state = "failure" @@ -1001,6 +1027,7 @@ def write_results(results_file, status_file, results, status): def run(): + signal.signal(signal.SIGTERM, handle_sigterm) logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s") repo_path = os.environ.get("CLICKHOUSE_TESTS_REPO_PATH") @@ -1035,5 +1062,16 @@ def run(): logging.info("Result written") +timeout_expired = False +runner_subprocess = None # type:Optional[subprocess.Popen] + + +def handle_sigterm(signum, _frame): + print(f"WARNING: Received signal {signum}") + global timeout_expired + timeout_expired = True + runner_subprocess.send_signal(signal.SIGTERM) + + if __name__ == "__main__": run() From dde7ee29fc594f87bb35880bede845c4d4f29423 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Fri, 16 Aug 2024 10:22:12 +0200 Subject: [PATCH 17/26] sort tests in report by status --- tests/ci/ci_config.py | 8 ++++---- tests/ci/integration_test_check.py | 2 +- tests/ci/integration_tests_runner.py | 3 ++- tests/ci/report.py | 19 +++++++++++++++---- 4 files changed, 22 insertions(+), 10 deletions(-) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index b5e424c2b3f..8ce0b9fde5a 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -344,7 +344,7 @@ class CI: runner_type=Runners.FUNC_TESTER_ARM, ), JobNames.STATELESS_TEST_OLD_ANALYZER_S3_REPLICATED_RELEASE: CommonJobConfigs.STATELESS_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_RELEASE], num_batches=4 + required_builds=[BuildNames.PACKAGE_RELEASE], num_batches=2 ), JobNames.STATELESS_TEST_S3_DEBUG: CommonJobConfigs.STATELESS_TEST.with_properties( required_builds=[BuildNames.PACKAGE_DEBUG], num_batches=1 @@ -354,7 +354,7 @@ class CI: ), JobNames.STATELESS_TEST_S3_TSAN: CommonJobConfigs.STATELESS_TEST.with_properties( required_builds=[BuildNames.PACKAGE_TSAN], - num_batches=4, + num_batches=3, ), JobNames.STRESS_TEST_DEBUG: CommonJobConfigs.STRESS_TEST.with_properties( required_builds=[BuildNames.PACKAGE_DEBUG], @@ -403,14 +403,14 @@ class CI: ), JobNames.INTEGRATION_TEST_ASAN_OLD_ANALYZER: CommonJobConfigs.INTEGRATION_TEST.with_properties( required_builds=[BuildNames.PACKAGE_ASAN], - num_batches=3, + num_batches=6, ), JobNames.INTEGRATION_TEST_TSAN: CommonJobConfigs.INTEGRATION_TEST.with_properties( required_builds=[BuildNames.PACKAGE_TSAN], num_batches=6 ), JobNames.INTEGRATION_TEST_ARM: CommonJobConfigs.INTEGRATION_TEST.with_properties( required_builds=[BuildNames.PACKAGE_AARCH64], - num_batches=3, + num_batches=6, runner_type=Runners.FUNC_TESTER_ARM, ), JobNames.INTEGRATION_TEST: CommonJobConfigs.INTEGRATION_TEST.with_properties( diff --git a/tests/ci/integration_test_check.py b/tests/ci/integration_test_check.py index 7232ca375a1..6245f0490fc 100644 --- a/tests/ci/integration_test_check.py +++ b/tests/ci/integration_test_check.py @@ -29,7 +29,7 @@ from stopwatch import Stopwatch import integration_tests_runner as runner from ci_config import CI -from ci_utils import Utils, Shell +from ci_utils import Utils def get_json_params_dict( diff --git a/tests/ci/integration_tests_runner.py b/tests/ci/integration_tests_runner.py index d3cd3d16de1..c3b71b85022 100755 --- a/tests/ci/integration_tests_runner.py +++ b/tests/ci/integration_tests_runner.py @@ -1070,7 +1070,8 @@ def handle_sigterm(signum, _frame): print(f"WARNING: Received signal {signum}") global timeout_expired timeout_expired = True - runner_subprocess.send_signal(signal.SIGTERM) + if runner_subprocess: + runner_subprocess.send_signal(signal.SIGTERM) if __name__ == "__main__": diff --git a/tests/ci/report.py b/tests/ci/report.py index c2632719aef..a1b25b994c7 100644 --- a/tests/ci/report.py +++ b/tests/ci/report.py @@ -742,10 +742,21 @@ def create_test_html_report( has_test_time = any(tr.time is not None for tr in test_results) has_log_urls = False - # Display entires with logs at the top (they correspond to failed tests) - test_results.sort( - key=lambda result: result.raw_logs is None and result.log_files is None - ) + def sort_key(status): + if "fail" in status.lower(): + return 0 + elif "error" in status.lower(): + return 1 + elif "not" in status.lower(): + return 2 + elif "ok" in status.lower(): + return 10 + elif "success" in status.lower(): + return 9 + else: + return 5 + + test_results.sort(key=lambda result: sort_key(result.status)) for test_result in test_results: colspan = 0 From 625b186b4d699fcbdd3befdb349c132d3fa65e74 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 19 Aug 2024 10:03:38 +0000 Subject: [PATCH 18/26] Fix build with -DENABLE_LIBRARIES=0 --- src/DataTypes/DataTypeObject.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/DataTypes/DataTypeObject.cpp b/src/DataTypes/DataTypeObject.cpp index d6395155397..3997e892e9f 100644 --- a/src/DataTypes/DataTypeObject.cpp +++ b/src/DataTypes/DataTypeObject.cpp @@ -18,13 +18,15 @@ #include #include +#include "config.h" + #if USE_SIMDJSON -#include +# include +#elif USE_RAPIDJSON +# include +#else +# include #endif -#if USE_RAPIDJSON -#include -#endif -#include namespace DB { @@ -105,7 +107,7 @@ SerializationPtr DataTypeObject::doGetDefaultSerialization() const switch (schema_format) { case SchemaFormat::JSON: -#ifdef USE_SIMDJSON +#if USE_SIMDJSON return std::make_shared>( std::move(typed_path_serializations), paths_to_skip, From 2456df0d57301acb0e8c74a96e6411d95329d37c Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 19 Aug 2024 13:39:29 +0200 Subject: [PATCH 19/26] Add a test --- .../test_dictionaries_postgresql/test.py | 53 ++++++++++++++++++- 1 file changed, 51 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_dictionaries_postgresql/test.py b/tests/integration/test_dictionaries_postgresql/test.py index 516ac27ea26..c845a0b3d8b 100644 --- a/tests/integration/test_dictionaries_postgresql/test.py +++ b/tests/integration/test_dictionaries_postgresql/test.py @@ -530,10 +530,59 @@ def test_bad_configuration(started_cluster): """ ) - node1.query_and_get_error( + assert "Unexpected key `dbbb`" in node1.query_and_get_error( "SELECT dictGetUInt32(postgres_dict, 'value', toUInt64(1))" ) - assert node1.contains_in_log("Unexpected key `dbbb`") + + +def test_named_collection_from_ddl(started_cluster): + cursor = started_cluster.postgres_conn.cursor() + cursor.execute("DROP TABLE IF EXISTS test_table") + cursor.execute("CREATE TABLE test_table (id integer, value integer)") + + node1.query( + """ + DROP NAMED COLLECTION IF EXISTS pg_conn; + CREATE NAMED COLLECTION pg_conn + AS user = 'postgres', password = 'mysecretpassword', host = 'postgres1', port = 5432, database = 'postgres', table = 'test_table'; + """ + ) + + cursor.execute( + "INSERT INTO test_table SELECT i, i FROM generate_series(0, 99) as t(i)" + ) + + node1.query( + """ + DROP DICTIONARY IF EXISTS postgres_dict; + CREATE DICTIONARY postgres_dict (id UInt32, value UInt32) + PRIMARY KEY id + SOURCE(POSTGRESQL(NAME pg_conn)) + LIFETIME(MIN 1 MAX 2) + LAYOUT(HASHED()); + """ + ) + result = node1.query("SELECT dictGetUInt32(postgres_dict, 'value', toUInt64(99))") + assert int(result.strip()) == 99 + + node1.query( + """ + DROP NAMED COLLECTION IF EXISTS pg_conn_2; + CREATE NAMED COLLECTION pg_conn_2 + AS user = 'postgres', password = 'mysecretpassword', host = 'postgres1', port = 5432, dbbb = 'postgres', table = 'test_table'; + """ + ) + node1.query( + """ + DROP DICTIONARY IF EXISTS postgres_dict; + CREATE DICTIONARY postgres_dict (id UInt32, value UInt32) + PRIMARY KEY id + SOURCE(POSTGRESQL(NAME pg_conn_2)) + LIFETIME(MIN 1 MAX 2) + LAYOUT(HASHED()); + """ + ) + assert "Unexpected key `dbbb`" in node1.query_and_get_error("SELECT dictGetUInt32(postgres_dict, 'value', toUInt64(99))") if __name__ == "__main__": From 612416d5344720dade987fd3f4b44c31986742bb Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 19 Aug 2024 11:56:22 +0000 Subject: [PATCH 20/26] Automatic style fix --- tests/integration/test_dictionaries_postgresql/test.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_dictionaries_postgresql/test.py b/tests/integration/test_dictionaries_postgresql/test.py index c845a0b3d8b..010ecdb5084 100644 --- a/tests/integration/test_dictionaries_postgresql/test.py +++ b/tests/integration/test_dictionaries_postgresql/test.py @@ -582,7 +582,9 @@ def test_named_collection_from_ddl(started_cluster): LAYOUT(HASHED()); """ ) - assert "Unexpected key `dbbb`" in node1.query_and_get_error("SELECT dictGetUInt32(postgres_dict, 'value', toUInt64(99))") + assert "Unexpected key `dbbb`" in node1.query_and_get_error( + "SELECT dictGetUInt32(postgres_dict, 'value', toUInt64(99))" + ) if __name__ == "__main__": From a2eec5f0ae198a2f7991ce69b50a1e169d46bec3 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Mon, 19 Aug 2024 13:56:40 +0200 Subject: [PATCH 21/26] CI: Minor release workflow fix --- .github/workflows/release_branches.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index a5cd6321e8c..82826794ea3 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -482,7 +482,7 @@ jobs: if: ${{ !failure() }} run: | # update overall ci report - python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }} + python3 ./tests/ci/finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }} - name: Check Workflow results if: ${{ !cancelled() }} run: | @@ -490,5 +490,4 @@ jobs: cat > "$WORKFLOW_RESULT_FILE" << 'EOF' ${{ toJson(needs) }} EOF - python3 ./tests/ci/ci_buddy.py --check-wf-status From ecd60eab5f218883e1bd7e113ac2e49a557b88f8 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Mon, 19 Aug 2024 17:03:53 +0200 Subject: [PATCH 22/26] Stateless tests: increase hung check timeout --- tests/clickhouse-test | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 1203ad3730a..5fb892597f7 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -2572,12 +2572,12 @@ def do_run_tests(jobs, test_suite: TestSuite): try: clickhouse_execute( args, - query="SELECT 1 /*hang up check*/", - max_http_retries=5, - timeout=20, + query="SELECT 1 /*hung check*/", + max_http_retries=20, + timeout=10, ) except Exception: - print("Hang up check failed") + print("Hung check failed") server_died.set() if server_died.is_set(): From 2c8fade3d7da2ae934d06d55494a3896a0591f9b Mon Sep 17 00:00:00 2001 From: Han Fei Date: Mon, 19 Aug 2024 17:06:24 +0200 Subject: [PATCH 23/26] fix bug in mann whitney u test --- .../AggregateFunctionMannWhitney.cpp | 2 +- .../01561_mann_whitney_scipy.python | 22 ++++++++++++++++++- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionMannWhitney.cpp b/src/AggregateFunctions/AggregateFunctionMannWhitney.cpp index fa90846650d..ecd848f5af3 100644 --- a/src/AggregateFunctions/AggregateFunctionMannWhitney.cpp +++ b/src/AggregateFunctions/AggregateFunctionMannWhitney.cpp @@ -114,7 +114,7 @@ private: { if (ind < first.size()) return first[ind]; - return second[ind % first.size()]; + return second[ind - first.size()]; } size_t size() const diff --git a/tests/queries/0_stateless/01561_mann_whitney_scipy.python b/tests/queries/0_stateless/01561_mann_whitney_scipy.python index 4713120287d..a15ab7a41d9 100644 --- a/tests/queries/0_stateless/01561_mann_whitney_scipy.python +++ b/tests/queries/0_stateless/01561_mann_whitney_scipy.python @@ -17,9 +17,20 @@ def test_and_check(name, a, b, t_stat, p_value): client.query( "CREATE TABLE mann_whitney (left Float64, right UInt8) ENGINE = Memory;" ) + #client.query( + # "INSERT INTO mann_whitney VALUES {};".format( + # ", ".join(["({},{}), ({},{})".format(i, 0, j, 1) for i, j in zip(a, b)]) + # ) + #) client.query( "INSERT INTO mann_whitney VALUES {};".format( - ", ".join(["({},{}), ({},{})".format(i, 0, j, 1) for i, j in zip(a, b)]) + ", ".join(["({},{})".format(i, 0) for i in a]) + ) + ) + + client.query( + "INSERT INTO mann_whitney VALUES {};".format( + ", ".join(["({},{})".format(i, 1) for i in b]) ) ) @@ -59,6 +70,15 @@ def test_mann_whitney(): test_and_check("mannWhitneyUTest('greater')", rvs1, rvs2, s, p) +def test_mann_whitney_skew(): + rvs1 = [1] + rvs2 = [0,2,4] + s, p = stats.mannwhitneyu(rvs1, rvs2, alternative="two-sided") + test_and_check("mannWhitneyUTest", rvs1, rvs2, s, p) + test_and_check("mannWhitneyUTest('two-sided')", rvs1, rvs2, s, p) + + if __name__ == "__main__": test_mann_whitney() + test_mann_whitney_skew() print("Ok.") From 265d49e0917c5d1d9a43d46202b5e2d5b0d56a0b Mon Sep 17 00:00:00 2001 From: Han Fei Date: Mon, 19 Aug 2024 17:09:06 +0200 Subject: [PATCH 24/26] remove useless comments --- tests/queries/0_stateless/01561_mann_whitney_scipy.python | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tests/queries/0_stateless/01561_mann_whitney_scipy.python b/tests/queries/0_stateless/01561_mann_whitney_scipy.python index a15ab7a41d9..16bfb78e05b 100644 --- a/tests/queries/0_stateless/01561_mann_whitney_scipy.python +++ b/tests/queries/0_stateless/01561_mann_whitney_scipy.python @@ -17,11 +17,6 @@ def test_and_check(name, a, b, t_stat, p_value): client.query( "CREATE TABLE mann_whitney (left Float64, right UInt8) ENGINE = Memory;" ) - #client.query( - # "INSERT INTO mann_whitney VALUES {};".format( - # ", ".join(["({},{}), ({},{})".format(i, 0, j, 1) for i, j in zip(a, b)]) - # ) - #) client.query( "INSERT INTO mann_whitney VALUES {};".format( ", ".join(["({},{})".format(i, 0) for i in a]) From cc8a40ef98f640b3eb2961ba559d65805afc2fd7 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Mon, 19 Aug 2024 17:23:50 +0200 Subject: [PATCH 25/26] CI: tidy build timeout 2h -> 3h --- tests/ci/ci_config.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 8ce0b9fde5a..5453bffd9c6 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -163,6 +163,7 @@ class CI: tidy=True, comment="clang-tidy is used for static analysis", ), + timeout=10800, ), BuildNames.BINARY_DARWIN: CommonJobConfigs.BUILD.with_properties( build_config=BuildConfig( From 7fcacd16dfafebe7ae154d8b9a343a7d9ad11ec4 Mon Sep 17 00:00:00 2001 From: Han Fei Date: Mon, 19 Aug 2024 17:43:44 +0200 Subject: [PATCH 26/26] fix black --- tests/queries/0_stateless/01561_mann_whitney_scipy.python | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01561_mann_whitney_scipy.python b/tests/queries/0_stateless/01561_mann_whitney_scipy.python index 16bfb78e05b..0f84d510933 100644 --- a/tests/queries/0_stateless/01561_mann_whitney_scipy.python +++ b/tests/queries/0_stateless/01561_mann_whitney_scipy.python @@ -67,7 +67,7 @@ def test_mann_whitney(): def test_mann_whitney_skew(): rvs1 = [1] - rvs2 = [0,2,4] + rvs2 = [0, 2, 4] s, p = stats.mannwhitneyu(rvs1, rvs2, alternative="two-sided") test_and_check("mannWhitneyUTest", rvs1, rvs2, s, p) test_and_check("mannWhitneyUTest('two-sided')", rvs1, rvs2, s, p)