# pylint: disable=unused-argument
# pylint: disable=line-too-long
# pylint: disable=call-var-from-loop
# pylint: disable=redefined-outer-name

import logging

import pytest

from helpers.cluster import ClickHouseCluster
from helpers.client import QueryRuntimeException

cluster = ClickHouseCluster(__file__)
upstream = cluster.add_instance("upstream", allow_analyzer=False)
backward = cluster.add_instance(
    "backward",
    image="clickhouse/clickhouse-server",
    # Note that a bug changed the string representation of several aggregations in 22.9 and 22.10 and some minor
    # releases of 22.8, 22.7 and 22.3
    # See https://github.com/ClickHouse/ClickHouse/issues/42916
    # Affected at least: singleValueOrNull, last_value, min, max, any, anyLast, anyHeavy, first_value, argMin, argMax
    tag="22.6",
    with_installed_binary=True,
    allow_analyzer=False,
)

# Substrings of an error message from the "backward" node that mean an
# aggregate function simply cannot be called as f('foo'); such functions are
# skipped instead of being counted as failures.
_AGGREGATE_ALLOWED_ERRORS = (
    "NUMBER_OF_ARGUMENTS_DOESNT_MATCH",
    "ILLEGAL_TYPE_OF_ARGUMENT",
    # sequenceNextNode() and friends
    "UNKNOWN_AGGREGATE_FUNCTION",
    # Function X takes exactly one parameter:
    # The function 'X' can only be used as a window function
    "BAD_ARGUMENTS",
    # aggThrow
    "AGGREGATE_FUNCTION_THROW",
)

# Functions that are never probed by test_string_functions.
_STRING_FUNCTION_EXCLUDES = frozenset(
    (
        "rand",
        "rand64",
        "randConstant",
        "generateUUIDv4",
        # Syntax error otherwise
        "position",
        "substring",
        "CAST",
        "getTypeSerializationStreams",
        # NOTE: no need to ignore now()/now64() since they will fail because they don't accept any argument
        # 22.8 Backward Incompatible Change: Extended range of Date32
        "toDate32OrZero",
        "toDate32OrDefault",
    )
)

# Substrings of an error message from the "backward" node that mean a regular
# function cannot be called as f('foo'); such functions are skipped.
_STRING_FUNCTION_ALLOWED_ERRORS = (
    # Messages
    "Cannot load time zone ",
    "No macro ",
    "Should start with ",  # POINT/POLYGON/...
    "Cannot read input: expected a digit but got something else:",
    # ErrorCodes
    "NUMBER_OF_ARGUMENTS_DOESNT_MATCH",
    "ILLEGAL_TYPE_OF_ARGUMENT",
    "TOO_FEW_ARGUMENTS_FOR_FUNCTION",
    "DICTIONARIES_WAS_NOT_LOADED",
    "CANNOT_PARSE_UUID",
    "CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING",
    "ILLEGAL_COLUMN",
    "TYPE_MISMATCH",
    "SUPPORT_IS_DISABLED",
    "CANNOT_PARSE_DATE",
    "UNKNOWN_SETTING",
    "CANNOT_PARSE_BOOL",
    "FILE_DOESNT_EXIST",
    "NOT_IMPLEMENTED",
    "BAD_GET",
    "UNKNOWN_TYPE",
    # addressToSymbol
    "FUNCTION_NOT_ALLOWED",
    # Date functions
    "CANNOT_PARSE_TEXT",
    "CANNOT_PARSE_DATETIME",
    # Function X takes exactly one parameter:
    # The function 'X' can only be used as a window function
    "BAD_ARGUMENTS",
)


@pytest.fixture(scope="module")
def start_cluster():
    """Start the cluster once for the module and shut it down afterwards.

    The shutdown runs in a ``finally`` clause, so it happens even when
    start-up or one of the tests raises.
    """
    try:
        cluster.start()
        yield cluster
    finally:
        cluster.shutdown()


def test_aggregate_states(start_cluster):
    """
    Compare serialized aggregate states between the old and the new server.

    This test goes through all aggregate functions that:
    - have only one argument
    - support a string as an argument

    and does a simple check by creating the aggregate state from one string
    ('foo') on both nodes and comparing the hex dumps.

    This does not cover everything (functions with a different number of
    arguments, other types, different states in case of multiple values -
    e.g. uniqCombined, though uniqCombined is checked via uniqHLL12), but it
    is at least something.

    String was selected as the argument type since it is more likely that
    some hash function is used internally.
    """

    output = backward.query(
        """
        SELECT if(NOT empty(alias_to), alias_to, name)
        FROM system.functions
        WHERE is_aggregate = 1
        """
    )
    aggregate_functions = [name.strip() for name in output.strip().split("\n")]
    logging.info("Got %s aggregate functions", len(aggregate_functions))

    skipped = 0
    failed = 0
    passed = 0

    def get_aggregate_state_hex(node, function_name):
        """Return the hex dump of `<function_name>State` built from 'foo' on `node`."""
        return node.query(
            f"select hex(initializeAggregation('{function_name}State', 'foo'))"
        ).strip()

    for aggregate_function in aggregate_functions:
        logging.info("Checking %s", aggregate_function)

        try:
            backward_state = get_aggregate_state_hex(backward, aggregate_function)
        except QueryRuntimeException as e:
            error_message = str(e)
            if any(error in error_message for error in _AGGREGATE_ALLOWED_ERRORS):
                logging.info("Skipping %s", aggregate_function)
                skipped += 1
                continue
            # An unexpected error on the old server counts as a failure; keep
            # going so that the final summary covers every function.
            logging.exception("Failed %s", aggregate_function)
            failed += 1
            continue

        # Errors from the new server are deliberately not caught: whatever
        # works on the old server has to work on the new one as well.
        upstream_state = get_aggregate_state_hex(upstream, aggregate_function)
        if upstream_state != backward_state:
            logging.warning(
                "Failed %s, %s (backward) != %s (upstream)",
                aggregate_function,
                backward_state,
                upstream_state,
            )
            failed += 1
        else:
            logging.info("OK %s", aggregate_function)
            passed += 1

    logging.info(
        "Aggregate functions: %s, Failed: %s, skipped: %s, passed: %s",
        len(aggregate_functions),
        failed,
        skipped,
        passed,
    )
    assert failed == 0
    assert passed > 0
    assert failed + passed + skipped == len(aggregate_functions)


def test_string_functions(start_cluster):
    """
    Compare results of regular functions between the old and the new server.

    Every non-aggregate function listed by the old server (except the ones in
    the exclude list) is called with the single string argument 'foo' on both
    nodes and the outputs must be equal. Functions that the old server
    rejects with one of the allowed errors are skipped.
    """

    output = backward.query(
        """
        SELECT if(NOT empty(alias_to), alias_to, name)
        FROM system.functions
        WHERE is_aggregate = 0
        """
    )
    functions = [
        name
        for name in (line.strip() for line in output.strip().split("\n"))
        if name not in _STRING_FUNCTION_EXCLUDES
    ]
    logging.info("Got %s functions", len(functions))

    skipped = 0
    failed = 0
    passed = 0

    def get_function_value(node, function_name, value):
        """Return the stripped output of `function_name('<value>')` run on `node`."""
        return node.query(f"select {function_name}('{value}')").strip()

    v = "foo"
    for function in functions:
        logging.info("Checking %s('%s')", function, v)

        try:
            backward_value = get_function_value(backward, function, v)
        except QueryRuntimeException as e:
            error_message = str(e)
            if any(
                error in error_message for error in _STRING_FUNCTION_ALLOWED_ERRORS
            ):
                logging.info("Skipping %s", function)
                skipped += 1
                continue
            # An unexpected error on the old server counts as a failure; keep
            # going so that the final summary covers every function.
            logging.exception("Failed %s", function)
            failed += 1
            continue

        # Errors from the new server are deliberately not caught: whatever
        # works on the old server has to work on the new one as well.
        upstream_value = get_function_value(upstream, function, v)
        if upstream_value != backward_value:
            logging.warning(
                "Failed %s('%s') %s (backward) != %s (upstream)",
                function,
                v,
                backward_value,
                upstream_value,
            )
            failed += 1
        else:
            logging.info("OK %s", function)
            passed += 1

    logging.info(
        "Functions: %s, failed: %s, skipped: %s, passed: %s",
        len(functions),
        failed,
        skipped,
        passed,
    )
    assert failed == 0
    assert passed > 0
    assert failed + passed + skipped == len(functions)