# ClickHouse/tests/integration/test_backward_compatibility/test_functions.py
#
# 246 lines
# 8.3 KiB
# Python

# pylint: disable=unused-argument
# pylint: disable=line-too-long
# pylint: disable=call-var-from-loop
# pylint: disable=redefined-outer-name
import logging
import pytest
from helpers.cluster import ClickHouseCluster
from helpers.client import QueryRuntimeException
# Two instances are registered on one cluster so their results can be compared:
# "upstream" uses the default image, "backward" is pinned to an older release.
cluster = ClickHouseCluster(__file__)

upstream = cluster.add_instance("upstream", allow_analyzer=False)

# Why tag 22.6: a bug changed the string representation of several
# aggregations in 22.9 and 22.10 and in some minor releases of 22.8, 22.7
# and 22.3, see https://github.com/ClickHouse/ClickHouse/issues/42916
# Affected at least: singleValueOrNull, last_value, min, max, any, anyLast,
# anyHeavy, first_value, argMin, argMax
backward = cluster.add_instance(
    "backward",
    image="clickhouse/clickhouse-server",
    tag="22.6",
    allow_analyzer=False,
    with_installed_binary=True,
)
@pytest.fixture(scope="module")
def start_cluster():
    """Module-scoped fixture that starts the cluster and always shuts it down.

    Yields:
        The module-level ``cluster`` object once ``cluster.start()`` returned.
    """
    try:
        # start() sits inside the try so that shutdown() also runs when the
        # start-up itself raises.
        cluster.start()
        yield cluster
    finally:
        cluster.shutdown()
def test_aggregate_states(start_cluster):
    """
    Compare serialized aggregate states produced by both nodes.

    This test goes through all aggregate functions that:
    - have only one argument
    - support String as an argument

    and does a simple check by creating the aggregate state from one string
    ('foo') on the "backward" and on the "upstream" node and comparing the
    hex representation of both states.

    Yes, this does not cover everything (it does not cover functions with a
    different number of arguments or other types, nor different states in
    case of multiple values - e.g. uniqCombined, although uniqCombined is
    checked via uniqHLL12), but it is at least something.

    As for the type, String has been selected since it is more likely that
    some hash function is used internally.

    A function is skipped when the "backward" node rejects the call with one
    of the allowed error codes; any other error on that node, or a state
    mismatch, counts as a failure. Errors raised by the "upstream" node are
    not caught and abort the test.
    """
    # Same text as a triple-quoted block, spelled out line by line.
    list_functions_query = (
        "\n"
        "SELECT if(NOT empty(alias_to), alias_to, name)\n"
        "FROM system.functions\n"
        "WHERE is_aggregate = 1\n"
    )
    aggregate_functions = [
        name.strip()
        for name in backward.query(list_functions_query).strip().split("\n")
    ]
    logging.info("Got %s aggregate functions", len(aggregate_functions))

    # Errors meaning "this function cannot be called as f('foo')"; such
    # functions are skipped rather than reported as failures.
    allowed_errors = (
        "NUMBER_OF_ARGUMENTS_DOESNT_MATCH",
        "ILLEGAL_TYPE_OF_ARGUMENT",
        # sequenceNextNode() and friends
        "UNKNOWN_AGGREGATE_FUNCTION",
        # Function X takes exactly one parameter:
        # The function 'X' can only be used as a window function
        "BAD_ARGUMENTS",
        # aggThrow
        "AGGREGATE_FUNCTION_THROW",
    )

    skipped = 0
    failed = 0
    passed = 0

    def get_aggregate_state_hex(node, function_name):
        """Return the hex dump of `<function_name>State('foo')` from `node`."""
        return node.query(
            f"select hex(initializeAggregation('{function_name}State', 'foo'))"
        ).strip()

    for aggregate_function in aggregate_functions:
        logging.info("Checking %s", aggregate_function)

        try:
            backward_state = get_aggregate_state_hex(backward, aggregate_function)
        except QueryRuntimeException as e:
            error_message = str(e)
            if any(error in error_message for error in allowed_errors):
                logging.info("Skipping %s", aggregate_function)
                skipped += 1
                continue
            # Log the name of the function being checked; the previous code
            # referenced an undefined name here and raised NameError.
            logging.exception("Failed %s", aggregate_function)
            failed += 1
            continue

        upstream_state = get_aggregate_state_hex(upstream, aggregate_function)
        if upstream_state != backward_state:
            logging.warning(
                "Failed %s, %s (backward) != %s (upstream)",
                aggregate_function,
                backward_state,
                upstream_state,
            )
            failed += 1
        else:
            logging.info("OK %s", aggregate_function)
            passed += 1

    logging.info(
        "Aggregate functions: %s, Failed: %s, skipped: %s, passed: %s",
        len(aggregate_functions),
        failed,
        skipped,
        passed,
    )
    assert failed == 0
    assert passed > 0
    # Every function must have been accounted for exactly once.
    assert failed + passed + skipped == len(aggregate_functions)
def test_string_functions(start_cluster):
    """
    Compare results of non-aggregate functions called with one string.

    The function names are read from system.functions (is_aggregate = 0) on
    the "backward" node, minus an explicit exclude list. Each remaining
    function is called as f('foo') on the "backward" node first: if that call
    fails with one of the allowed errors the function is skipped, any other
    error counts as a failure. Otherwise the same call is made on the
    "upstream" node and both values must be equal. Errors raised by the
    "upstream" node are not caught.
    """
    listing_sql = (
        "\n"
        "SELECT if(NOT empty(alias_to), alias_to, name)\n"
        "FROM system.functions\n"
        "WHERE is_aggregate = 0\n"
    )
    raw_names = backward.query(listing_sql).strip().split("\n")

    excludes = [
        "rand",
        "rand64",
        "randConstant",
        "generateUUIDv4",
        # Syntax error otherwise
        "position",
        "substring",
        "CAST",
        "getTypeSerializationStreams",
        # NOTE: no need to ignore now()/now64() since they will fail because
        # they don't accept any argument
        # 22.8 Backward Incompatible Change: Extended range of Date32
        "toDate32OrZero",
        "toDate32OrDefault",
        # 23.9 changed the base64-handling library from Turbo base64 to
        # aklomp-base64. They differ in the way they deal with base64 values
        # that are not properly padded by '=', for example below test value
        # v='foo'. (Depending on the specification/context, padding is
        # mandatory or optional). The former lib produces a value based on
        # implicit padding, the latter lib throws an error.
        "FROM_BASE64",
        "base64Decode",
        # PR #56913 (in v23.11) corrected the way tryBase64Decode() behaved
        # with invalid inputs. Old versions return garbage, new versions
        # return an empty string (as it was always documented).
        "tryBase64Decode",
        # Removed in 23.9
        "meiliMatch",
    ]
    functions = [
        name for name in (raw.strip() for raw in raw_names) if name not in excludes
    ]
    logging.info("Got %s functions", len(functions))

    # Substrings of the error text that turn a failing call into a skip.
    allowed_errors = (
        # Messages
        "Cannot load time zone ",
        "No macro ",
        "Should start with ",  # POINT/POLYGON/...
        "Cannot read input: expected a digit but got something else:",
        # ErrorCodes
        "NUMBER_OF_ARGUMENTS_DOESNT_MATCH",
        "ILLEGAL_TYPE_OF_ARGUMENT",
        "TOO_FEW_ARGUMENTS_FOR_FUNCTION",
        "DICTIONARIES_WAS_NOT_LOADED",
        "CANNOT_PARSE_UUID",
        "CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING",
        "ILLEGAL_COLUMN",
        "TYPE_MISMATCH",
        "SUPPORT_IS_DISABLED",
        "CANNOT_PARSE_DATE",
        "UNKNOWN_SETTING",
        "CANNOT_PARSE_BOOL",
        "FILE_DOESNT_EXIST",
        "NOT_IMPLEMENTED",
        "BAD_GET",
        "UNKNOWN_TYPE",
        # addressToSymbol
        "FUNCTION_NOT_ALLOWED",
        # Date functions
        "CANNOT_PARSE_TEXT",
        "CANNOT_PARSE_DATETIME",
        # Function X takes exactly one parameter:
        # The function 'X' can only be used as a window function
        "BAD_ARGUMENTS",
    )

    skipped = 0
    failed = 0
    passed = 0

    def evaluate(node, function_name, value):
        """Run `function_name('value')` on `node` and return the stripped output."""
        return node.query(f"select {function_name}('{value}')").strip()

    v = "foo"
    for name in functions:
        logging.info("Checking %s('%s')", name, v)

        try:
            backward_value = evaluate(backward, name, v)
        except QueryRuntimeException as e:
            error_message = str(e)
            if any(marker in error_message for marker in allowed_errors):
                logging.info("Skipping %s", name)
                skipped += 1
            else:
                logging.exception("Failed %s", name)
                failed += 1
            continue

        upstream_value = evaluate(upstream, name, v)
        if upstream_value == backward_value:
            logging.info("OK %s", name)
            passed += 1
            continue

        logging.warning(
            "Failed %s('%s') %s (backward) != %s (upstream)",
            name,
            v,
            backward_value,
            upstream_value,
        )
        failed += 1

    logging.info(
        "Functions: %s, failed: %s, skipped: %s, passed: %s",
        len(functions),
        failed,
        skipped,
        passed,
    )
    assert failed == 0
    assert passed > 0
    # Every function must land in exactly one of the three counters.
    assert failed + passed + skipped == len(functions)