From 41573d4b7f6abcd0385dd526c8bff00cd9296997 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?=
Date: Tue, 10 Sep 2024 14:33:10 +0000
Subject: [PATCH 01/11] Move test setup logic and test utils into separate files

---
 .../test_storage_kafka/conftest.py            |  93 +++
 .../test_storage_kafka/kafka_tests_utils.py   | 381 ++++++++++
 tests/integration/test_storage_kafka/test.py  | 663 ++++--------
 .../test_produce_http_interface.py            | 115 +--
 4 files changed, 622 insertions(+), 630 deletions(-)
 create mode 100644 tests/integration/test_storage_kafka/conftest.py
 create mode 100644 tests/integration/test_storage_kafka/kafka_tests_utils.py

diff --git a/tests/integration/test_storage_kafka/conftest.py b/tests/integration/test_storage_kafka/conftest.py
new file mode 100644
index 00000000000..d0fe9ca38f9
--- /dev/null
+++ b/tests/integration/test_storage_kafka/conftest.py
@@ -0,0 +1,93 @@
+import pytest
+import logging
+import time
+
+from helpers.cluster import ClickHouseCluster
+from test_storage_kafka.kafka_tests_utils import get_admin_client
+
+pytestmark = pytest.mark.skip
+
+KAFKA_TOPIC_OLD = "old_t"
+KAFKA_CONSUMER_GROUP_OLD = "old_cg"
+KAFKA_TOPIC_NEW = "new_t"
+KAFKA_CONSUMER_GROUP_NEW = "new_cg"
+
+# We have to initialize these lazily, otherwise WORKER_FREE_PORTS is not populated, making the PortPoolManager fail.
+# It would be nice to initialize these eagerly, but the fixtures have to be in conftest.py to apply them automatically,
+# so they "cannot be forgotten". We could put these variables and the gist of the fixture implementation into a separate
+# file, but I think splitting them has more disadvantages than eager initialization has advantages.
+conftest_cluster = None
+conftest_instance = None
+
+
+def init_cluster_and_instance():
+    global conftest_cluster
+    global conftest_instance
+    if conftest_cluster is not None:
+        return
+
+    conftest_cluster = ClickHouseCluster(__file__)
+    conftest_instance = conftest_cluster.add_instance(
+        "instance",
+        main_configs=["configs/kafka.xml", "configs/named_collection.xml"],
+        user_configs=["configs/users.xml"],
+        with_kafka=True,
+        with_zookeeper=True,  # For Replicated Table
+        macros={
+            "kafka_broker": "kafka1",
+            "kafka_topic_old": KAFKA_TOPIC_OLD,
+            "kafka_group_name_old": KAFKA_CONSUMER_GROUP_OLD,
+            "kafka_topic_new": KAFKA_TOPIC_NEW,
+            "kafka_group_name_new": KAFKA_CONSUMER_GROUP_NEW,
+            "kafka_client_id": "instance",
+            "kafka_format_json_each_row": "JSONEachRow",
+        },
+        clickhouse_path_dir="clickhouse_path",
+    )
+
+
+@pytest.fixture(scope="module")
+def kafka_cluster():
+    try:
+        init_cluster_and_instance()
+        conftest_cluster.start()
+        kafka_id = conftest_cluster.kafka_docker_id
+        logging.info(f"kafka_id is {kafka_id}")
+        yield conftest_cluster
+    finally:
+        conftest_cluster.shutdown()
+
+
+# kafka_cluster is requested here to ensure the cluster is initialized before we yield the instance
+@pytest.fixture()
+def instance(kafka_cluster):
+    yield conftest_instance
+
+
+@pytest.fixture(autouse=True)
+def kafka_setup_teardown(kafka_cluster, instance):
+    instance.query("DROP DATABASE IF EXISTS test SYNC; CREATE DATABASE test;")
+    admin_client = get_admin_client(kafka_cluster)
+
+    def get_topics_to_delete():
+        return [t for t in admin_client.list_topics() if not t.startswith("_")]
+
+    topics = get_topics_to_delete()
+    logging.debug(f"Deleting topics: {topics}")
+    result = admin_client.delete_topics(topics)
+    for topic, error in result.topic_error_codes:
+        if error != 0:
+            logging.warning(f"Received error {error} while deleting topic {topic}")
+        else:
+            logging.info(f"Deleted topic {topic}")
+
+    retries = 0
+    topics = get_topics_to_delete()
+    while len(topics) != 0:
+        logging.info(f"Existing topics: {topics}")
+        if retries >= 5:
+            raise Exception(f"Failed to delete topics {topics}")
+        retries += 1
+        time.sleep(0.5)
+        topics = get_topics_to_delete()
+    yield  # run test
diff --git a/tests/integration/test_storage_kafka/kafka_tests_utils.py b/tests/integration/test_storage_kafka/kafka_tests_utils.py
new file mode 100644
index 00000000000..5c32221b9cf
--- /dev/null
+++ b/tests/integration/test_storage_kafka/kafka_tests_utils.py
@@ -0,0 +1,381 @@
+import logging
+import time
+import avro.schema
+import avro.io
+import avro.datafile
+import io
+from kafka.admin import NewTopic
+from kafka import KafkaAdminClient, KafkaProducer, KafkaConsumer
+from confluent_kafka.avro.serializer.message_serializer import MessageSerializer
+from contextlib import contextmanager
+from google.protobuf.internal.encoder import _VarintBytes
+
+
+# protoc --version
+# libprotoc 3.0.0
+# # to create kafka_pb2.py
+# protoc --python_out=. kafka.proto
+from test_storage_kafka import kafka_pb2
+from test_storage_kafka import social_pb2
+
+
+@contextmanager
+def kafka_topic(
+    admin_client,
+    topic_name,
+    num_partitions=1,
+    replication_factor=1,
+    max_retries=50,
+    config=None,
+):
+    kafka_create_topic(
+        admin_client,
+        topic_name,
+        num_partitions,
+        replication_factor,
+        max_retries,
+        config,
+    )
+    try:
+        yield None
+    finally:
+        # Code to release resource, e.g.:
+        kafka_delete_topic(admin_client, topic_name, max_retries)
+
+
+@contextmanager
+def existing_kafka_topic(admin_client, topic_name, max_retries=50):
+    try:
+        yield None
+    finally:
+        kafka_delete_topic(admin_client, topic_name, max_retries)
+
+
+def kafka_create_topic(
+    admin_client,
+    topic_name,
+    num_partitions=1,
+    replication_factor=1,
+    max_retries=50,
+    config=None,
+):
+    logging.debug(
+        f"Kafka create topic={topic_name}, num_partitions={num_partitions}, replication_factor={replication_factor}"
+    )
+    topics_list = [
+        NewTopic(
+            name=topic_name,
+            num_partitions=num_partitions,
+            replication_factor=replication_factor,
+            topic_configs=config,
+        )
+    ]
+    retries = 0
+    while True:
+        try:
+            admin_client.create_topics(new_topics=topics_list, validate_only=False)
+            logging.debug("Admin client succeeded")
+            return
+        except Exception as e:
+            retries += 1
+            time.sleep(0.5)
+            if retries < max_retries:
+                logging.warning(f"Failed to create topic {e}")
+            else:
+                raise
+
+
+def kafka_delete_topic(admin_client, topic, max_retries=50):
+    result = admin_client.delete_topics([topic])
+    for topic, e in result.topic_error_codes:
+        if e == 0:
+            logging.debug(f"Topic {topic} deleted")
+        else:
+            logging.error(f"Failed to delete topic {topic}: {e}")
+
+    retries = 0
+    while True:
+        topics_listed = admin_client.list_topics()
+        logging.debug(f"TOPICS LISTED: {topics_listed}")
+        if topic not in topics_listed:
+            return
+        else:
+            retries += 1
+            time.sleep(0.5)
+            if retries > max_retries:
+                raise Exception(f"Failed to delete topics {topic}, {result}")
+
+
+def get_kafka_producer(port, serializer, retries):
+    errors = []
+    for _ in range(retries):
+        try:
+            producer = KafkaProducer(
+                bootstrap_servers="localhost:{}".format(port),
+                value_serializer=serializer,
+            )
+            logging.debug("Kafka Connection established: localhost:{}".format(port))
+            return producer
+        except Exception as e:
+            errors += [str(e)]
+            time.sleep(1)
+
+    raise Exception("Connection not established, {}".format(errors))
+
+
+def producer_serializer(x):
+    return x.encode() if isinstance(x, str) else x
+
+
+def
get_admin_client(kafka_cluster): + return KafkaAdminClient( + bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) + ) + + +def kafka_produce(kafka_cluster, topic, messages, timestamp=None, retries=15): + logging.debug( + "kafka_produce server:{}:{} topic:{}".format( + "localhost", kafka_cluster.kafka_port, topic + ) + ) + producer = get_kafka_producer( + kafka_cluster.kafka_port, producer_serializer, retries + ) + for message in messages: + producer.send(topic=topic, value=message, timestamp_ms=timestamp) + producer.flush() + + +def kafka_consume(kafka_cluster, topic, need_decode=True, timestamp=0): + consumer = KafkaConsumer( + bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port), + auto_offset_reset="earliest", + ) + consumer.subscribe(topics=(topic)) + for toppar, messages in list(consumer.poll(5000).items()): + if toppar.topic == topic: + for message in messages: + assert timestamp == 0 or message.timestamp / 1000 == timestamp + if need_decode: + yield message.value.decode() + else: + yield message.value + consumer.unsubscribe() + consumer.close() + + +def kafka_produce_protobuf_messages(kafka_cluster, topic, start_index, num_messages): + data = b"" + for i in range(start_index, start_index + num_messages): + msg = kafka_pb2.KeyValuePair() + msg.key = i + msg.value = str(i) + serialized_msg = msg.SerializeToString() + data = data + _VarintBytes(len(serialized_msg)) + serialized_msg + producer = KafkaProducer( + bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port), + value_serializer=producer_serializer, + ) + producer.send(topic=topic, value=data) + producer.flush() + logging.debug(("Produced {} messages for topic {}".format(num_messages, topic))) + + +def kafka_consume_with_retry( + kafka_cluster, + topic, + expected_messages, + need_decode=True, + timestamp=0, + retry_count=20, + sleep_time=0.1, +): + messages = [] + try_count = 0 + while try_count < retry_count: + try_count += 1 + messages.extend( + kafka_consume( + kafka_cluster, topic, need_decode=need_decode, timestamp=timestamp + ) + ) + if len(messages) == expected_messages: + break + time.sleep(sleep_time) + if len(messages) != expected_messages: + raise Exception(f"Got only {len(messages)} messages") + return messages + + +def kafka_produce_protobuf_messages_no_delimiters( + kafka_cluster, topic, start_index, num_messages +): + data = "" + producer = KafkaProducer( + bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) + ) + for i in range(start_index, start_index + num_messages): + msg = kafka_pb2.KeyValuePair() + msg.key = i + msg.value = str(i) + serialized_msg = msg.SerializeToString() + producer.send(topic=topic, value=serialized_msg) + producer.flush() + logging.debug("Produced {} messages for topic {}".format(num_messages, topic)) + + +def kafka_produce_protobuf_social(kafka_cluster, topic, start_index, num_messages): + data = b"" + for i in range(start_index, start_index + num_messages): + msg = social_pb2.User() + msg.username = "John Doe {}".format(i) + msg.timestamp = 1000000 + i + serialized_msg = msg.SerializeToString() + data = data + _VarintBytes(len(serialized_msg)) + serialized_msg + producer = KafkaProducer( + bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port), + value_serializer=producer_serializer, + ) + producer.send(topic=topic, value=data) + producer.flush() + logging.debug(("Produced {} messages for topic {}".format(num_messages, topic))) + + +def avro_message(value): + schema = avro.schema.make_avsc_object( + { + "name": "row", + 
"type": "record", + "fields": [ + {"name": "id", "type": "long"}, + {"name": "blockNo", "type": "int"}, + {"name": "val1", "type": "string"}, + {"name": "val2", "type": "float"}, + {"name": "val3", "type": "int"}, + ], + } + ) + bytes_writer = io.BytesIO() + # writer = avro.io.DatumWriter(schema) + # encoder = avro.io.BinaryEncoder(bytes_writer) + # writer.write(value, encoder) + + # DataFileWrite seems to be mandatory to get schema encoded + writer = avro.datafile.DataFileWriter(bytes_writer, avro.io.DatumWriter(), schema) + if isinstance(value, list): + for v in value: + writer.append(v) + else: + writer.append(value) + writer.flush() + raw_bytes = bytes_writer.getvalue() + + writer.close() + bytes_writer.close() + return raw_bytes + + +def avro_confluent_message(schema_registry_client, value): + serializer = MessageSerializer(schema_registry_client) + schema = avro.schema.make_avsc_object( + { + "name": "row", + "type": "record", + "fields": [ + {"name": "id", "type": "long"}, + {"name": "blockNo", "type": "int"}, + {"name": "val1", "type": "string"}, + {"name": "val2", "type": "float"}, + {"name": "val3", "type": "int"}, + ], + } + ) + return serializer.encode_record_with_schema("test_subject", schema, value) + + +def create_settings_string(settings): + if settings is None: + return "" + + def format_value(value): + if isinstance(value, str): + return f"'{value}'" + elif isinstance(value, bool): + return str(int(value)) + return str(value) + + settings_string = "SETTINGS " + keys = settings.keys() + first_key = next(iter(settings)) + settings_string += str(first_key) + " = " + format_value(settings[first_key]) + for key in keys: + if key == first_key: + continue + settings_string += ", " + str(key) + " = " + format_value(settings[key]) + return settings_string + + +def generate_old_create_table_query( + table_name, + columns_def, + database="test", + brokers="{kafka_broker}:19092", + topic_list="{kafka_topic_new}", + consumer_group="{kafka_group_name_new}", + format="{kafka_format_json_each_row}", + row_delimiter="\\n", + keeper_path=None, # it is not used, but it is easier to handle keeper_path and replica_name like this + replica_name=None, + settings=None, +): + settings_string = create_settings_string(settings) + query = f"""CREATE TABLE {database}.{table_name} ({columns_def}) ENGINE = Kafka('{brokers}', '{topic_list}', '{consumer_group}', '{format}', '{row_delimiter}') +{settings_string}""" + logging.debug(f"Generated old create query: {query}") + return query + + +def generate_new_create_table_query( + table_name, + columns_def, + database="test", + brokers="{kafka_broker}:19092", + topic_list="{kafka_topic_new}", + consumer_group="{kafka_group_name_new}", + format="{kafka_format_json_each_row}", + row_delimiter="\\n", + keeper_path=None, + replica_name=None, + settings=None, +): + if settings is None: + settings = {} + if keeper_path is None: + keeper_path = f"/clickhouse/{{database}}/{table_name}" + if replica_name is None: + replica_name = "r1" + settings["kafka_keeper_path"] = keeper_path + settings["kafka_replica_name"] = replica_name + settings_string = create_settings_string(settings) + query = f"""CREATE TABLE {database}.{table_name} ({columns_def}) ENGINE = Kafka('{brokers}', '{topic_list}', '{consumer_group}', '{format}', '{row_delimiter}') +{settings_string} +SETTINGS allow_experimental_kafka_offsets_storage_in_keeper=1""" + logging.debug(f"Generated new create query: {query}") + return query + + +def must_use_thread_per_consumer(generator): + if generator == 
generate_old_create_table_query: + return False + if generator == generate_new_create_table_query: + return True + raise Exception("Unexpected generator") + + +def get_topic_postfix(generator): + if generator == generate_old_create_table_query: + return "old" + if generator == generate_new_create_table_query: + return "new" + raise Exception("Unexpected generator") diff --git a/tests/integration/test_storage_kafka/test.py b/tests/integration/test_storage_kafka/test.py index bef90e1b9d3..1d2053f13cb 100644 --- a/tests/integration/test_storage_kafka/test.py +++ b/tests/integration/test_storage_kafka/test.py @@ -7,40 +7,57 @@ import time import logging import io import string -import ast import math - +import pytest import avro.schema import avro.io import avro.datafile from confluent_kafka.avro.cached_schema_registry_client import ( CachedSchemaRegistryClient, ) -from confluent_kafka.avro.serializer.message_serializer import MessageSerializer - import kafka.errors -import pytest -from google.protobuf.internal.encoder import _VarintBytes -from helpers.client import QueryRuntimeException -from helpers.cluster import ClickHouseCluster, is_arm -from helpers.network import PartitionManager -from helpers.test_tools import TSV -from kafka import KafkaAdminClient, KafkaProducer, KafkaConsumer, BrokerConnection +from kafka import KafkaAdminClient, KafkaProducer, BrokerConnection from kafka.protocol.admin import DescribeGroupsRequest_v1 from kafka.protocol.group import MemberAssignment -from kafka.admin import NewTopic -from contextlib import contextmanager +from google.protobuf.internal.encoder import _VarintBytes -# protoc --version -# libprotoc 3.0.0 -# # to create kafka_pb2.py -# protoc --python_out=. kafka.proto +from helpers.client import QueryRuntimeException +from helpers.cluster import is_arm +from helpers.network import PartitionManager +from helpers.test_tools import TSV +from test_storage_kafka.kafka_tests_utils import ( + kafka_create_topic, + kafka_delete_topic, + get_kafka_producer, + generate_old_create_table_query, + generate_new_create_table_query, + kafka_produce, + producer_serializer, + avro_message, + avro_confluent_message, + get_admin_client, + kafka_produce_protobuf_messages, + kafka_produce_protobuf_social, + kafka_produce_protobuf_messages_no_delimiters, + kafka_topic, + existing_kafka_topic, + must_use_thread_per_consumer, + get_topic_postfix, + kafka_consume_with_retry, +) +from test_storage_kafka.conftest import ( + conftest_cluster, + init_cluster_and_instance, + KAFKA_TOPIC_OLD, + KAFKA_CONSUMER_GROUP_OLD, + KAFKA_TOPIC_NEW, + KAFKA_CONSUMER_GROUP_NEW, +) -from . import kafka_pb2 -from . import social_pb2 -from . import message_with_repeated_pb2 +from test_storage_kafka import message_with_repeated_pb2 +# Skip all tests on ARM if is_arm(): pytestmark = pytest.mark.skip @@ -48,407 +65,14 @@ if is_arm(): # TODO: add test for SELECT LIMIT is working. 
-KAFKA_TOPIC_OLD = "old_t" -KAFKA_CONSUMER_GROUP_OLD = "old_cg" -KAFKA_TOPIC_NEW = "new_t" -KAFKA_CONSUMER_GROUP_NEW = "new_cg" - - -cluster = ClickHouseCluster(__file__) -instance = cluster.add_instance( - "instance", - main_configs=["configs/kafka.xml", "configs/named_collection.xml"], - user_configs=["configs/users.xml"], - with_kafka=True, - with_zookeeper=True, # For Replicated Table - macros={ - "kafka_broker": "kafka1", - "kafka_topic_old": KAFKA_TOPIC_OLD, - "kafka_group_name_old": KAFKA_CONSUMER_GROUP_OLD, - "kafka_topic_new": KAFKA_TOPIC_NEW, - "kafka_group_name_new": KAFKA_CONSUMER_GROUP_NEW, - "kafka_client_id": "instance", - "kafka_format_json_each_row": "JSONEachRow", - }, - clickhouse_path_dir="clickhouse_path", -) - - -def get_kafka_producer(port, serializer, retries): - errors = [] - for _ in range(retries): - try: - producer = KafkaProducer( - bootstrap_servers="localhost:{}".format(port), - value_serializer=serializer, - ) - logging.debug("Kafka Connection establised: localhost:{}".format(port)) - return producer - except Exception as e: - errors += [str(e)] - time.sleep(1) - - raise Exception("Connection not establised, {}".format(errors)) - - -def producer_serializer(x): - return x.encode() if isinstance(x, str) else x - - -def kafka_create_topic( - admin_client, - topic_name, - num_partitions=1, - replication_factor=1, - max_retries=50, - config=None, -): - logging.debug( - f"Kafka create topic={topic_name}, num_partitions={num_partitions}, replication_factor={replication_factor}" - ) - topics_list = [ - NewTopic( - name=topic_name, - num_partitions=num_partitions, - replication_factor=replication_factor, - topic_configs=config, - ) - ] - retries = 0 - while True: - try: - admin_client.create_topics(new_topics=topics_list, validate_only=False) - logging.debug("Admin client succeed") - return - except Exception as e: - retries += 1 - time.sleep(0.5) - if retries < max_retries: - logging.warning(f"Failed to create topic {e}") - else: - raise - - -def kafka_delete_topic(admin_client, topic, max_retries=50): - result = admin_client.delete_topics([topic]) - for topic, e in result.topic_error_codes: - if e == 0: - logging.debug(f"Topic {topic} deleted") - else: - logging.error(f"Failed to delete topic {topic}: {e}") - - retries = 0 - while True: - topics_listed = admin_client.list_topics() - logging.debug(f"TOPICS LISTED: {topics_listed}") - if topic not in topics_listed: - return - else: - retries += 1 - time.sleep(0.5) - if retries > max_retries: - raise Exception(f"Failed to delete topics {topic}, {result}") - - -@contextmanager -def kafka_topic( - admin_client, - topic_name, - num_partitions=1, - replication_factor=1, - max_retries=50, - config=None, -): - kafka_create_topic( - admin_client, - topic_name, - num_partitions, - replication_factor, - max_retries, - config, - ) - try: - yield None - finally: - # Code to release resource, e.g.: - kafka_delete_topic(admin_client, topic_name, max_retries) - - -@contextmanager -def existing_kafka_topic(admin_client, topic_name, max_retries=50): - try: - yield None - finally: - kafka_delete_topic(admin_client, topic_name, max_retries) - - -def get_admin_client(kafka_cluster): - return KafkaAdminClient( - bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) - ) - - -def kafka_produce(kafka_cluster, topic, messages, timestamp=None, retries=15): - logging.debug( - "kafka_produce server:{}:{} topic:{}".format( - "localhost", kafka_cluster.kafka_port, topic - ) - ) - producer = get_kafka_producer( - 
kafka_cluster.kafka_port, producer_serializer, retries - ) - for message in messages: - producer.send(topic=topic, value=message, timestamp_ms=timestamp) - producer.flush() - - -## just to ensure the python client / producer is working properly -def kafka_producer_send_heartbeat_msg(max_retries=50): - kafka_produce(kafka_cluster, "test_heartbeat_topic", ["test"], retries=max_retries) - - -def kafka_consume(kafka_cluster, topic, need_decode=True, timestamp=0): - consumer = KafkaConsumer( - bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port), - auto_offset_reset="earliest", - ) - consumer.subscribe(topics=(topic)) - for toppar, messages in list(consumer.poll(5000).items()): - if toppar.topic == topic: - for message in messages: - assert timestamp == 0 or message.timestamp / 1000 == timestamp - if need_decode: - yield message.value.decode() - else: - yield message.value - consumer.unsubscribe() - consumer.close() - - -def kafka_produce_protobuf_messages(kafka_cluster, topic, start_index, num_messages): - data = b"" - for i in range(start_index, start_index + num_messages): - msg = kafka_pb2.KeyValuePair() - msg.key = i - msg.value = str(i) - serialized_msg = msg.SerializeToString() - data = data + _VarintBytes(len(serialized_msg)) + serialized_msg - producer = KafkaProducer( - bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port), - value_serializer=producer_serializer, - ) - producer.send(topic=topic, value=data) - producer.flush() - logging.debug(("Produced {} messages for topic {}".format(num_messages, topic))) - - -def kafka_consume_with_retry( - kafka_cluster, - topic, - expected_messages, - need_decode=True, - timestamp=0, - retry_count=20, - sleep_time=0.1, -): - messages = [] - try_count = 0 - while try_count < retry_count: - try_count += 1 - messages.extend( - kafka_consume( - kafka_cluster, topic, need_decode=need_decode, timestamp=timestamp - ) - ) - if len(messages) == expected_messages: - break - time.sleep(sleep_time) - if len(messages) != expected_messages: - raise Exception(f"Got only {len(messages)} messages") - return messages - - -def kafka_produce_protobuf_messages_no_delimiters( - kafka_cluster, topic, start_index, num_messages -): - data = "" - producer = KafkaProducer( - bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) - ) - for i in range(start_index, start_index + num_messages): - msg = kafka_pb2.KeyValuePair() - msg.key = i - msg.value = str(i) - serialized_msg = msg.SerializeToString() - producer.send(topic=topic, value=serialized_msg) - producer.flush() - logging.debug("Produced {} messages for topic {}".format(num_messages, topic)) - - -def kafka_produce_protobuf_social(kafka_cluster, topic, start_index, num_messages): - data = b"" - for i in range(start_index, start_index + num_messages): - msg = social_pb2.User() - msg.username = "John Doe {}".format(i) - msg.timestamp = 1000000 + i - serialized_msg = msg.SerializeToString() - data = data + _VarintBytes(len(serialized_msg)) + serialized_msg - producer = KafkaProducer( - bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port), - value_serializer=producer_serializer, - ) - producer.send(topic=topic, value=data) - producer.flush() - logging.debug(("Produced {} messages for topic {}".format(num_messages, topic))) - - -def avro_message(value): - schema = avro.schema.make_avsc_object( - { - "name": "row", - "type": "record", - "fields": [ - {"name": "id", "type": "long"}, - {"name": "blockNo", "type": "int"}, - {"name": "val1", "type": "string"}, - {"name": "val2", 
"type": "float"}, - {"name": "val3", "type": "int"}, - ], - } - ) - bytes_writer = io.BytesIO() - # writer = avro.io.DatumWriter(schema) - # encoder = avro.io.BinaryEncoder(bytes_writer) - # writer.write(value, encoder) - - # DataFileWrite seems to be mandatory to get schema encoded - writer = avro.datafile.DataFileWriter(bytes_writer, avro.io.DatumWriter(), schema) - if isinstance(value, list): - for v in value: - writer.append(v) - else: - writer.append(value) - writer.flush() - raw_bytes = bytes_writer.getvalue() - - writer.close() - bytes_writer.close() - return raw_bytes - - -def avro_confluent_message(schema_registry_client, value): - # type: (CachedSchemaRegistryClient, dict) -> str - - serializer = MessageSerializer(schema_registry_client) - schema = avro.schema.make_avsc_object( - { - "name": "row", - "type": "record", - "fields": [ - {"name": "id", "type": "long"}, - {"name": "blockNo", "type": "int"}, - {"name": "val1", "type": "string"}, - {"name": "val2", "type": "float"}, - {"name": "val3", "type": "int"}, - ], - } - ) - return serializer.encode_record_with_schema("test_subject", schema, value) - - -def create_settings_string(settings): - if settings is None: - return "" - - def format_value(value): - if isinstance(value, str): - return f"'{value}'" - elif isinstance(value, bool): - return str(int(value)) - return str(value) - - settings_string = "SETTINGS " - keys = settings.keys() - first_key = next(iter(settings)) - settings_string += str(first_key) + " = " + format_value(settings[first_key]) - for key in keys: - if key == first_key: - continue - settings_string += ", " + str(key) + " = " + format_value(settings[key]) - return settings_string - - -def generate_old_create_table_query( - table_name, - columns_def, - database="test", - brokers="{kafka_broker}:19092", - topic_list="{kafka_topic_new}", - consumer_group="{kafka_group_name_new}", - format="{kafka_format_json_each_row}", - row_delimiter="\\n", - keeper_path=None, # it is not used, but it is easier to handle keeper_path and replica_name like this - replica_name=None, - settings=None, -): - settings_string = create_settings_string(settings) - query = f"""CREATE TABLE {database}.{table_name} ({columns_def}) ENGINE = Kafka('{brokers}', '{topic_list}', '{consumer_group}', '{format}', '{row_delimiter}') -{settings_string}""" - logging.debug(f"Generated old create query: {query}") - return query - - -def generate_new_create_table_query( - table_name, - columns_def, - database="test", - brokers="{kafka_broker}:19092", - topic_list="{kafka_topic_new}", - consumer_group="{kafka_group_name_new}", - format="{kafka_format_json_each_row}", - row_delimiter="\\n", - keeper_path=None, - replica_name=None, - settings=None, -): - if settings is None: - settings = {} - if keeper_path is None: - keeper_path = f"/clickhouse/{{database}}/{table_name}" - if replica_name is None: - replica_name = "r1" - settings["kafka_keeper_path"] = keeper_path - settings["kafka_replica_name"] = replica_name - settings_string = create_settings_string(settings) - query = f"""CREATE TABLE {database}.{table_name} ({columns_def}) ENGINE = Kafka('{brokers}', '{topic_list}', '{consumer_group}', '{format}', '{row_delimiter}') -{settings_string} -SETTINGS allow_experimental_kafka_offsets_storage_in_keeper=1""" - logging.debug(f"Generated new create query: {query}") - return query - - -def must_use_thread_per_consumer(generator): - if generator == generate_old_create_table_query: - return False - if generator == generate_new_create_table_query: - return True - 
raise Exception("Unexpected generator") - - -def get_topic_postfix(generator): - if generator == generate_old_create_table_query: - return "old" - if generator == generate_new_create_table_query: - return "new" - raise Exception("Unexpected generator") - - # Tests @pytest.mark.parametrize( "create_query_generator, do_direct_read", [(generate_old_create_table_query, True), (generate_new_create_table_query, False)], ) -def test_kafka_column_types(kafka_cluster, create_query_generator, do_direct_read): +def test_kafka_column_types( + kafka_cluster, instance, create_query_generator, do_direct_read +): def assert_returned_exception(e): assert e.value.returncode == 36 assert ( @@ -512,7 +136,7 @@ def test_kafka_column_types(kafka_cluster, create_query_generator, do_direct_rea instance.query("DROP TABLE test.kafka SYNC") -def test_kafka_settings_old_syntax(kafka_cluster): +def test_kafka_settings_old_syntax(kafka_cluster, instance): assert TSV( instance.query( "SELECT * FROM system.macros WHERE macro like 'kafka%' ORDER BY macro", @@ -557,7 +181,7 @@ kafka_topic_old old_t # text_desc = kafka_cluster.exec_in_container(kafka_cluster.get_container_id('kafka1'),"kafka-consumer-groups --bootstrap-server localhost:9092 --describe --members --group old --verbose")) -def test_kafka_settings_new_syntax(kafka_cluster): +def test_kafka_settings_new_syntax(kafka_cluster, instance): instance.query( """ CREATE TABLE test.kafka (key UInt64, value UInt64) @@ -599,7 +223,7 @@ def test_kafka_settings_new_syntax(kafka_cluster): assert members[0]["client_id"] == "instance test 1234" -def test_kafka_settings_predefined_macros(kafka_cluster): +def test_kafka_settings_predefined_macros(kafka_cluster, instance): instance.query( """ CREATE TABLE test.kafka (key UInt64, value UInt64) @@ -627,7 +251,7 @@ def test_kafka_settings_predefined_macros(kafka_cluster): assert members[0]["client_id"] == "test_kafka test 1234" -def test_kafka_json_as_string(kafka_cluster): +def test_kafka_json_as_string(kafka_cluster, instance): kafka_produce( kafka_cluster, "kafka_json_as_string", @@ -641,7 +265,7 @@ def test_kafka_json_as_string(kafka_cluster): # 'tombstone' record (null value) = marker of deleted record producer = KafkaProducer( - bootstrap_servers="localhost:{}".format(cluster.kafka_port), + bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port), value_serializer=producer_serializer, key_serializer=producer_serializer, ) @@ -677,7 +301,7 @@ def test_kafka_json_as_string(kafka_cluster): "create_query_generator", [generate_old_create_table_query, generate_new_create_table_query], ) -def test_kafka_formats(kafka_cluster, create_query_generator): +def test_kafka_formats(kafka_cluster, instance, create_query_generator): schema_registry_client = CachedSchemaRegistryClient( {"url": f"http://localhost:{kafka_cluster.schema_registry_port}"} ) @@ -1145,50 +769,8 @@ def describe_consumer_group(kafka_cluster, name): return res -# Fixtures - - -@pytest.fixture(scope="module") -def kafka_cluster(): - try: - cluster.start() - kafka_id = instance.cluster.kafka_docker_id - print(("kafka_id is {}".format(kafka_id))) - yield cluster - finally: - cluster.shutdown() - - -@pytest.fixture(autouse=True) -def kafka_setup_teardown(): - instance.query("DROP DATABASE IF EXISTS test SYNC; CREATE DATABASE test;") - admin_client = get_admin_client(cluster) - - def get_topics_to_delete(): - return [t for t in admin_client.list_topics() if not t.startswith("_")] - - topics = get_topics_to_delete() - logging.debug(f"Deleting topics: {topics}") - 
result = admin_client.delete_topics(topics) - for topic, error in result.topic_error_codes: - if error != 0: - logging.warning(f"Received error {error} while deleting topic {topic}") - else: - logging.info(f"Deleted topic {topic}") - - retries = 0 - topics = get_topics_to_delete() - while len(topics) != 0: - logging.info(f"Existing topics: {topics}") - if retries >= 5: - raise Exception(f"Failed to delete topics {topics}") - retries += 1 - time.sleep(0.5) - yield # run test - - # Tests -def test_kafka_issue11308(kafka_cluster): +def test_kafka_issue11308(kafka_cluster, instance): # Check that matview does respect Kafka SETTINGS kafka_produce( kafka_cluster, @@ -1247,7 +829,7 @@ def test_kafka_issue11308(kafka_cluster): assert TSV(result) == TSV(expected) -def test_kafka_issue4116(kafka_cluster): +def test_kafka_issue4116(kafka_cluster, instance): # Check that format_csv_delimiter parameter works now - as part of all available format settings. kafka_produce( kafka_cluster, @@ -1282,7 +864,7 @@ def test_kafka_issue4116(kafka_cluster): assert TSV(result) == TSV(expected) -def test_kafka_consumer_hang(kafka_cluster): +def test_kafka_consumer_hang(kafka_cluster, instance): admin_client = get_admin_client(kafka_cluster) topic_name = "consumer_hang" @@ -1362,7 +944,7 @@ def test_kafka_consumer_hang(kafka_cluster): kafka_delete_topic(admin_client, topic_name) -def test_kafka_consumer_hang2(kafka_cluster): +def test_kafka_consumer_hang2(kafka_cluster, instance): admin_client = get_admin_client(kafka_cluster) topic_name = "consumer_hang2" @@ -1423,7 +1005,7 @@ def test_kafka_consumer_hang2(kafka_cluster): # sequential read from different consumers leads to breaking lot of kafka invariants # (first consumer will get all partitions initially, and may have problems in doing polls every 60 sec) -def test_kafka_read_consumers_in_parallel(kafka_cluster): +def test_kafka_read_consumers_in_parallel(kafka_cluster, instance): admin_client = get_admin_client(kafka_cluster) topic_name = "read_consumers_in_parallel" @@ -1489,7 +1071,7 @@ def test_kafka_read_consumers_in_parallel(kafka_cluster): kafka_delete_topic(admin_client, topic_name) -def test_kafka_csv_with_delimiter(kafka_cluster): +def test_kafka_csv_with_delimiter(kafka_cluster, instance): messages = [] for i in range(50): messages.append("{i}, {i}".format(i=i)) @@ -1516,7 +1098,7 @@ def test_kafka_csv_with_delimiter(kafka_cluster): kafka_check_result(result, True) -def test_kafka_tsv_with_delimiter(kafka_cluster): +def test_kafka_tsv_with_delimiter(kafka_cluster, instance): messages = [] for i in range(50): messages.append("{i}\t{i}".format(i=i)) @@ -1543,7 +1125,7 @@ def test_kafka_tsv_with_delimiter(kafka_cluster): kafka_check_result(result, True) -def test_kafka_select_empty(kafka_cluster): +def test_kafka_select_empty(kafka_cluster, instance): admin_client = get_admin_client(kafka_cluster) topic_name = "empty" kafka_create_topic(admin_client, topic_name) @@ -1565,7 +1147,7 @@ def test_kafka_select_empty(kafka_cluster): kafka_delete_topic(admin_client, topic_name) -def test_kafka_json_without_delimiter(kafka_cluster): +def test_kafka_json_without_delimiter(kafka_cluster, instance): messages = "" for i in range(25): messages += json.dumps({"key": i, "value": i}) + "\n" @@ -1597,7 +1179,7 @@ def test_kafka_json_without_delimiter(kafka_cluster): kafka_check_result(result, True) -def test_kafka_protobuf(kafka_cluster): +def test_kafka_protobuf(kafka_cluster, instance): kafka_produce_protobuf_messages(kafka_cluster, "pb", 0, 20) 
kafka_produce_protobuf_messages(kafka_cluster, "pb", 20, 1) kafka_produce_protobuf_messages(kafka_cluster, "pb", 21, 29) @@ -1624,7 +1206,7 @@ def test_kafka_protobuf(kafka_cluster): kafka_check_result(result, True) -def test_kafka_string_field_on_first_position_in_protobuf(kafka_cluster): +def test_kafka_string_field_on_first_position_in_protobuf(kafka_cluster, instance): # https://github.com/ClickHouse/ClickHouse/issues/12615 kafka_produce_protobuf_social( kafka_cluster, "string_field_on_first_position_in_protobuf", 0, 20 @@ -1708,7 +1290,7 @@ John Doe 49 1000049 assert TSV(result) == TSV(expected) -def test_kafka_protobuf_no_delimiter(kafka_cluster): +def test_kafka_protobuf_no_delimiter(kafka_cluster, instance): instance.query( """ CREATE TABLE test.kafka (key UInt64, value String) @@ -1773,7 +1355,7 @@ def test_kafka_protobuf_no_delimiter(kafka_cluster): "create_query_generator", [generate_old_create_table_query, generate_new_create_table_query], ) -def test_kafka_materialized_view(kafka_cluster, create_query_generator): +def test_kafka_materialized_view(kafka_cluster, instance, create_query_generator): topic_name = "mv" instance.query( @@ -1825,7 +1407,9 @@ def test_kafka_materialized_view(kafka_cluster, create_query_generator): ), ], ) -def test_kafka_recreate_kafka_table(kafka_cluster, create_query_generator, log_line): +def test_kafka_recreate_kafka_table( + kafka_cluster, instance, create_query_generator, log_line +): """ Checks that materialized view work properly after dropping and recreating the Kafka table. """ @@ -1911,7 +1495,9 @@ def test_kafka_recreate_kafka_table(kafka_cluster, create_query_generator, log_l ), ], ) -def test_librdkafka_compression(kafka_cluster, create_query_generator, log_line): +def test_librdkafka_compression( + kafka_cluster, instance, create_query_generator, log_line +): """ Regression for UB in snappy-c (that is used in librdkafka), backport pr is [1]. 
@@ -1992,7 +1578,9 @@ def test_librdkafka_compression(kafka_cluster, create_query_generator, log_line) "create_query_generator", [generate_new_create_table_query, generate_old_create_table_query], ) -def test_kafka_materialized_view_with_subquery(kafka_cluster, create_query_generator): +def test_kafka_materialized_view_with_subquery( + kafka_cluster, instance, create_query_generator +): topic_name = "mysq" logging.debug(f"Using topic {topic_name}") @@ -2044,7 +1632,7 @@ def test_kafka_materialized_view_with_subquery(kafka_cluster, create_query_gener "create_query_generator", [generate_old_create_table_query, generate_new_create_table_query], ) -def test_kafka_many_materialized_views(kafka_cluster, create_query_generator): +def test_kafka_many_materialized_views(kafka_cluster, instance, create_query_generator): topic_name = f"mmv-{get_topic_postfix(create_query_generator)}" create_query = create_query_generator( "kafka", @@ -2103,7 +1691,7 @@ def test_kafka_many_materialized_views(kafka_cluster, create_query_generator): "create_query_generator", [generate_old_create_table_query, generate_new_create_table_query], ) -def test_kafka_flush_on_big_message(kafka_cluster, create_query_generator): +def test_kafka_flush_on_big_message(kafka_cluster, instance, create_query_generator): # Create batches of messages of size ~100Kb kafka_messages = 1000 batch_messages = 1000 @@ -2166,7 +1754,7 @@ def test_kafka_flush_on_big_message(kafka_cluster, create_query_generator): ), "ClickHouse lost some messages: {}".format(result) -def test_kafka_virtual_columns(kafka_cluster): +def test_kafka_virtual_columns(kafka_cluster, instance): topic_config = { # default retention, since predefined timestamp_ms is used. "retention.ms": "-1", @@ -2211,7 +1799,7 @@ def test_kafka_virtual_columns(kafka_cluster): [generate_old_create_table_query, generate_new_create_table_query], ) def test_kafka_virtual_columns_with_materialized_view( - kafka_cluster, create_query_generator + kafka_cluster, instance, create_query_generator ): topic_config = { # default retention, since predefined timestamp_ms is used. 
@@ -2282,7 +1870,7 @@ def insert_with_retry(instance, values, table_name="kafka", max_try_count=5): "create_query_generator", [generate_old_create_table_query, generate_new_create_table_query], ) -def test_kafka_insert(kafka_cluster, create_query_generator): +def test_kafka_insert(kafka_cluster, instance, create_query_generator): topic_name = "insert1" + get_topic_postfix(create_query_generator) instance.query( @@ -2313,7 +1901,7 @@ def test_kafka_insert(kafka_cluster, create_query_generator): "create_query_generator", [generate_old_create_table_query, generate_new_create_table_query], ) -def test_kafka_produce_consume(kafka_cluster, create_query_generator): +def test_kafka_produce_consume(kafka_cluster, instance, create_query_generator): topic_name = "insert2" + get_topic_postfix(create_query_generator) create_query = create_query_generator( @@ -2382,7 +1970,7 @@ def test_kafka_produce_consume(kafka_cluster, create_query_generator): "create_query_generator", [generate_old_create_table_query, generate_new_create_table_query], ) -def test_kafka_commit_on_block_write(kafka_cluster, create_query_generator): +def test_kafka_commit_on_block_write(kafka_cluster, instance, create_query_generator): topic_name = "block" + get_topic_postfix(create_query_generator) create_query = create_query_generator( "kafka", @@ -2464,7 +2052,9 @@ def test_kafka_commit_on_block_write(kafka_cluster, create_query_generator): ), ], ) -def test_kafka_virtual_columns2(kafka_cluster, create_query_generator, log_line): +def test_kafka_virtual_columns2( + kafka_cluster, instance, create_query_generator, log_line +): admin_client = get_admin_client(kafka_cluster) topic_config = { @@ -2500,7 +2090,7 @@ def test_kafka_virtual_columns2(kafka_cluster, create_query_generator, log_line) ) producer = KafkaProducer( - bootstrap_servers="localhost:{}".format(cluster.kafka_port), + bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port), value_serializer=producer_serializer, key_serializer=producer_serializer, ) @@ -2617,7 +2207,7 @@ def test_kafka_virtual_columns2(kafka_cluster, create_query_generator, log_line) [(generate_old_create_table_query, True), (generate_new_create_table_query, False)], ) def test_kafka_producer_consumer_separate_settings( - kafka_cluster, create_query_generator, do_direct_read + kafka_cluster, instance, create_query_generator, do_direct_read ): instance.rotate_logs() instance.query( @@ -2700,7 +2290,9 @@ def test_kafka_producer_consumer_separate_settings( (generate_old_create_table_query, "Committed offset 5"), ], ) -def test_kafka_produce_key_timestamp(kafka_cluster, create_query_generator, log_line): +def test_kafka_produce_key_timestamp( + kafka_cluster, instance, create_query_generator, log_line +): topic_name = "insert3" topic_config = { # default retention, since predefined timestamp_ms is used. 
@@ -2780,7 +2372,7 @@ def test_kafka_produce_key_timestamp(kafka_cluster, create_query_generator, log_ "create_query_generator", [generate_old_create_table_query, generate_new_create_table_query], ) -def test_kafka_insert_avro(kafka_cluster, create_query_generator): +def test_kafka_insert_avro(kafka_cluster, instance, create_query_generator): admin_client = KafkaAdminClient( bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) ) @@ -2835,7 +2427,7 @@ def test_kafka_insert_avro(kafka_cluster, create_query_generator): "create_query_generator", [generate_old_create_table_query, generate_new_create_table_query], ) -def test_kafka_produce_consume_avro(kafka_cluster, create_query_generator): +def test_kafka_produce_consume_avro(kafka_cluster, instance, create_query_generator): topic_name = "insert_avro" + get_topic_postfix(create_query_generator) with kafka_topic(get_admin_client(kafka_cluster), topic_name): num_rows = 75 @@ -2895,7 +2487,7 @@ def test_kafka_produce_consume_avro(kafka_cluster, create_query_generator): "create_query_generator", [generate_old_create_table_query, generate_new_create_table_query], ) -def test_kafka_flush_by_time(kafka_cluster, create_query_generator): +def test_kafka_flush_by_time(kafka_cluster, instance, create_query_generator): admin_client = KafkaAdminClient( bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) ) @@ -2964,7 +2556,7 @@ def test_kafka_flush_by_time(kafka_cluster, create_query_generator): "create_query_generator", [generate_old_create_table_query, generate_new_create_table_query], ) -def test_kafka_flush_by_block_size(kafka_cluster, create_query_generator): +def test_kafka_flush_by_block_size(kafka_cluster, instance, create_query_generator): topic_name = "flush_by_block_size" + get_topic_postfix(create_query_generator) cancel = threading.Event() @@ -3042,7 +2634,7 @@ def test_kafka_flush_by_block_size(kafka_cluster, create_query_generator): [generate_old_create_table_query, generate_new_create_table_query], ) def test_kafka_lot_of_partitions_partial_commit_of_bulk( - kafka_cluster, create_query_generator + kafka_cluster, instance, create_query_generator ): admin_client = KafkaAdminClient( bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) @@ -3108,7 +2700,7 @@ def test_kafka_lot_of_partitions_partial_commit_of_bulk( (generate_new_create_table_query, "{}.*Saved offset"), ], ) -def test_kafka_rebalance(kafka_cluster, create_query_generator, log_line): +def test_kafka_rebalance(kafka_cluster, instance, create_query_generator, log_line): NUMBER_OF_CONSURRENT_CONSUMERS = 11 instance.query( @@ -3283,7 +2875,9 @@ def test_kafka_rebalance(kafka_cluster, create_query_generator, log_line): # generate_new_create_table_query, ], ) -def test_kafka_no_holes_when_write_suffix_failed(kafka_cluster, create_query_generator): +def test_kafka_no_holes_when_write_suffix_failed( + kafka_cluster, instance, create_query_generator +): admin_client = KafkaAdminClient( bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) ) @@ -3357,7 +2951,7 @@ def test_kafka_no_holes_when_write_suffix_failed(kafka_cluster, create_query_gen assert TSV(result) == TSV("22\t22\t22") -def test_exception_from_destructor(kafka_cluster): +def test_exception_from_destructor(kafka_cluster, instance): instance.query( """ CREATE TABLE test.kafka (key UInt64, value String) @@ -3404,7 +2998,9 @@ def test_exception_from_destructor(kafka_cluster): "create_query_generator", [generate_old_create_table_query, generate_new_create_table_query], ) -def 
test_commits_of_unprocessed_messages_on_drop(kafka_cluster, create_query_generator): +def test_commits_of_unprocessed_messages_on_drop( + kafka_cluster, instance, create_query_generator +): topic_name = "commits_of_unprocessed_messages_on_drop" + get_topic_postfix( create_query_generator ) @@ -3517,7 +3113,7 @@ def test_commits_of_unprocessed_messages_on_drop(kafka_cluster, create_query_gen "create_query_generator", [generate_old_create_table_query, generate_new_create_table_query], ) -def test_bad_reschedule(kafka_cluster, create_query_generator): +def test_bad_reschedule(kafka_cluster, instance, create_query_generator): topic_name = "test_bad_reschedule" + get_topic_postfix(create_query_generator) messages = [json.dumps({"key": j + 1, "value": j + 1}) for j in range(20000)] @@ -3563,7 +3159,7 @@ def test_bad_reschedule(kafka_cluster, create_query_generator): ) -def test_kafka_duplicates_when_commit_failed(kafka_cluster): +def test_kafka_duplicates_when_commit_failed(kafka_cluster, instance): messages = [json.dumps({"key": j + 1, "value": "x" * 300}) for j in range(22)] kafka_produce(kafka_cluster, "duplicates_when_commit_failed", messages) @@ -3642,7 +3238,7 @@ def test_kafka_duplicates_when_commit_failed(kafka_cluster): "create_query_generator", [generate_old_create_table_query, generate_new_create_table_query], ) -def test_premature_flush_on_eof(kafka_cluster, create_query_generator): +def test_premature_flush_on_eof(kafka_cluster, instance, create_query_generator): topic_name = "premature_flush_on_eof" + get_topic_postfix(create_query_generator) create_query = create_query_generator( "kafka", @@ -3723,7 +3319,9 @@ def test_premature_flush_on_eof(kafka_cluster, create_query_generator): "create_query_generator, do_direct_read", [(generate_old_create_table_query, True), (generate_new_create_table_query, False)], ) -def test_kafka_unavailable(kafka_cluster, create_query_generator, do_direct_read): +def test_kafka_unavailable( + kafka_cluster, instance, create_query_generator, do_direct_read +): number_of_messages = 20000 topic_name = "test_bad_reschedule" + get_topic_postfix(create_query_generator) messages = [ @@ -3780,7 +3378,7 @@ def test_kafka_unavailable(kafka_cluster, create_query_generator, do_direct_read "create_query_generator", [generate_old_create_table_query, generate_new_create_table_query], ) -def test_kafka_issue14202(kafka_cluster, create_query_generator): +def test_kafka_issue14202(kafka_cluster, instance, create_query_generator): """ INSERT INTO Kafka Engine from an empty SELECT sub query was leading to failure """ @@ -3819,7 +3417,7 @@ def test_kafka_issue14202(kafka_cluster, create_query_generator): ) -def test_kafka_csv_with_thread_per_consumer(kafka_cluster): +def test_kafka_csv_with_thread_per_consumer(kafka_cluster, instance): instance.query( """ CREATE TABLE test.kafka (key UInt64, value UInt64) @@ -3857,7 +3455,9 @@ def random_string(size=8): "create_query_generator", [generate_old_create_table_query, generate_new_create_table_query], ) -def test_kafka_engine_put_errors_to_stream(kafka_cluster, create_query_generator): +def test_kafka_engine_put_errors_to_stream( + kafka_cluster, instance, create_query_generator +): topic_name = "kafka_engine_put_errors_to_stream" + get_topic_postfix( create_query_generator ) @@ -3951,7 +3551,7 @@ def gen_message_with_jsons(jsons=10, malformed=0): [generate_old_create_table_query, generate_new_create_table_query], ) def test_kafka_engine_put_errors_to_stream_with_random_malformed_json( - kafka_cluster, create_query_generator + 
kafka_cluster, instance, create_query_generator ): topic_name = ( "kafka_engine_put_errors_to_stream_with_random_malformed_json" @@ -4020,7 +3620,9 @@ def test_kafka_engine_put_errors_to_stream_with_random_malformed_json( "create_query_generator", [generate_old_create_table_query, generate_new_create_table_query], ) -def test_kafka_formats_with_broken_message(kafka_cluster, create_query_generator): +def test_kafka_formats_with_broken_message( + kafka_cluster, instance, create_query_generator +): # data was dumped from clickhouse itself in a following manner # clickhouse-client --format=Native --query='SELECT toInt64(number) as id, toUInt16( intDiv( id, 65536 ) ) as blockNo, reinterpretAsString(19777) as val1, toFloat32(0.5) as val2, toUInt8(1) as val3 from numbers(100) ORDER BY id' | xxd -ps | tr -d '\n' | sed 's/\(..\)/\\x\1/g' admin_client = KafkaAdminClient( @@ -4356,7 +3958,7 @@ def test_kafka_formats_with_broken_message(kafka_cluster, create_query_generator generate_new_create_table_query, ], ) -def test_kafka_consumer_failover(kafka_cluster, create_query_generator): +def test_kafka_consumer_failover(kafka_cluster, instance, create_query_generator): topic_name = "kafka_consumer_failover" + get_topic_postfix(create_query_generator) with kafka_topic(get_admin_client(kafka_cluster), topic_name, num_partitions=2): @@ -4405,7 +4007,7 @@ def test_kafka_consumer_failover(kafka_cluster, create_query_generator): ) producer = KafkaProducer( - bootstrap_servers="localhost:{}".format(cluster.kafka_port), + bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port), value_serializer=producer_serializer, key_serializer=producer_serializer, ) @@ -4548,7 +4150,7 @@ def test_kafka_consumer_failover(kafka_cluster, create_query_generator): ) -def test_kafka_predefined_configuration(kafka_cluster): +def test_kafka_predefined_configuration(kafka_cluster, instance): admin_client = KafkaAdminClient( bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) ) @@ -4579,7 +4181,7 @@ def test_kafka_predefined_configuration(kafka_cluster): "create_query_generator", [generate_old_create_table_query, generate_new_create_table_query], ) -def test_issue26643(kafka_cluster, create_query_generator): +def test_issue26643(kafka_cluster, instance, create_query_generator): producer = KafkaProducer( bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port), value_serializer=producer_serializer, @@ -4699,7 +4301,7 @@ def test_issue26643(kafka_cluster, create_query_generator): "create_query_generator", [generate_old_create_table_query, generate_new_create_table_query], ) -def test_num_consumers_limit(kafka_cluster, create_query_generator): +def test_num_consumers_limit(kafka_cluster, instance, create_query_generator): instance.query("DROP TABLE IF EXISTS test.kafka") thread_per_consumer = must_use_thread_per_consumer(create_query_generator) @@ -4733,7 +4335,7 @@ def test_num_consumers_limit(kafka_cluster, create_query_generator): "create_query_generator", [generate_old_create_table_query, generate_new_create_table_query], ) -def test_format_with_prefix_and_suffix(kafka_cluster, create_query_generator): +def test_format_with_prefix_and_suffix(kafka_cluster, instance, create_query_generator): topic_name = "custom" + get_topic_postfix(create_query_generator) with kafka_topic(get_admin_client(kafka_cluster), topic_name): @@ -4770,7 +4372,7 @@ def test_format_with_prefix_and_suffix(kafka_cluster, create_query_generator): "create_query_generator", [generate_old_create_table_query, 
generate_new_create_table_query], ) -def test_max_rows_per_message(kafka_cluster, create_query_generator): +def test_max_rows_per_message(kafka_cluster, instance, create_query_generator): topic_name = "custom_max_rows_per_message" + get_topic_postfix( create_query_generator ) @@ -4828,7 +4430,7 @@ def test_max_rows_per_message(kafka_cluster, create_query_generator): "create_query_generator", [generate_old_create_table_query, generate_new_create_table_query], ) -def test_row_based_formats(kafka_cluster, create_query_generator): +def test_row_based_formats(kafka_cluster, instance, create_query_generator): admin_client = get_admin_client(kafka_cluster) for format_name in [ @@ -4904,7 +4506,7 @@ def test_row_based_formats(kafka_cluster, create_query_generator): "create_query_generator", [generate_old_create_table_query, generate_new_create_table_query], ) -def test_block_based_formats_1(kafka_cluster, create_query_generator): +def test_block_based_formats_1(kafka_cluster, instance, create_query_generator): topic_name = "pretty_space" + get_topic_postfix(create_query_generator) with kafka_topic(get_admin_client(kafka_cluster), topic_name): @@ -4950,7 +4552,7 @@ def test_block_based_formats_1(kafka_cluster, create_query_generator): "create_query_generator", [generate_old_create_table_query, generate_new_create_table_query], ) -def test_block_based_formats_2(kafka_cluster, create_query_generator): +def test_block_based_formats_2(kafka_cluster, instance, create_query_generator): admin_client = get_admin_client(kafka_cluster) num_rows = 100 message_count = 9 @@ -5009,7 +4611,7 @@ def test_block_based_formats_2(kafka_cluster, create_query_generator): assert result == expected -def test_system_kafka_consumers(kafka_cluster): +def test_system_kafka_consumers(kafka_cluster, instance): admin_client = KafkaAdminClient( bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) ) @@ -5085,14 +4687,14 @@ is_currently_used: 0 kafka_delete_topic(admin_client, topic) -def test_system_kafka_consumers_rebalance(kafka_cluster, max_retries=15): +def test_system_kafka_consumers_rebalance(kafka_cluster, instance): # based on test_kafka_consumer_hang2 admin_client = KafkaAdminClient( bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) ) producer = KafkaProducer( - bootstrap_servers="localhost:{}".format(cluster.kafka_port), + bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port), value_serializer=producer_serializer, key_serializer=producer_serializer, ) @@ -5204,13 +4806,13 @@ is_currently_used: 0 kafka_delete_topic(admin_client, topic) -def test_system_kafka_consumers_rebalance_mv(kafka_cluster, max_retries=15): +def test_system_kafka_consumers_rebalance_mv(kafka_cluster, instance, max_retries=15): admin_client = KafkaAdminClient( bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) ) producer = KafkaProducer( - bootstrap_servers="localhost:{}".format(cluster.kafka_port), + bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port), value_serializer=producer_serializer, key_serializer=producer_serializer, ) @@ -5295,7 +4897,7 @@ JSONExtractString(rdkafka_stat, 'type'): consumer kafka_delete_topic(admin_client, topic) -def test_formats_errors(kafka_cluster): +def test_formats_errors(kafka_cluster, instance): admin_client = KafkaAdminClient( bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) ) @@ -5390,7 +4992,9 @@ def test_formats_errors(kafka_cluster): "create_query_generator", [generate_old_create_table_query, generate_new_create_table_query], ) 
-def test_multiple_read_in_materialized_views(kafka_cluster, create_query_generator): +def test_multiple_read_in_materialized_views( + kafka_cluster, instance, create_query_generator +): topic_name = "multiple_read_from_mv" + get_topic_postfix(create_query_generator) with kafka_topic(get_admin_client(kafka_cluster), topic_name): @@ -5468,7 +5072,7 @@ def test_multiple_read_in_materialized_views(kafka_cluster, create_query_generat "create_query_generator", [generate_old_create_table_query, generate_new_create_table_query], ) -def test_kafka_null_message(kafka_cluster, create_query_generator): +def test_kafka_null_message(kafka_cluster, instance, create_query_generator): topic_name = "null_message" instance.query( @@ -5525,6 +5129,7 @@ def test_kafka_null_message(kafka_cluster, create_query_generator): if __name__ == "__main__": - cluster.start() + init_cluster_and_instance() + conftest_cluster.start() input("Cluster created, press any key to destroy...") - cluster.shutdown() + conftest_cluster.shutdown() diff --git a/tests/integration/test_storage_kafka/test_produce_http_interface.py b/tests/integration/test_storage_kafka/test_produce_http_interface.py index fc10a07f239..8a666d019d3 100644 --- a/tests/integration/test_storage_kafka/test_produce_http_interface.py +++ b/tests/integration/test_storage_kafka/test_produce_http_interface.py @@ -1,109 +1,21 @@ -import time import logging - import pytest -from helpers.cluster import ClickHouseCluster, is_arm -from helpers.test_tools import TSV -from kafka import KafkaAdminClient -from kafka.admin import NewTopic +from helpers.cluster import is_arm +from helpers.test_tools import TSV +from kafka.admin import KafkaAdminClient +from test_storage_kafka.kafka_tests_utils import ( + kafka_create_topic, + kafka_delete_topic, +) +from test_storage_kafka.conftest import conftest_cluster, init_cluster_and_instance + +# Skip all tests on ARM if is_arm(): pytestmark = pytest.mark.skip -cluster = ClickHouseCluster(__file__) -instance = cluster.add_instance( - "instance", - main_configs=["configs/kafka.xml", "configs/named_collection.xml"], - user_configs=["configs/users.xml"], - with_kafka=True, - with_zookeeper=True, # For Replicated Table - macros={ - "kafka_broker": "kafka1", - "kafka_topic_old": "old", - "kafka_group_name_old": "old", - "kafka_topic_new": "new", - "kafka_group_name_new": "new", - "kafka_client_id": "instance", - "kafka_format_json_each_row": "JSONEachRow", - }, - clickhouse_path_dir="clickhouse_path", -) - -@pytest.fixture(scope="module") -def kafka_cluster(): - try: - cluster.start() - kafka_id = instance.cluster.kafka_docker_id - print(("kafka_id is {}".format(kafka_id))) - yield cluster - finally: - cluster.shutdown() - - -@pytest.fixture(autouse=True) -def kafka_setup_teardown(): - instance.query("DROP DATABASE IF EXISTS test; CREATE DATABASE test;") - # logging.debug("kafka is available - running test") - yield # run test - - -def kafka_create_topic( - admin_client, - topic_name, - num_partitions=1, - replication_factor=1, - max_retries=50, - config=None, -): - logging.debug( - f"Kafka create topic={topic_name}, num_partitions={num_partitions}, replication_factor={replication_factor}" - ) - topics_list = [ - NewTopic( - name=topic_name, - num_partitions=num_partitions, - replication_factor=replication_factor, - topic_configs=config, - ) - ] - retries = 0 - while True: - try: - admin_client.create_topics(new_topics=topics_list, validate_only=False) - logging.debug("Admin client succeed") - return - except Exception as e: - retries += 1 
- time.sleep(0.5)
- if retries < max_retries:
- logging.warning(f"Failed to create topic {e}")
- else:
- raise
-
-
-def kafka_delete_topic(admin_client, topic, max_retries=50):
- result = admin_client.delete_topics([topic])
- for topic, e in result.topic_error_codes:
- if e == 0:
- logging.debug(f"Topic {topic} deleted")
- else:
- logging.error(f"Failed to delete topic {topic}: {e}")
-
- retries = 0
- while True:
- topics_listed = admin_client.list_topics()
- logging.debug(f"TOPICS LISTED: {topics_listed}")
- if topic not in topics_listed:
- return
- else:
- retries += 1
- time.sleep(0.5)
- if retries > max_retries:
- raise Exception(f"Failed to delete topics {topic}, {result}")
-
-
-def test_kafka_produce_http_interface_row_based_format(kafka_cluster):
+def test_kafka_produce_http_interface_row_based_format(kafka_cluster, instance):
 # reproduction of #61060 with validating the written messages
 admin_client = KafkaAdminClient(
 bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port)
@@ -238,6 +150,7 @@ def test_kafka_produce_http_interface_row_based_format(kafka_cluster):
 if __name__ == "__main__":
- cluster.start()
+ init_cluster_and_instance()
+ conftest_cluster.start()
 input("Cluster created, press any key to destroy...")
- cluster.shutdown()
+ conftest_cluster.shutdown()

From 01766a5e309209f8537ef3f37329596918896c88 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?=
Date: Tue, 10 Sep 2024 15:37:11 +0000
Subject: [PATCH 02/11] Minor speedups in very long tests

---
 .../test_storage_kafka/conftest.py | 7 +-
 tests/integration/test_storage_kafka/test.py | 478 ++++++++++++------
 2 files changed, 339 insertions(+), 146 deletions(-)

diff --git a/tests/integration/test_storage_kafka/conftest.py b/tests/integration/test_storage_kafka/conftest.py
index d0fe9ca38f9..d205d98683e 100644
--- a/tests/integration/test_storage_kafka/conftest.py
+++ b/tests/integration/test_storage_kafka/conftest.py
@@ -1,8 +1,9 @@
+from typing import Iterator
 import pytest
 import logging
 import time

-from helpers.cluster import ClickHouseCluster
+from helpers.cluster import ClickHouseCluster, ClickHouseInstance
 from test_storage_kafka.kafka_tests_utils import get_admin_client

 pytestmark = pytest.mark.skip
@@ -47,7 +48,7 @@ def init_cluster_and_instance():

 @pytest.fixture(scope="module")
-def kafka_cluster():
+def kafka_cluster() -> "Iterator[ClickHouseCluster]":
 try:
 init_cluster_and_instance()
 conftest_cluster.start()
@@ -60,7 +61,7 @@ def kafka_cluster():

 # kafka_cluster is requested here to ensure the cluster is initialized before we yield the instance
 @pytest.fixture()
-def instance(kafka_cluster):
+def instance(kafka_cluster) -> "Iterator[ClickHouseInstance]":
 yield conftest_instance

diff --git a/tests/integration/test_storage_kafka/test.py b/tests/integration/test_storage_kafka/test.py
index 1d2053f13cb..9affd4adf6f 100644
--- a/tests/integration/test_storage_kafka/test.py
+++ b/tests/integration/test_storage_kafka/test.py
@@ -23,7 +23,7 @@
 from google.protobuf.internal.encoder import _VarintBytes

 from helpers.client import QueryRuntimeException
-from helpers.cluster import is_arm
+from helpers.cluster import is_arm, ClickHouseCluster, ClickHouseInstance
 from helpers.network import PartitionManager
 from helpers.test_tools import TSV
 from test_storage_kafka.kafka_tests_utils import (
@@ -71,7 +71,10 @@ if is_arm():
 [(generate_old_create_table_query, True), (generate_new_create_table_query, False)],
 )
 def test_kafka_column_types(
- kafka_cluster, instance, create_query_generator,
do_direct_read + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, + do_direct_read, ): def assert_returned_exception(e): assert e.value.returncode == 36 @@ -136,7 +139,9 @@ def test_kafka_column_types( instance.query("DROP TABLE test.kafka SYNC") -def test_kafka_settings_old_syntax(kafka_cluster, instance): +def test_kafka_settings_old_syntax( + kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance +): assert TSV( instance.query( "SELECT * FROM system.macros WHERE macro like 'kafka%' ORDER BY macro", @@ -181,7 +186,9 @@ kafka_topic_old old_t # text_desc = kafka_cluster.exec_in_container(kafka_cluster.get_container_id('kafka1'),"kafka-consumer-groups --bootstrap-server localhost:9092 --describe --members --group old --verbose")) -def test_kafka_settings_new_syntax(kafka_cluster, instance): +def test_kafka_settings_new_syntax( + kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance +): instance.query( """ CREATE TABLE test.kafka (key UInt64, value UInt64) @@ -223,7 +230,9 @@ def test_kafka_settings_new_syntax(kafka_cluster, instance): assert members[0]["client_id"] == "instance test 1234" -def test_kafka_settings_predefined_macros(kafka_cluster, instance): +def test_kafka_settings_predefined_macros( + kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance +): instance.query( """ CREATE TABLE test.kafka (key UInt64, value UInt64) @@ -251,7 +260,9 @@ def test_kafka_settings_predefined_macros(kafka_cluster, instance): assert members[0]["client_id"] == "test_kafka test 1234" -def test_kafka_json_as_string(kafka_cluster, instance): +def test_kafka_json_as_string( + kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance +): kafka_produce( kafka_cluster, "kafka_json_as_string", @@ -301,7 +312,11 @@ def test_kafka_json_as_string(kafka_cluster, instance): "create_query_generator", [generate_old_create_table_query, generate_new_create_table_query], ) -def test_kafka_formats(kafka_cluster, instance, create_query_generator): +def test_kafka_formats( + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, +): schema_registry_client = CachedSchemaRegistryClient( {"url": f"http://localhost:{kafka_cluster.schema_registry_port}"} ) @@ -770,7 +785,9 @@ def describe_consumer_group(kafka_cluster, name): # Tests -def test_kafka_issue11308(kafka_cluster, instance): +def test_kafka_issue11308( + kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance +): # Check that matview does respect Kafka SETTINGS kafka_produce( kafka_cluster, @@ -829,7 +846,9 @@ def test_kafka_issue11308(kafka_cluster, instance): assert TSV(result) == TSV(expected) -def test_kafka_issue4116(kafka_cluster, instance): +def test_kafka_issue4116( + kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance +): # Check that format_csv_delimiter parameter works now - as part of all available format settings. 
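 # For illustration only (hypothetical DDL, not this test's actual query): because
 # format settings are accepted as regular Kafka engine settings, a custom delimiter
 # can be declared directly on the table, e.g.
 #   CREATE TABLE t (key UInt64, value UInt64) ENGINE = Kafka
 #   SETTINGS kafka_broker_list = 'kafka1:19092', kafka_topic_list = 'issue4116',
 #            kafka_group_name = 'issue4116', kafka_format = 'CSV',
 #            format_csv_delimiter = '|';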
kafka_produce( kafka_cluster, @@ -864,7 +883,9 @@ def test_kafka_issue4116(kafka_cluster, instance): assert TSV(result) == TSV(expected) -def test_kafka_consumer_hang(kafka_cluster, instance): +def test_kafka_consumer_hang( + kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance +): admin_client = get_admin_client(kafka_cluster) topic_name = "consumer_hang" @@ -944,7 +965,9 @@ def test_kafka_consumer_hang(kafka_cluster, instance): kafka_delete_topic(admin_client, topic_name) -def test_kafka_consumer_hang2(kafka_cluster, instance): +def test_kafka_consumer_hang2( + kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance +): admin_client = get_admin_client(kafka_cluster) topic_name = "consumer_hang2" @@ -1005,7 +1028,9 @@ def test_kafka_consumer_hang2(kafka_cluster, instance): # sequential read from different consumers leads to breaking lot of kafka invariants # (first consumer will get all partitions initially, and may have problems in doing polls every 60 sec) -def test_kafka_read_consumers_in_parallel(kafka_cluster, instance): +def test_kafka_read_consumers_in_parallel( + kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance +): admin_client = get_admin_client(kafka_cluster) topic_name = "read_consumers_in_parallel" @@ -1071,7 +1096,9 @@ def test_kafka_read_consumers_in_parallel(kafka_cluster, instance): kafka_delete_topic(admin_client, topic_name) -def test_kafka_csv_with_delimiter(kafka_cluster, instance): +def test_kafka_csv_with_delimiter( + kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance +): messages = [] for i in range(50): messages.append("{i}, {i}".format(i=i)) @@ -1098,7 +1125,9 @@ def test_kafka_csv_with_delimiter(kafka_cluster, instance): kafka_check_result(result, True) -def test_kafka_tsv_with_delimiter(kafka_cluster, instance): +def test_kafka_tsv_with_delimiter( + kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance +): messages = [] for i in range(50): messages.append("{i}\t{i}".format(i=i)) @@ -1125,7 +1154,9 @@ def test_kafka_tsv_with_delimiter(kafka_cluster, instance): kafka_check_result(result, True) -def test_kafka_select_empty(kafka_cluster, instance): +def test_kafka_select_empty( + kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance +): admin_client = get_admin_client(kafka_cluster) topic_name = "empty" kafka_create_topic(admin_client, topic_name) @@ -1147,7 +1178,9 @@ def test_kafka_select_empty(kafka_cluster, instance): kafka_delete_topic(admin_client, topic_name) -def test_kafka_json_without_delimiter(kafka_cluster, instance): +def test_kafka_json_without_delimiter( + kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance +): messages = "" for i in range(25): messages += json.dumps({"key": i, "value": i}) + "\n" @@ -1179,7 +1212,7 @@ def test_kafka_json_without_delimiter(kafka_cluster, instance): kafka_check_result(result, True) -def test_kafka_protobuf(kafka_cluster, instance): +def test_kafka_protobuf(kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance): kafka_produce_protobuf_messages(kafka_cluster, "pb", 0, 20) kafka_produce_protobuf_messages(kafka_cluster, "pb", 20, 1) kafka_produce_protobuf_messages(kafka_cluster, "pb", 21, 29) @@ -1206,7 +1239,9 @@ def test_kafka_protobuf(kafka_cluster, instance): kafka_check_result(result, True) -def test_kafka_string_field_on_first_position_in_protobuf(kafka_cluster, instance): +def test_kafka_string_field_on_first_position_in_protobuf( + kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance +): # 
https://github.com/ClickHouse/ClickHouse/issues/12615 kafka_produce_protobuf_social( kafka_cluster, "string_field_on_first_position_in_protobuf", 0, 20 @@ -1290,7 +1325,9 @@ John Doe 49 1000049 assert TSV(result) == TSV(expected) -def test_kafka_protobuf_no_delimiter(kafka_cluster, instance): +def test_kafka_protobuf_no_delimiter( + kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance +): instance.query( """ CREATE TABLE test.kafka (key UInt64, value String) @@ -1355,7 +1392,11 @@ def test_kafka_protobuf_no_delimiter(kafka_cluster, instance): "create_query_generator", [generate_old_create_table_query, generate_new_create_table_query], ) -def test_kafka_materialized_view(kafka_cluster, instance, create_query_generator): +def test_kafka_materialized_view( + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, +): topic_name = "mv" instance.query( @@ -1408,7 +1449,10 @@ def test_kafka_materialized_view(kafka_cluster, instance, create_query_generator ], ) def test_kafka_recreate_kafka_table( - kafka_cluster, instance, create_query_generator, log_line + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, + log_line, ): """ Checks that materialized view work properly after dropping and recreating the Kafka table. @@ -1496,7 +1540,10 @@ def test_kafka_recreate_kafka_table( ], ) def test_librdkafka_compression( - kafka_cluster, instance, create_query_generator, log_line + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, + log_line, ): """ Regression for UB in snappy-c (that is used in librdkafka), @@ -1579,7 +1626,9 @@ def test_librdkafka_compression( [generate_new_create_table_query, generate_old_create_table_query], ) def test_kafka_materialized_view_with_subquery( - kafka_cluster, instance, create_query_generator + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, ): topic_name = "mysq" logging.debug(f"Using topic {topic_name}") @@ -1632,7 +1681,11 @@ def test_kafka_materialized_view_with_subquery( "create_query_generator", [generate_old_create_table_query, generate_new_create_table_query], ) -def test_kafka_many_materialized_views(kafka_cluster, instance, create_query_generator): +def test_kafka_many_materialized_views( + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, +): topic_name = f"mmv-{get_topic_postfix(create_query_generator)}" create_query = create_query_generator( "kafka", @@ -1691,7 +1744,11 @@ def test_kafka_many_materialized_views(kafka_cluster, instance, create_query_gen "create_query_generator", [generate_old_create_table_query, generate_new_create_table_query], ) -def test_kafka_flush_on_big_message(kafka_cluster, instance, create_query_generator): +def test_kafka_flush_on_big_message( + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, +): # Create batches of messages of size ~100Kb kafka_messages = 1000 batch_messages = 1000 @@ -1754,7 +1811,9 @@ def test_kafka_flush_on_big_message(kafka_cluster, instance, create_query_genera ), "ClickHouse lost some messages: {}".format(result) -def test_kafka_virtual_columns(kafka_cluster, instance): +def test_kafka_virtual_columns( + kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance +): topic_config = { # default retention, since predefined timestamp_ms is used. 
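 # (in Kafka, retention.ms = -1 disables time-based log cleanup, so the produced
 # messages are still present whenever the test gets around to reading them)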
"retention.ms": "-1", @@ -1799,7 +1858,9 @@ def test_kafka_virtual_columns(kafka_cluster, instance): [generate_old_create_table_query, generate_new_create_table_query], ) def test_kafka_virtual_columns_with_materialized_view( - kafka_cluster, instance, create_query_generator + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, ): topic_config = { # default retention, since predefined timestamp_ms is used. @@ -1870,7 +1931,11 @@ def insert_with_retry(instance, values, table_name="kafka", max_try_count=5): "create_query_generator", [generate_old_create_table_query, generate_new_create_table_query], ) -def test_kafka_insert(kafka_cluster, instance, create_query_generator): +def test_kafka_insert( + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, +): topic_name = "insert1" + get_topic_postfix(create_query_generator) instance.query( @@ -1901,7 +1966,11 @@ def test_kafka_insert(kafka_cluster, instance, create_query_generator): "create_query_generator", [generate_old_create_table_query, generate_new_create_table_query], ) -def test_kafka_produce_consume(kafka_cluster, instance, create_query_generator): +def test_kafka_produce_consume( + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, +): topic_name = "insert2" + get_topic_postfix(create_query_generator) create_query = create_query_generator( @@ -1970,7 +2039,11 @@ def test_kafka_produce_consume(kafka_cluster, instance, create_query_generator): "create_query_generator", [generate_old_create_table_query, generate_new_create_table_query], ) -def test_kafka_commit_on_block_write(kafka_cluster, instance, create_query_generator): +def test_kafka_commit_on_block_write( + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, +): topic_name = "block" + get_topic_postfix(create_query_generator) create_query = create_query_generator( "kafka", @@ -2053,7 +2126,10 @@ def test_kafka_commit_on_block_write(kafka_cluster, instance, create_query_gener ], ) def test_kafka_virtual_columns2( - kafka_cluster, instance, create_query_generator, log_line + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, + log_line, ): admin_client = get_admin_client(kafka_cluster) @@ -2207,7 +2283,10 @@ def test_kafka_virtual_columns2( [(generate_old_create_table_query, True), (generate_new_create_table_query, False)], ) def test_kafka_producer_consumer_separate_settings( - kafka_cluster, instance, create_query_generator, do_direct_read + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, + do_direct_read, ): instance.rotate_logs() instance.query( @@ -2291,7 +2370,10 @@ def test_kafka_producer_consumer_separate_settings( ], ) def test_kafka_produce_key_timestamp( - kafka_cluster, instance, create_query_generator, log_line + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, + log_line, ): topic_name = "insert3" topic_config = { @@ -2372,7 +2454,11 @@ def test_kafka_produce_key_timestamp( "create_query_generator", [generate_old_create_table_query, generate_new_create_table_query], ) -def test_kafka_insert_avro(kafka_cluster, instance, create_query_generator): +def test_kafka_insert_avro( + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, +): admin_client = KafkaAdminClient( bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) ) @@ -2427,7 
+2513,11 @@ def test_kafka_insert_avro(kafka_cluster, instance, create_query_generator): "create_query_generator", [generate_old_create_table_query, generate_new_create_table_query], ) -def test_kafka_produce_consume_avro(kafka_cluster, instance, create_query_generator): +def test_kafka_produce_consume_avro( + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, +): topic_name = "insert_avro" + get_topic_postfix(create_query_generator) with kafka_topic(get_admin_client(kafka_cluster), topic_name): num_rows = 75 @@ -2487,7 +2577,11 @@ def test_kafka_produce_consume_avro(kafka_cluster, instance, create_query_genera "create_query_generator", [generate_old_create_table_query, generate_new_create_table_query], ) -def test_kafka_flush_by_time(kafka_cluster, instance, create_query_generator): +def test_kafka_flush_by_time( + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, +): admin_client = KafkaAdminClient( bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) ) @@ -2556,7 +2650,11 @@ def test_kafka_flush_by_time(kafka_cluster, instance, create_query_generator): "create_query_generator", [generate_old_create_table_query, generate_new_create_table_query], ) -def test_kafka_flush_by_block_size(kafka_cluster, instance, create_query_generator): +def test_kafka_flush_by_block_size( + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, +): topic_name = "flush_by_block_size" + get_topic_postfix(create_query_generator) cancel = threading.Event() @@ -2634,7 +2732,9 @@ def test_kafka_flush_by_block_size(kafka_cluster, instance, create_query_generat [generate_old_create_table_query, generate_new_create_table_query], ) def test_kafka_lot_of_partitions_partial_commit_of_bulk( - kafka_cluster, instance, create_query_generator + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, ): admin_client = KafkaAdminClient( bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) @@ -2700,7 +2800,12 @@ def test_kafka_lot_of_partitions_partial_commit_of_bulk( (generate_new_create_table_query, "{}.*Saved offset"), ], ) -def test_kafka_rebalance(kafka_cluster, instance, create_query_generator, log_line): +def test_kafka_rebalance( + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, + log_line, +): NUMBER_OF_CONSURRENT_CONSUMERS = 11 instance.query( @@ -2876,7 +2981,9 @@ def test_kafka_rebalance(kafka_cluster, instance, create_query_generator, log_li ], ) def test_kafka_no_holes_when_write_suffix_failed( - kafka_cluster, instance, create_query_generator + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, ): admin_client = KafkaAdminClient( bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) @@ -2951,7 +3058,9 @@ def test_kafka_no_holes_when_write_suffix_failed( assert TSV(result) == TSV("22\t22\t22") -def test_exception_from_destructor(kafka_cluster, instance): +def test_exception_from_destructor( + kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance +): instance.query( """ CREATE TABLE test.kafka (key UInt64, value String) @@ -2999,7 +3108,9 @@ def test_exception_from_destructor(kafka_cluster, instance): [generate_old_create_table_query, generate_new_create_table_query], ) def test_commits_of_unprocessed_messages_on_drop( - kafka_cluster, instance, create_query_generator + kafka_cluster: ClickHouseCluster, + instance: 
ClickHouseInstance, + create_query_generator, ): topic_name = "commits_of_unprocessed_messages_on_drop" + get_topic_postfix( create_query_generator @@ -3113,7 +3224,11 @@ def test_commits_of_unprocessed_messages_on_drop( "create_query_generator", [generate_old_create_table_query, generate_new_create_table_query], ) -def test_bad_reschedule(kafka_cluster, instance, create_query_generator): +def test_bad_reschedule( + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, +): topic_name = "test_bad_reschedule" + get_topic_postfix(create_query_generator) messages = [json.dumps({"key": j + 1, "value": j + 1}) for j in range(20000)] @@ -3159,7 +3274,9 @@ def test_bad_reschedule(kafka_cluster, instance, create_query_generator): ) -def test_kafka_duplicates_when_commit_failed(kafka_cluster, instance): +def test_kafka_duplicates_when_commit_failed( + kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance +): messages = [json.dumps({"key": j + 1, "value": "x" * 300}) for j in range(22)] kafka_produce(kafka_cluster, "duplicates_when_commit_failed", messages) @@ -3238,7 +3355,11 @@ def test_kafka_duplicates_when_commit_failed(kafka_cluster, instance): "create_query_generator", [generate_old_create_table_query, generate_new_create_table_query], ) -def test_premature_flush_on_eof(kafka_cluster, instance, create_query_generator): +def test_premature_flush_on_eof( + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, +): topic_name = "premature_flush_on_eof" + get_topic_postfix(create_query_generator) create_query = create_query_generator( "kafka", @@ -3320,7 +3441,10 @@ def test_premature_flush_on_eof(kafka_cluster, instance, create_query_generator) [(generate_old_create_table_query, True), (generate_new_create_table_query, False)], ) def test_kafka_unavailable( - kafka_cluster, instance, create_query_generator, do_direct_read + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, + do_direct_read, ): number_of_messages = 20000 topic_name = "test_bad_reschedule" + get_topic_postfix(create_query_generator) @@ -3378,7 +3502,11 @@ def test_kafka_unavailable( "create_query_generator", [generate_old_create_table_query, generate_new_create_table_query], ) -def test_kafka_issue14202(kafka_cluster, instance, create_query_generator): +def test_kafka_issue14202( + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, +): """ INSERT INTO Kafka Engine from an empty SELECT sub query was leading to failure """ @@ -3417,7 +3545,9 @@ def test_kafka_issue14202(kafka_cluster, instance, create_query_generator): ) -def test_kafka_csv_with_thread_per_consumer(kafka_cluster, instance): +def test_kafka_csv_with_thread_per_consumer( + kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance +): instance.query( """ CREATE TABLE test.kafka (key UInt64, value UInt64) @@ -3456,7 +3586,9 @@ def random_string(size=8): [generate_old_create_table_query, generate_new_create_table_query], ) def test_kafka_engine_put_errors_to_stream( - kafka_cluster, instance, create_query_generator + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, ): topic_name = "kafka_engine_put_errors_to_stream" + get_topic_postfix( create_query_generator @@ -3551,7 +3683,9 @@ def gen_message_with_jsons(jsons=10, malformed=0): [generate_old_create_table_query, generate_new_create_table_query], ) def 
test_kafka_engine_put_errors_to_stream_with_random_malformed_json( - kafka_cluster, instance, create_query_generator + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, ): topic_name = ( "kafka_engine_put_errors_to_stream_with_random_malformed_json" @@ -3621,7 +3755,9 @@ def test_kafka_engine_put_errors_to_stream_with_random_malformed_json( [generate_old_create_table_query, generate_new_create_table_query], ) def test_kafka_formats_with_broken_message( - kafka_cluster, instance, create_query_generator + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, ): # data was dumped from clickhouse itself in a following manner # clickhouse-client --format=Native --query='SELECT toInt64(number) as id, toUInt16( intDiv( id, 65536 ) ) as blockNo, reinterpretAsString(19777) as val1, toFloat32(0.5) as val2, toUInt8(1) as val3 from numbers(100) ORDER BY id' | xxd -ps | tr -d '\n' | sed 's/\(..\)/\\x\1/g' @@ -3958,7 +4094,11 @@ def test_kafka_formats_with_broken_message( generate_new_create_table_query, ], ) -def test_kafka_consumer_failover(kafka_cluster, instance, create_query_generator): +def test_kafka_consumer_failover( + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, +): topic_name = "kafka_consumer_failover" + get_topic_postfix(create_query_generator) with kafka_topic(get_admin_client(kafka_cluster), topic_name, num_partitions=2): @@ -4150,7 +4290,9 @@ def test_kafka_consumer_failover(kafka_cluster, instance, create_query_generator ) -def test_kafka_predefined_configuration(kafka_cluster, instance): +def test_kafka_predefined_configuration( + kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance +): admin_client = KafkaAdminClient( bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) ) @@ -4181,7 +4323,11 @@ def test_kafka_predefined_configuration(kafka_cluster, instance): "create_query_generator", [generate_old_create_table_query, generate_new_create_table_query], ) -def test_issue26643(kafka_cluster, instance, create_query_generator): +def test_issue26643( + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, +): producer = KafkaProducer( bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port), value_serializer=producer_serializer, @@ -4301,7 +4447,11 @@ def test_issue26643(kafka_cluster, instance, create_query_generator): "create_query_generator", [generate_old_create_table_query, generate_new_create_table_query], ) -def test_num_consumers_limit(kafka_cluster, instance, create_query_generator): +def test_num_consumers_limit( + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, +): instance.query("DROP TABLE IF EXISTS test.kafka") thread_per_consumer = must_use_thread_per_consumer(create_query_generator) @@ -4335,7 +4485,11 @@ def test_num_consumers_limit(kafka_cluster, instance, create_query_generator): "create_query_generator", [generate_old_create_table_query, generate_new_create_table_query], ) -def test_format_with_prefix_and_suffix(kafka_cluster, instance, create_query_generator): +def test_format_with_prefix_and_suffix( + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, +): topic_name = "custom" + get_topic_postfix(create_query_generator) with kafka_topic(get_admin_client(kafka_cluster), topic_name): @@ -4372,7 +4526,11 @@ def test_format_with_prefix_and_suffix(kafka_cluster, instance, create_query_gen 
"create_query_generator", [generate_old_create_table_query, generate_new_create_table_query], ) -def test_max_rows_per_message(kafka_cluster, instance, create_query_generator): +def test_max_rows_per_message( + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, +): topic_name = "custom_max_rows_per_message" + get_topic_postfix( create_query_generator ) @@ -4430,10 +4588,14 @@ def test_max_rows_per_message(kafka_cluster, instance, create_query_generator): "create_query_generator", [generate_old_create_table_query, generate_new_create_table_query], ) -def test_row_based_formats(kafka_cluster, instance, create_query_generator): +def test_row_based_formats( + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, +): admin_client = get_admin_client(kafka_cluster) - for format_name in [ + formats_to_test = [ "TSV", "TSVWithNamesAndTypes", "TSKV", @@ -4450,63 +4612,73 @@ def test_row_based_formats(kafka_cluster, instance, create_query_generator): "RowBinary", "RowBinaryWithNamesAndTypes", "MsgPack", - ]: - logging.debug("Checking {format_name}") + ] + + num_rows = 10 + max_rows_per_message = 5 + message_count = num_rows / max_rows_per_message + expected = "" + for i in range(num_rows): + expected += str(i * 10) + "\t" + str(i * 100) + "\n" + + for format_name in formats_to_test: + logging.debug("Setting up {format_name}") topic_name = format_name + get_topic_postfix(create_query_generator) table_name = f"kafka_{format_name}" - with kafka_topic(admin_client, topic_name): - num_rows = 10 - max_rows_per_message = 5 - message_count = num_rows / max_rows_per_message + kafka_create_topic(admin_client, topic_name) - create_query = create_query_generator( - table_name, - "key UInt64, value UInt64", - topic_list=topic_name, - consumer_group=topic_name, - format=format_name, - settings={"kafka_max_rows_per_message": max_rows_per_message}, - ) + create_query = create_query_generator( + table_name, + "key UInt64, value UInt64", + topic_list=topic_name, + consumer_group=topic_name, + format=format_name, + settings={"kafka_max_rows_per_message": max_rows_per_message}, + ) - instance.query( - f""" - DROP TABLE IF EXISTS test.view; - DROP TABLE IF EXISTS test.{table_name}; + instance.query( + f""" + DROP TABLE IF EXISTS test.{table_name}_view; + DROP TABLE IF EXISTS test.{table_name}; - {create_query}; + {create_query}; - CREATE MATERIALIZED VIEW test.view ENGINE=MergeTree ORDER BY (key, value) AS - SELECT key, value FROM test.{table_name}; + CREATE MATERIALIZED VIEW test.{table_name}_view ENGINE=MergeTree ORDER BY (key, value) AS + SELECT key, value FROM test.{table_name}; - INSERT INTO test.{table_name} SELECT number * 10 as key, number * 100 as value FROM numbers({num_rows}); - """ - ) + INSERT INTO test.{table_name} SELECT number * 10 as key, number * 100 as value FROM numbers({num_rows}); + """ + ) - messages = kafka_consume_with_retry( - kafka_cluster, topic_name, message_count, need_decode=False - ) + for format_name in formats_to_test: + topic_name = format_name + get_topic_postfix(create_query_generator) + table_name = f"kafka_{format_name}" + messages = kafka_consume_with_retry( + kafka_cluster, topic_name, message_count, need_decode=False + ) - assert len(messages) == message_count + assert len(messages) == message_count - instance.query_with_retry( - "SELECT count() FROM test.view", - check_callback=lambda res: int(res) == num_rows, - ) + result = instance.query_with_retry( + f"SELECT * FROM test.{table_name}_view", + 
check_callback=lambda res: res == expected, + ) - result = instance.query("SELECT * FROM test.view") - expected = "" - for i in range(num_rows): - expected += str(i * 10) + "\t" + str(i * 100) + "\n" - assert result == expected + assert result == expected + kafka_delete_topic(admin_client, topic_name) @pytest.mark.parametrize( "create_query_generator", [generate_old_create_table_query, generate_new_create_table_query], ) -def test_block_based_formats_1(kafka_cluster, instance, create_query_generator): +def test_block_based_formats_1( + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, +): topic_name = "pretty_space" + get_topic_postfix(create_query_generator) with kafka_topic(get_admin_client(kafka_cluster), topic_name): @@ -4552,7 +4724,11 @@ def test_block_based_formats_1(kafka_cluster, instance, create_query_generator): "create_query_generator", [generate_old_create_table_query, generate_new_create_table_query], ) -def test_block_based_formats_2(kafka_cluster, instance, create_query_generator): +def test_block_based_formats_2( + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, +): admin_client = get_admin_client(kafka_cluster) num_rows = 100 message_count = 9 @@ -4611,7 +4787,9 @@ def test_block_based_formats_2(kafka_cluster, instance, create_query_generator): assert result == expected -def test_system_kafka_consumers(kafka_cluster, instance): +def test_system_kafka_consumers( + kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance +): admin_client = KafkaAdminClient( bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) ) @@ -4687,7 +4865,9 @@ is_currently_used: 0 kafka_delete_topic(admin_client, topic) -def test_system_kafka_consumers_rebalance(kafka_cluster, instance): +def test_system_kafka_consumers_rebalance( + kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance +): # based on test_kafka_consumer_hang2 admin_client = KafkaAdminClient( bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) @@ -4806,7 +4986,9 @@ is_currently_used: 0 kafka_delete_topic(admin_client, topic) -def test_system_kafka_consumers_rebalance_mv(kafka_cluster, instance, max_retries=15): +def test_system_kafka_consumers_rebalance_mv( + kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance, max_retries=15 +): admin_client = KafkaAdminClient( bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) ) @@ -4897,12 +5079,12 @@ JSONExtractString(rdkafka_stat, 'type'): consumer kafka_delete_topic(admin_client, topic) -def test_formats_errors(kafka_cluster, instance): +def test_formats_errors(kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance): admin_client = KafkaAdminClient( bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) ) - for format_name in [ + formats_to_test = [ "Template", "Regexp", "TSV", @@ -4933,7 +5115,6 @@ def test_formats_errors(kafka_cluster, instance): "Arrow", "Parquet", "ORC", - "JSONCompactColumns", "Npy", "ParquetMetadata", "CapnProto", @@ -4943,49 +5124,54 @@ def test_formats_errors(kafka_cluster, instance): "DWARF", "HiveText", "MySQLDump", - ]: - with kafka_topic(admin_client, format_name): - table_name = f"kafka_{format_name}" + ] - instance.query( - f""" - DROP TABLE IF EXISTS test.view; - DROP TABLE IF EXISTS test.{table_name}; + for format_name in formats_to_test: + kafka_create_topic(admin_client, format_name) + table_name = f"kafka_{format_name}" - CREATE TABLE test.{table_name} (key UInt64, value UInt64) - ENGINE = 
Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = '{format_name}', - kafka_group_name = '{format_name}', - kafka_format = '{format_name}', - kafka_max_rows_per_message = 5, - format_template_row='template_row.format', - format_regexp='id: (.+?)', - input_format_with_names_use_header=0, - format_schema='key_value_message:Message'; + instance.query( + f""" + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.{table_name}; - CREATE MATERIALIZED VIEW test.view ENGINE=MergeTree ORDER BY (key, value) AS - SELECT key, value FROM test.{table_name}; - """ + CREATE TABLE test.{table_name} (key UInt64, value UInt64) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = '{format_name}', + kafka_group_name = '{format_name}', + kafka_format = '{format_name}', + kafka_max_rows_per_message = 5, + format_template_row='template_row.format', + format_regexp='id: (.+?)', + input_format_with_names_use_header=0, + format_schema='key_value_message:Message'; + + CREATE MATERIALIZED VIEW test.{table_name}_view ENGINE=MergeTree ORDER BY (key, value) AS + SELECT key, value FROM test.{table_name}; + """ + ) + + kafka_produce( + kafka_cluster, + format_name, + ["Broken message\nBroken message\nBroken message\n"], + ) + + for format_name in formats_to_test: + table_name = f"kafka_{format_name}" + num_errors = int( + instance.query_with_retry( + f"SELECT length(exceptions.text) from system.kafka_consumers where database = 'test' and table = '{table_name}'", + check_callback=lambda res: int(res) > 0, ) + ) - kafka_produce( - kafka_cluster, - format_name, - ["Broken message\nBroken message\nBroken message\n"], - ) + assert num_errors > 0, f"Failed {format_name}" - num_errors = int( - instance.query_with_retry( - f"SELECT length(exceptions.text) from system.kafka_consumers where database = 'test' and table = '{table_name}'", - check_callback=lambda res: int(res) > 0, - ) - ) - - assert num_errors > 0 - - instance.query(f"DROP TABLE test.{table_name}") - instance.query("DROP TABLE test.view") + instance.query(f"DROP TABLE test.{table_name}") + instance.query(f"DROP TABLE test.{table_name}_view") + kafka_delete_topic(admin_client, format_name) @pytest.mark.parametrize( @@ -4993,7 +5179,9 @@ def test_formats_errors(kafka_cluster, instance): [generate_old_create_table_query, generate_new_create_table_query], ) def test_multiple_read_in_materialized_views( - kafka_cluster, instance, create_query_generator + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, ): topic_name = "multiple_read_from_mv" + get_topic_postfix(create_query_generator) @@ -5072,7 +5260,11 @@ def test_multiple_read_in_materialized_views( "create_query_generator", [generate_old_create_table_query, generate_new_create_table_query], ) -def test_kafka_null_message(kafka_cluster, instance, create_query_generator): +def test_kafka_null_message( + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, +): topic_name = "null_message" instance.query( From 009749c414e8658c33300fbf4f601031420602ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Tue, 10 Sep 2024 15:37:40 +0000 Subject: [PATCH 03/11] Reduce rebalance time when consuming messages in tests --- tests/integration/test_storage_kafka/kafka_tests_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_storage_kafka/kafka_tests_utils.py b/tests/integration/test_storage_kafka/kafka_tests_utils.py 
index 5c32221b9cf..fa3ae062f14 100644
--- a/tests/integration/test_storage_kafka/kafka_tests_utils.py
+++ b/tests/integration/test_storage_kafka/kafka_tests_utils.py
@@ -148,8 +148,9 @@ def kafka_produce(kafka_cluster, topic, messages, timestamp=None, retries=15):

 def kafka_consume(kafka_cluster, topic, need_decode=True, timestamp=0):
 consumer = KafkaConsumer(
- bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port),
+ bootstrap_servers=f"localhost:{kafka_cluster.kafka_port}",
 auto_offset_reset="earliest",
+ session_timeout_ms=1000,
 )
 consumer.subscribe(topics=(topic))
 for toppar, messages in list(consumer.poll(5000).items()):

From 701b525bc9cbf4c6a8cb5a6942a9c7be15308eb8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?=
Date: Wed, 11 Sep 2024 08:11:09 +0000
Subject: [PATCH 04/11] Easy speedups

---
 tests/integration/test_storage_kafka/test.py | 11 +++++++++--
 .../test_storage_kafka/test_produce_http_interface.py | 6 +++++-
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/tests/integration/test_storage_kafka/test.py b/tests/integration/test_storage_kafka/test.py
index 9affd4adf6f..0783b50caae 100644
--- a/tests/integration/test_storage_kafka/test.py
+++ b/tests/integration/test_storage_kafka/test.py
@@ -4647,11 +4647,18 @@ def test_row_based_formats(

 CREATE MATERIALIZED VIEW test.{table_name}_view ENGINE=MergeTree ORDER BY (key, value) AS
 SELECT key, value FROM test.{table_name};
-
- INSERT INTO test.{table_name} SELECT number * 10 as key, number * 100 as value FROM numbers({num_rows});
 """
 )

+ for format_name in formats_to_test:
+ logging.debug(f"Inserting into {format_name}")
+
+ topic_name = format_name + get_topic_postfix(create_query_generator)
+ table_name = f"kafka_{format_name}"
+ instance.query(
+ f"INSERT INTO test.{table_name} SELECT number * 10 as key, number * 100 as value FROM numbers({num_rows})"
+ )
+
 for format_name in formats_to_test:
 topic_name = format_name + get_topic_postfix(create_query_generator)
 table_name = f"kafka_{format_name}"
diff --git a/tests/integration/test_storage_kafka/test_produce_http_interface.py b/tests/integration/test_storage_kafka/test_produce_http_interface.py
index 8a666d019d3..2d96c5e8282 100644
--- a/tests/integration/test_storage_kafka/test_produce_http_interface.py
+++ b/tests/integration/test_storage_kafka/test_produce_http_interface.py
@@ -94,7 +94,7 @@ def test_kafka_produce_http_interface_row_based_format(kafka_cluster, instance):
 "MsgPack",
 ]
 for format in formats_to_test:
- logging.debug(f"Creating tables and writing messages to {format}")
+ logging.debug(f"Creating tables for {format}")
 topic = topic_prefix + format
 kafka_create_topic(admin_client, topic)
@@ -124,6 +124,10 @@ def test_kafka_produce_http_interface_row_based_format(kafka_cluster, instance):
 SELECT key, value FROM test.kafka_{topic};
 """
 )
+
+ for format in formats_to_test:
+ logging.debug(f"Inserting data into {format}")
+ topic = topic_prefix + format
 instance.http_query(
 insert_query_template.format(table_name="test.kafka_writer_" + topic),
 method="POST",

From dc67e7cc832f5208c6329c531cd619cf44673421 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?=
Date: Wed, 11 Sep 2024 13:33:40 +0000
Subject: [PATCH 05/11] Split kafka tests

---
 tests/integration/parallel.readme | 3 +
 tests/integration/parallel_skip.json | 130 +-
 .../test_storage_kafka/kafka_tests_utils.py | 87 +-
 .../integration/test_storage_kafka/test_1.py | 1306 +++++
 .../integration/test_storage_kafka/test_2.py | 1232 +++++
.../integration/test_storage_kafka/test_3.py | 1088 ++++ .../test_storage_kafka/{test.py => test_4.py} | 4735 +++-------------- 7 files changed, 4381 insertions(+), 4200 deletions(-) create mode 100644 tests/integration/test_storage_kafka/test_1.py create mode 100644 tests/integration/test_storage_kafka/test_2.py create mode 100644 tests/integration/test_storage_kafka/test_3.py rename tests/integration/test_storage_kafka/{test.py => test_4.py} (57%) diff --git a/tests/integration/parallel.readme b/tests/integration/parallel.readme index 5533ab8b84d..205de2ca76c 100644 --- a/tests/integration/parallel.readme +++ b/tests/integration/parallel.readme @@ -4,3 +4,6 @@ ./runner ' --setup-plan' | grep '::' | sed 's/ (fixtures used:.*//g' | sed 's/^ *//g' | sed 's/ *$//g' | sort -u > all_tests.txt # 2. Filter known tests that are currently not run in parallel cat all_tests.txt | grep '^test_replicated_database\|^test_disabled_mysql_server\|^test_distributed_ddl\|^test_distributed_ddl\|^test_quorum_inserts_parallel\|^test_ddl_worker_non_leader\|^test_consistent_parts_after_clone_replica\|^test_materialized_mysql_database\|^test_atomic_drop_table\|^test_distributed_respect_user_timeouts\|^test_storage_kafka\|^test_replace_partition\|^test_replicated_fetches_timeouts\|^test_system_clusters_actual_information\|^test_delayed_replica_failover\|^test_limited_replicated_fetches\|^test_hedged_requests\|^test_insert_into_distributed\|^test_insert_into_distributed_through_materialized_view\|^test_drop_replica\|^test_attach_without_fetching\|^test_system_replicated_fetches\|^test_cross_replication\|^test_dictionary_allow_read_expired_keys\|^test_dictionary_allow_read_expired_keys\|^test_dictionary_allow_read_expired_keys\|^test_insert_into_distributed_sync_async\|^test_hedged_requests_parallel\|^test_dictionaries_update_field\|^test_broken_part_during_merge\|^test_random_inserts\|^test_reload_clusters_config\|^test_parts_delete_zookeeper\|^test_polymorphic_parts\|^test_keeper_multinode_simple\|^test_https_replication\|^test_storage_kerberized_kafka\|^test_cleanup_dir_after_bad_zk_conn\|^test_system_metrics\|^test_keeper_multinode_blocade_leader' | awk '{$1=$1;print}' | jq -R -n '[inputs] | .' 
> parallel_skip.json
+
+# To easily add all tests from a directory, the following command can be used to list them, e.g. in test_storage_kafka:
+pytest test_storage_kafka --collect-only -q
diff --git a/tests/integration/parallel_skip.json b/tests/integration/parallel_skip.json
index 507894534d4..ed1808bf27b 100644
--- a/tests/integration/parallel_skip.json
+++ b/tests/integration/parallel_skip.json
@@ -99,71 +99,71 @@
 "test_ttl_move/test.py::TestCancelBackgroundMoving::test_cancel_background_moving_on_table_detach",
 "test_ttl_move/test.py::TestCancelBackgroundMoving::test_cancel_background_moving_on_zookeeper_disconnect",

- "test_storage_kafka/test.py::test_kafka_column_types",
- "test_storage_kafka/test.py::test_kafka_settings_old_syntax",
- "test_storage_kafka/test.py::test_kafka_settings_new_syntax",
- "test_storage_kafka/test.py::test_kafka_settings_predefined_macros",
- "test_storage_kafka/test.py::test_kafka_json_as_string",
- "test_storage_kafka/test.py::test_kafka_formats",
- "test_storage_kafka/test.py::test_kafka_issue11308",
- "test_storage_kafka/test.py::test_kafka_issue4116",
- "test_storage_kafka/test.py::test_kafka_consumer_hang",
- "test_storage_kafka/test.py::test_kafka_consumer_hang2",
- "test_storage_kafka/test.py::test_kafka_read_consumers_in_parallel",
- "test_storage_kafka/test.py::test_kafka_csv_with_delimiter",
- "test_storage_kafka/test.py::test_kafka_tsv_with_delimiter",
- "test_storage_kafka/test.py::test_kafka_select_empty",
- "test_storage_kafka/test.py::test_kafka_json_without_delimiter",
- "test_storage_kafka/test.py::test_kafka_protobuf",
- "test_storage_kafka/test.py::test_kafka_string_field_on_first_position_in_protobuf",
- "test_storage_kafka/test.py::test_kafka_protobuf_no_delimiter",
- "test_storage_kafka/test.py::test_kafka_materialized_view",
- "test_storage_kafka/test.py::test_kafka_recreate_kafka_table",
- "test_storage_kafka/test.py::test_librdkafka_compression",
- "test_storage_kafka/test.py::test_kafka_materialized_view_with_subquery",
- "test_storage_kafka/test.py::test_kafka_many_materialized_views",
- "test_storage_kafka/test.py::test_kafka_flush_on_big_message",
- "test_storage_kafka/test.py::test_kafka_virtual_columns",
- "test_storage_kafka/test.py::test_kafka_virtual_columns_with_materialized_view",
- "test_storage_kafka/test.py::test_kafka_insert",
- "test_storage_kafka/test.py::test_kafka_produce_consume",
- "test_storage_kafka/test.py::test_kafka_commit_on_block_write",
- "test_storage_kafka/test.py::test_kafka_virtual_columns2",
- "test_storage_kafka/test.py::test_kafka_producer_consumer_separate_settings",
- "test_storage_kafka/test.py::test_kafka_produce_key_timestamp",
- "test_storage_kafka/test.py::test_kafka_insert_avro",
- "test_storage_kafka/test.py::test_kafka_produce_consume_avro",
- "test_storage_kafka/test.py::test_kafka_flush_by_time",
- "test_storage_kafka/test.py::test_kafka_flush_by_block_size",
- "test_storage_kafka/test.py::test_kafka_lot_of_partitions_partial_commit_of_bulk",
- "test_storage_kafka/test.py::test_kafka_rebalance",
- "test_storage_kafka/test.py::test_kafka_no_holes_when_write_suffix_failed",
- "test_storage_kafka/test.py::test_exception_from_destructor",
- "test_storage_kafka/test.py::test_commits_of_unprocessed_messages_on_drop",
- "test_storage_kafka/test.py::test_bad_reschedule",
- "test_storage_kafka/test.py::test_kafka_duplicates_when_commit_failed",
- "test_storage_kafka/test.py::test_premature_flush_on_eof",
- "test_storage_kafka/test.py::test_kafka_unavailable",
"test_storage_kafka/test.py::test_kafka_issue14202", - "test_storage_kafka/test.py::test_kafka_csv_with_thread_per_consumer", - "test_storage_kafka/test.py::test_kafka_engine_put_errors_to_stream", - "test_storage_kafka/test.py::test_kafka_engine_put_errors_to_stream_with_random_malformed_json", - "test_storage_kafka/test.py::test_kafka_formats_with_broken_message", - "test_storage_kafka/test.py::test_kafka_consumer_failover", - "test_storage_kafka/test.py::test_kafka_predefined_configuration", - "test_storage_kafka/test.py::test_issue26643", - "test_storage_kafka/test.py::test_num_consumers_limit", - "test_storage_kafka/test.py::test_format_with_prefix_and_suffix", - "test_storage_kafka/test.py::test_max_rows_per_message", - "test_storage_kafka/test.py::test_row_based_formats", - "test_storage_kafka/test.py::test_block_based_formats_1", - "test_storage_kafka/test.py::test_block_based_formats_2", - "test_storage_kafka/test.py::test_system_kafka_consumers", - "test_storage_kafka/test.py::test_system_kafka_consumers_rebalance", - "test_storage_kafka/test.py::test_system_kafka_consumers_rebalance_mv", - "test_storage_kafka/test.py::test_formats_errors", - "test_storage_kafka/test.py::test_multiple_read_in_materialized_views", - "test_storage_kafka/test.py::test_kafka_null_message", + "test_storage_kafka/test_1.py::test_kafka_consumer_failover", + "test_storage_kafka/test_1.py::test_row_based_formats", + "test_storage_kafka/test_1.py::test_kafka_duplicates_when_commit_failed", + "test_storage_kafka/test_1.py::test_commits_of_unprocessed_messages_on_drop", + "test_storage_kafka/test_1.py::test_kafka_produce_consume", + "test_storage_kafka/test_1.py::test_system_kafka_consumers_rebalance", + "test_storage_kafka/test_1.py::test_kafka_materialized_view", + "test_storage_kafka/test_1.py::test_kafka_materialized_view_with_subquery", + "test_storage_kafka/test_1.py::test_kafka_lot_of_partitions_partial_commit_of_bulk", + "test_storage_kafka/test_1.py::test_issue26643", + "test_storage_kafka/test_1.py::test_kafka_csv_with_thread_per_consumer", + "test_storage_kafka/test_1.py::test_kafka_select_empty", + "test_storage_kafka/test_1.py::test_kafka_issue11308", + "test_storage_kafka/test_1.py::test_kafka_protobuf", + "test_storage_kafka/test_1.py::test_kafka_predefined_configuration", + "test_storage_kafka/test_1.py::test_kafka_insert_avro", + "test_storage_kafka/test_1.py::test_exception_from_destructor", + "test_storage_kafka/test_2.py::test_kafka_unavailable", + "test_storage_kafka/test_2.py::test_bad_reschedule", + "test_storage_kafka/test_2.py::test_librdkafka_compression", + "test_storage_kafka/test_2.py::test_multiple_read_in_materialized_views", + "test_storage_kafka/test_2.py::test_kafka_virtual_columns2", + "test_storage_kafka/test_2.py::test_kafka_produce_key_timestamp", + "test_storage_kafka/test_2.py::test_kafka_engine_put_errors_to_stream_with_random_malformed_json", + "test_storage_kafka/test_2.py::test_kafka_produce_consume_avro", + "test_storage_kafka/test_2.py::test_kafka_virtual_columns_with_materialized_view", + "test_storage_kafka/test_2.py::test_system_kafka_consumers_rebalance_mv", + "test_storage_kafka/test_2.py::test_system_kafka_consumers", + "test_storage_kafka/test_2.py::test_kafka_insert", + "test_storage_kafka/test_2.py::test_kafka_settings_old_syntax", + "test_storage_kafka/test_2.py::test_kafka_json_without_delimiter", + "test_storage_kafka/test_2.py::test_kafka_string_field_on_first_position_in_protobuf", + "test_storage_kafka/test_2.py::test_block_based_formats_1", + 
"test_storage_kafka/test_3.py::test_formats_errors", + "test_storage_kafka/test_3.py::test_block_based_formats_2", + "test_storage_kafka/test_3.py::test_kafka_flush_on_big_message", + "test_storage_kafka/test_3.py::test_kafka_recreate_kafka_table", + "test_storage_kafka/test_3.py::test_kafka_commit_on_block_write", + "test_storage_kafka/test_3.py::test_kafka_no_holes_when_write_suffix_failed", + "test_storage_kafka/test_3.py::test_kafka_flush_by_block_size", + "test_storage_kafka/test_3.py::test_max_rows_per_message", + "test_storage_kafka/test_3.py::test_kafka_many_materialized_views", + "test_storage_kafka/test_3.py::test_kafka_engine_put_errors_to_stream", + "test_storage_kafka/test_3.py::test_kafka_producer_consumer_separate_settings", + "test_storage_kafka/test_3.py::test_kafka_virtual_columns", + "test_storage_kafka/test_3.py::test_kafka_settings_new_syntax", + "test_storage_kafka/test_3.py::test_kafka_tsv_with_delimiter", + "test_storage_kafka/test_3.py::test_kafka_csv_with_delimiter", + "test_storage_kafka/test_3.py::test_format_with_prefix_and_suffix", + "test_storage_kafka/test_4.py::test_kafka_formats", + "test_storage_kafka/test_4.py::test_kafka_rebalance", + "test_storage_kafka/test_4.py::test_kafka_formats_with_broken_message", + "test_storage_kafka/test_4.py::test_kafka_flush_by_time", + "test_storage_kafka/test_4.py::test_system_kafka_consumers_rebalance", + "test_storage_kafka/test_4.py::test_kafka_consumer_hang", + "test_storage_kafka/test_4.py::test_kafka_null_message", + "test_storage_kafka/test_4.py::test_kafka_read_consumers_in_parallel", + "test_storage_kafka/test_4.py::test_premature_flush_on_eof", + "test_storage_kafka/test_4.py::test_num_consumers_limit", + "test_storage_kafka/test_4.py::test_kafka_column_types", + "test_storage_kafka/test_4.py::test_kafka_protobuf_no_delimiter", + "test_storage_kafka/test_4.py::test_kafka_json_as_string", + "test_storage_kafka/test_4.py::test_kafka_settings_predefined_macros", + "test_storage_kafka/test_4.py::test_kafka_issue4116", + "test_storage_kafka/test_4.py::test_kafka_issue14202", "test_storage_kafka/test_produce_http_interface.py::test_kafka_produce_http_interface_row_based_format", diff --git a/tests/integration/test_storage_kafka/kafka_tests_utils.py b/tests/integration/test_storage_kafka/kafka_tests_utils.py index fa3ae062f14..a13b2f37d75 100644 --- a/tests/integration/test_storage_kafka/kafka_tests_utils.py +++ b/tests/integration/test_storage_kafka/kafka_tests_utils.py @@ -1,21 +1,26 @@ -import logging -import time -import avro.schema -import avro.io import io -from kafka.admin import NewTopic -from kafka import KafkaAdminClient, KafkaProducer, KafkaConsumer -from confluent_kafka.avro.serializer.message_serializer import MessageSerializer +import logging +import os +import socket +import time from contextlib import contextmanager -from google.protobuf.internal.encoder import _VarintBytes +import avro.io +import avro.schema +from confluent_kafka.avro.serializer.message_serializer import MessageSerializer +from google.protobuf.internal.encoder import _VarintBytes +from helpers.client import QueryRuntimeException +from helpers.test_tools import TSV +from kafka import BrokerConnection, KafkaAdminClient, KafkaConsumer, KafkaProducer +from kafka.admin import NewTopic +from kafka.protocol.admin import DescribeGroupsRequest_v1 +from kafka.protocol.group import MemberAssignment # protoc --version # libprotoc 3.0.0 # # to create kafka_pb2.py # protoc --python_out=. 
kafka.proto -from test_storage_kafka import kafka_pb2 -from test_storage_kafka import social_pb2 +from test_storage_kafka import kafka_pb2, social_pb2 @contextmanager @@ -211,7 +216,6 @@ def kafka_consume_with_retry( def kafka_produce_protobuf_messages_no_delimiters( kafka_cluster, topic, start_index, num_messages ): - data = "" producer = KafkaProducer( bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) ) @@ -379,3 +383,64 @@ def get_topic_postfix(generator): if generator == generate_new_create_table_query: return "new" raise Exception("Unexpected generator") + + +# Since everything is async and shaky when receiving messages from Kafka, +# we may want to try and check results multiple times in a loop. +def kafka_check_result(result, check=False, ref_file="test_kafka_json.reference"): + fpath = os.path.join(os.path.dirname(__file__), ref_file) + with open(fpath) as reference: + if check: + assert TSV(result) == TSV(reference) + else: + return TSV(result) == TSV(reference) + + +# https://stackoverflow.com/a/57692111/1555175 +def describe_consumer_group(kafka_cluster, name): + client = BrokerConnection("localhost", kafka_cluster.kafka_port, socket.AF_INET) + client.connect_blocking() + + list_members_in_groups = DescribeGroupsRequest_v1(groups=[name]) + future = client.send(list_members_in_groups) + while not future.is_done: + for resp, f in client.recv(): + f.success(resp) + + ( + error_code, + group_id, + state, + protocol_type, + protocol, + members, + ) = future.value.groups[0] + + res = [] + for member in members: + (member_id, client_id, client_host, member_metadata, member_assignment) = member + member_info = {} + member_info["member_id"] = member_id + member_info["client_id"] = client_id + member_info["client_host"] = client_host + member_topics_assignment = [] + for topic, partitions in MemberAssignment.decode(member_assignment).assignment: + member_topics_assignment.append({"topic": topic, "partitions": partitions}) + member_info["assignment"] = member_topics_assignment + res.append(member_info) + return res + + +def insert_with_retry(instance, values, table_name="kafka", max_try_count=5): + try_count = 0 + while True: + logging.debug(f"Inserting, try_count is {try_count}") + try: + try_count += 1 + instance.query(f"INSERT INTO test.{table_name} VALUES {values}") + break + except QueryRuntimeException as e: + if "Local: Timed out." 
in str(e) and try_count < max_try_count: + continue + else: + raise diff --git a/tests/integration/test_storage_kafka/test_1.py b/tests/integration/test_storage_kafka/test_1.py new file mode 100644 index 00000000000..59a587614bf --- /dev/null +++ b/tests/integration/test_storage_kafka/test_1.py @@ -0,0 +1,1306 @@ +import io +import json +import logging +import random +import threading +import time + +import avro.datafile +import avro.io +import avro.schema +import pytest +from google.protobuf.internal.encoder import _VarintBytes +from helpers.cluster import ClickHouseCluster, ClickHouseInstance, is_arm +from helpers.test_tools import TSV +from kafka import KafkaAdminClient, KafkaProducer +from test_storage_kafka import message_with_repeated_pb2 +from test_storage_kafka.conftest import conftest_cluster, init_cluster_and_instance +from test_storage_kafka.kafka_tests_utils import ( + existing_kafka_topic, + generate_new_create_table_query, + generate_old_create_table_query, + get_admin_client, + get_topic_postfix, + insert_with_retry, + kafka_check_result, + kafka_consume_with_retry, + kafka_create_topic, + kafka_delete_topic, + kafka_produce, + kafka_produce_protobuf_messages, + kafka_topic, + must_use_thread_per_consumer, + producer_serializer, +) + +# Skip all tests on ARM +if is_arm(): + pytestmark = pytest.mark.skip + +# TODO: add test for run-time offset update in CH, if we manually update it on Kafka side. +# TODO: add test for SELECT LIMIT is working. + + +def decode_avro(message): + b = io.BytesIO(message) + ret = avro.datafile.DataFileReader(b, avro.io.DatumReader()) + + output = io.StringIO() + for record in ret: + print(record, file=output) + return output.getvalue() + + +@pytest.mark.parametrize( + "create_query_generator", + [ + generate_old_create_table_query, + generate_new_create_table_query, + ], +) +def test_kafka_consumer_failover( + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, +): + topic_name = "kafka_consumer_failover" + get_topic_postfix(create_query_generator) + + with kafka_topic(get_admin_client(kafka_cluster), topic_name, num_partitions=2): + consumer_group = f"{topic_name}_group" + create_queries = [] + for counter in range(3): + create_queries.append( + create_query_generator( + f"kafka{counter+1}", + "key UInt64, value UInt64", + topic_list=topic_name, + consumer_group=consumer_group, + settings={ + "kafka_max_block_size": 1, + "kafka_poll_timeout_ms": 200, + }, + ) + ) + + instance.query( + f""" + {create_queries[0]}; + {create_queries[1]}; + {create_queries[2]}; + + CREATE TABLE test.destination ( + key UInt64, + value UInt64, + _consumed_by LowCardinality(String) + ) + ENGINE = MergeTree() + ORDER BY key; + + CREATE MATERIALIZED VIEW test.kafka1_mv TO test.destination AS + SELECT key, value, 'kafka1' as _consumed_by + FROM test.kafka1; + + CREATE MATERIALIZED VIEW test.kafka2_mv TO test.destination AS + SELECT key, value, 'kafka2' as _consumed_by + FROM test.kafka2; + + CREATE MATERIALIZED VIEW test.kafka3_mv TO test.destination AS + SELECT key, value, 'kafka3' as _consumed_by + FROM test.kafka3; + """ + ) + + producer = KafkaProducer( + bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port), + value_serializer=producer_serializer, + key_serializer=producer_serializer, + ) + + ## all 3 attached, 2 working + producer.send( + topic=topic_name, + value=json.dumps({"key": 1, "value": 1}), + partition=0, + ) + producer.send( + topic=topic_name, + value=json.dumps({"key": 1, "value": 1}), + partition=1, + ) 
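+        # The topic has only two partitions while three tables share one consumer
+        # group, so at most two of the attached tables hold an assignment at any
+        # moment. Sending the same payload to both partitions ensures that every
+        # currently assigned consumer sees new data after each step below.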
+ producer.flush() + + count_query = "SELECT count() FROM test.destination" + prev_count = instance.query_with_retry( + count_query, check_callback=lambda res: int(res) > 0 + ) + + ## 2 attached, 2 working + instance.query("DETACH TABLE test.kafka1") + producer.send( + topic=topic_name, + value=json.dumps({"key": 2, "value": 2}), + partition=0, + ) + producer.send( + topic=topic_name, + value=json.dumps({"key": 2, "value": 2}), + partition=1, + ) + producer.flush() + prev_count = instance.query_with_retry( + count_query, check_callback=lambda res: int(res) > prev_count + ) + + ## 1 attached, 1 working + instance.query("DETACH TABLE test.kafka2") + producer.send( + topic=topic_name, + value=json.dumps({"key": 3, "value": 3}), + partition=0, + ) + producer.send( + topic=topic_name, + value=json.dumps({"key": 3, "value": 3}), + partition=1, + ) + producer.flush() + prev_count = instance.query_with_retry( + count_query, check_callback=lambda res: int(res) > prev_count + ) + + ## 2 attached, 2 working + instance.query("ATTACH TABLE test.kafka1") + producer.send( + topic=topic_name, + value=json.dumps({"key": 4, "value": 4}), + partition=0, + ) + producer.send( + topic=topic_name, + value=json.dumps({"key": 4, "value": 4}), + partition=1, + ) + producer.flush() + prev_count = instance.query_with_retry( + count_query, check_callback=lambda res: int(res) > prev_count + ) + + ## 1 attached, 1 working + instance.query("DETACH TABLE test.kafka3") + producer.send( + topic=topic_name, + value=json.dumps({"key": 5, "value": 5}), + partition=0, + ) + producer.send( + topic=topic_name, + value=json.dumps({"key": 5, "value": 5}), + partition=1, + ) + producer.flush() + prev_count = instance.query_with_retry( + count_query, check_callback=lambda res: int(res) > prev_count + ) + + ## 2 attached, 2 working + instance.query("ATTACH TABLE test.kafka2") + producer.send( + topic=topic_name, + value=json.dumps({"key": 6, "value": 6}), + partition=0, + ) + producer.send( + topic=topic_name, + value=json.dumps({"key": 6, "value": 6}), + partition=1, + ) + producer.flush() + prev_count = instance.query_with_retry( + count_query, check_callback=lambda res: int(res) > prev_count + ) + + ## 3 attached, 2 working + instance.query("ATTACH TABLE test.kafka3") + producer.send( + topic=topic_name, + value=json.dumps({"key": 7, "value": 7}), + partition=0, + ) + producer.send( + topic=topic_name, + value=json.dumps({"key": 7, "value": 7}), + partition=1, + ) + producer.flush() + prev_count = instance.query_with_retry( + count_query, check_callback=lambda res: int(res) > prev_count + ) + + ## 2 attached, same 2 working + instance.query("DETACH TABLE test.kafka3") + producer.send( + topic=topic_name, + value=json.dumps({"key": 8, "value": 8}), + partition=0, + ) + producer.send( + topic=topic_name, + value=json.dumps({"key": 8, "value": 8}), + partition=1, + ) + producer.flush() + prev_count = instance.query_with_retry( + count_query, check_callback=lambda res: int(res) > prev_count + ) + + +@pytest.mark.parametrize( + "create_query_generator", + [generate_old_create_table_query, generate_new_create_table_query], +) +def test_row_based_formats( + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, +): + admin_client = get_admin_client(kafka_cluster) + + formats_to_test = [ + "TSV", + "TSVWithNamesAndTypes", + "TSKV", + "CSV", + "CSVWithNamesAndTypes", + "CustomSeparatedWithNamesAndTypes", + "Values", + "JSON", + "JSONEachRow", + "JSONCompactEachRow", + 
"JSONCompactEachRowWithNamesAndTypes", + "JSONObjectEachRow", + "Avro", + "RowBinary", + "RowBinaryWithNamesAndTypes", + "MsgPack", + ] + + num_rows = 10 + max_rows_per_message = 5 + message_count = num_rows / max_rows_per_message + expected = "" + for i in range(num_rows): + expected += str(i * 10) + "\t" + str(i * 100) + "\n" + + for format_name in formats_to_test: + logging.debug("Setting up {format_name}") + + topic_name = format_name + get_topic_postfix(create_query_generator) + table_name = f"kafka_{format_name}" + + kafka_create_topic(admin_client, topic_name) + + create_query = create_query_generator( + table_name, + "key UInt64, value UInt64", + topic_list=topic_name, + consumer_group=topic_name, + format=format_name, + settings={"kafka_max_rows_per_message": max_rows_per_message}, + ) + + instance.query( + f""" + DROP TABLE IF EXISTS test.{table_name}_view; + DROP TABLE IF EXISTS test.{table_name}; + + {create_query}; + + CREATE MATERIALIZED VIEW test.{table_name}_view ENGINE=MergeTree ORDER BY (key, value) AS + SELECT key, value FROM test.{table_name}; + """ + ) + + for format_name in formats_to_test: + logging.debug("Inserting to {format_name}") + + topic_name = format_name + get_topic_postfix(create_query_generator) + table_name = f"kafka_{format_name}" + instance.query( + f"INSERT INTO test.{table_name} SELECT number * 10 as key, number * 100 as value FROM numbers({num_rows})" + ) + + for format_name in formats_to_test: + topic_name = format_name + get_topic_postfix(create_query_generator) + table_name = f"kafka_{format_name}" + messages = kafka_consume_with_retry( + kafka_cluster, topic_name, message_count, need_decode=False + ) + + assert len(messages) == message_count + + result = instance.query_with_retry( + f"SELECT * FROM test.{table_name}_view", + check_callback=lambda res: res == expected, + ) + + assert result == expected + kafka_delete_topic(admin_client, topic_name) + + +def test_kafka_duplicates_when_commit_failed( + kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance +): + messages = [json.dumps({"key": j + 1, "value": "x" * 300}) for j in range(22)] + kafka_produce(kafka_cluster, "duplicates_when_commit_failed", messages) + + instance.query( + """ + DROP TABLE IF EXISTS test.view SYNC; + DROP TABLE IF EXISTS test.consumer SYNC; + + CREATE TABLE test.kafka (key UInt64, value String) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'duplicates_when_commit_failed', + kafka_group_name = 'duplicates_when_commit_failed', + kafka_format = 'JSONEachRow', + kafka_max_block_size = 20, + kafka_flush_interval_ms = 1000; + + CREATE TABLE test.view (key UInt64, value String) + ENGINE = MergeTree() + ORDER BY key; + """ + ) + + instance.query( + """ + CREATE MATERIALIZED VIEW test.consumer TO test.view AS + SELECT * FROM test.kafka + WHERE NOT sleepEachRow(0.25); + """ + ) + + instance.wait_for_log_line("Polled batch of 20 messages") + # the tricky part here is that disconnect should happen after write prefix, but before we do commit + # we have 0.25 (sleepEachRow) * 20 ( Rows ) = 5 sec window after "Polled batch of 20 messages" + # while materialized view is working to inject zookeeper failure + kafka_cluster.pause_container("kafka1") + + # if we restore the connection too fast (<30sec) librdkafka will not report any timeout + # (alternative is to decrease the default session timeouts for librdkafka) + # + # when the delay is too long (>50sec) broker will decide to remove us from the consumer group, + # and will start answering 
"Broker: Unknown member" + instance.wait_for_log_line( + "Exception during commit attempt: Local: Waiting for coordinator", timeout=45 + ) + instance.wait_for_log_line("All commit attempts failed", look_behind_lines=500) + + kafka_cluster.unpause_container("kafka1") + + # kafka_cluster.open_bash_shell('instance') + instance.wait_for_log_line("Committed offset 22") + + result = instance.query("SELECT count(), uniqExact(key), max(key) FROM test.view") + logging.debug(result) + + instance.query( + """ + DROP TABLE test.consumer SYNC; + DROP TABLE test.view SYNC; + """ + ) + + # After https://github.com/edenhill/librdkafka/issues/2631 + # timeout triggers rebalance, making further commits to the topic after getting back online + # impossible. So we have a duplicate in that scenario, but we report that situation properly. + assert TSV(result) == TSV("42\t22\t22") + + +@pytest.mark.parametrize( + "create_query_generator", + [generate_old_create_table_query, generate_new_create_table_query], +) +def test_commits_of_unprocessed_messages_on_drop( + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, +): + topic_name = "commits_of_unprocessed_messages_on_drop" + get_topic_postfix( + create_query_generator + ) + messages = [json.dumps({"key": j + 1, "value": j + 1}) for j in range(1)] + + kafka_produce(kafka_cluster, topic_name, messages) + + create_query = create_query_generator( + "kafka", + "key UInt64, value UInt64", + topic_list=topic_name, + consumer_group=f"{topic_name}_test_group", + settings={ + "kafka_max_block_size": 1000, + "kafka_flush_interval_ms": 1000, + }, + ) + instance.query( + f""" + DROP TABLE IF EXISTS test.destination SYNC; + CREATE TABLE test.destination ( + key UInt64, + value UInt64, + _topic String, + _key String, + _offset UInt64, + _partition UInt64, + _timestamp Nullable(DateTime('UTC')), + _consumed_by LowCardinality(String) + ) + ENGINE = MergeTree() + ORDER BY key; + + {create_query}; + + CREATE MATERIALIZED VIEW test.kafka_consumer TO test.destination AS + SELECT + key, + value, + _topic, + _key, + _offset, + _partition, + _timestamp + FROM test.kafka; + """ + ) + + # Waiting for test.kafka_consumer to start consume + instance.wait_for_log_line("Committed offset [0-9]+") + + cancel = threading.Event() + + i = [2] + + def produce(): + while not cancel.is_set(): + messages = [] + for _ in range(113): + messages.append(json.dumps({"key": i[0], "value": i[0]})) + i[0] += 1 + kafka_produce(kafka_cluster, topic_name, messages) + time.sleep(0.5) + + kafka_thread = threading.Thread(target=produce) + kafka_thread.start() + time.sleep(4) + + instance.query( + """ + DROP TABLE test.kafka SYNC; + """ + ) + + new_create_query = create_query_generator( + "kafka", + "key UInt64, value UInt64", + topic_list=topic_name, + consumer_group=f"{topic_name}_test_group", + settings={ + "kafka_max_block_size": 10000, + "kafka_flush_interval_ms": 1000, + }, + ) + instance.query(new_create_query) + + cancel.set() + instance.wait_for_log_line("kafka.*Stalled", repetitions=5) + + # kafka_cluster.open_bash_shell('instance') + # SELECT key, _timestamp, _offset FROM test.destination where runningDifference(key) <> 1 ORDER BY key; + + result = instance.query( + "SELECT count(), uniqExact(key), max(key) FROM test.destination" + ) + logging.debug(result) + + instance.query( + """ + DROP TABLE test.kafka_consumer SYNC; + DROP TABLE test.destination SYNC; + """ + ) + + kafka_thread.join() + assert TSV(result) == TSV("{0}\t{0}\t{0}".format(i[0] - 1)), "Missing 
data!" + + +@pytest.mark.parametrize( + "create_query_generator", + [generate_old_create_table_query, generate_new_create_table_query], +) +def test_kafka_produce_consume( + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, +): + topic_name = "insert2" + get_topic_postfix(create_query_generator) + + create_query = create_query_generator( + "kafka", + "key UInt64, value UInt64", + topic_list=topic_name, + consumer_group=topic_name, + format="TSV", + ) + instance.query( + f""" + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.consumer; + {create_query}; + CREATE TABLE test.view (key UInt64, value UInt64) + ENGINE = MergeTree + ORDER BY key; + CREATE MATERIALIZED VIEW test.consumer TO test.view AS + SELECT * FROM test.kafka; + """ + ) + + messages_num = 10000 + + def insert(): + values = [] + for i in range(messages_num): + values.append("({i}, {i})".format(i=i)) + values = ",".join(values) + + insert_with_retry(instance, values) + + threads = [] + threads_num = 16 + for _ in range(threads_num): + threads.append(threading.Thread(target=insert)) + for thread in threads: + time.sleep(random.uniform(0, 1)) + thread.start() + + with existing_kafka_topic(get_admin_client(kafka_cluster), topic_name): + expected_row_count = messages_num * threads_num + result = instance.query_with_retry( + "SELECT count() FROM test.view", + sleep_time=1, + retry_count=20, + check_callback=lambda result: int(result) == expected_row_count, + ) + + instance.query( + """ + DROP TABLE test.consumer; + DROP TABLE test.view; + """ + ) + + for thread in threads: + thread.join() + + assert ( + int(result) == expected_row_count + ), "ClickHouse lost some messages: {}".format(result) + + +def test_system_kafka_consumers_rebalance( + kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance +): + # based on test_kafka_consumer_hang2 + admin_client = KafkaAdminClient( + bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) + ) + + producer = KafkaProducer( + bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port), + value_serializer=producer_serializer, + key_serializer=producer_serializer, + ) + + topic = "system_kafka_cons2" + kafka_create_topic(admin_client, topic, num_partitions=2) + + instance.query( + f""" + DROP TABLE IF EXISTS test.kafka; + DROP TABLE IF EXISTS test.kafka2; + + CREATE TABLE test.kafka (key UInt64, value UInt64) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = '{topic}', + kafka_group_name = '{topic}', + kafka_commit_on_select = 1, + kafka_format = 'JSONEachRow'; + + CREATE TABLE test.kafka2 (key UInt64, value UInt64) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = '{topic}', + kafka_commit_on_select = 1, + kafka_group_name = '{topic}', + kafka_format = 'JSONEachRow'; + """ + ) + + producer.send(topic=topic, value=json.dumps({"key": 1, "value": 1}), partition=0) + producer.send(topic=topic, value=json.dumps({"key": 11, "value": 11}), partition=1) + time.sleep(3) + + # first consumer subscribe the topic, try to poll some data, and go to rest + instance.query("SELECT * FROM test.kafka") + + # second consumer do the same leading to rebalance in the first + # consumer, try to poll some data + instance.query("SELECT * FROM test.kafka2") + + producer.send(topic=topic, value=json.dumps({"key": 1, "value": 1}), partition=0) + producer.send(topic=topic, value=json.dumps({"key": 10, "value": 10}), partition=1) + time.sleep(3) + + instance.query("SELECT * FROM 
test.kafka") + instance.query("SELECT * FROM test.kafka2") + instance.query("SELECT * FROM test.kafka") + instance.query("SELECT * FROM test.kafka2") + + result_system_kafka_consumers = instance.query( + """ + create or replace function stable_timestamp as + (d)->multiIf(d==toDateTime('1970-01-01 00:00:00'), 'never', abs(dateDiff('second', d, now())) < 30, 'now', toString(d)); + SELECT database, table, length(consumer_id), assignments.topic, assignments.partition_id, + assignments.current_offset, + if(length(exceptions.time)>0, exceptions.time[1]::String, 'never') as last_exception_time_, + if(length(exceptions.text)>0, exceptions.text[1], 'no exception') as last_exception_, + stable_timestamp(last_poll_time) as last_poll_time_, stable_timestamp(last_commit_time) as last_commit_time_, + num_commits, stable_timestamp(last_rebalance_time) as last_rebalance_time_, + num_rebalance_revocations, num_rebalance_assignments, is_currently_used + FROM system.kafka_consumers WHERE database='test' and table IN ('kafka', 'kafka2') format Vertical; + """ + ) + logging.debug(f"result_system_kafka_consumers: {result_system_kafka_consumers}") + assert ( + result_system_kafka_consumers + == """Row 1: +────── +database: test +table: kafka +length(consumer_id): 67 +assignments.topic: ['system_kafka_cons2'] +assignments.partition_id: [0] +assignments.current_offset: [2] +last_exception_time_: never +last_exception_: no exception +last_poll_time_: now +last_commit_time_: now +num_commits: 2 +last_rebalance_time_: now +num_rebalance_revocations: 1 +num_rebalance_assignments: 2 +is_currently_used: 0 + +Row 2: +────── +database: test +table: kafka2 +length(consumer_id): 68 +assignments.topic: ['system_kafka_cons2'] +assignments.partition_id: [1] +assignments.current_offset: [2] +last_exception_time_: never +last_exception_: no exception +last_poll_time_: now +last_commit_time_: now +num_commits: 1 +last_rebalance_time_: never +num_rebalance_revocations: 0 +num_rebalance_assignments: 1 +is_currently_used: 0 +""" + ) + + instance.query("DROP TABLE test.kafka") + instance.query("DROP TABLE test.kafka2") + + kafka_delete_topic(admin_client, topic) + + +@pytest.mark.parametrize( + "create_query_generator", + [generate_old_create_table_query, generate_new_create_table_query], +) +def test_kafka_materialized_view( + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, +): + topic_name = "mv" + + instance.query( + f""" + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.consumer; + DROP TABLE IF EXISTS test.kafka; + + {create_query_generator("kafka", "key UInt64, value UInt64", topic_list=topic_name, consumer_group="mv")}; + CREATE TABLE test.view (key UInt64, value UInt64) + ENGINE = MergeTree() + ORDER BY key; + CREATE MATERIALIZED VIEW test.consumer TO test.view AS + SELECT * FROM test.kafka; + """ + ) + + messages = [] + for i in range(50): + messages.append(json.dumps({"key": i, "value": i})) + kafka_produce(kafka_cluster, topic_name, messages) + + with existing_kafka_topic(get_admin_client(kafka_cluster), topic_name): + result = instance.query_with_retry( + "SELECT * FROM test.view", check_callback=kafka_check_result + ) + + kafka_check_result(result, True) + + instance.query( + """ + DROP TABLE test.consumer; + DROP TABLE test.view; + DROP TABLE test.kafka; + """ + ) + + +@pytest.mark.parametrize( + "create_query_generator", + [generate_new_create_table_query, generate_old_create_table_query], +) +def test_kafka_materialized_view_with_subquery( + kafka_cluster: 
ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, +): + topic_name = "mysq" + logging.debug(f"Using topic {topic_name}") + + create_query = create_query_generator( + "kafka", + "key UInt64, value UInt64", + topic_list=topic_name, + consumer_group=topic_name, + ) + instance.query( + f""" + DROP TABLE IF EXISTS test.kafka; + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.consumer; + + {create_query}; + CREATE TABLE test.view (key UInt64, value UInt64) + ENGINE = MergeTree() + ORDER BY key; + CREATE MATERIALIZED VIEW test.consumer TO test.view AS + SELECT * FROM (SELECT * FROM test.kafka); + """ + ) + + messages = [] + for i in range(50): + messages.append(json.dumps({"key": i, "value": i})) + kafka_produce(kafka_cluster, topic_name, messages) + + with existing_kafka_topic(get_admin_client(kafka_cluster), topic_name): + result = instance.query_with_retry( + "SELECT * FROM test.view", + check_callback=kafka_check_result, + retry_count=40, + sleep_time=0.75, + ) + + instance.query( + """ + DROP TABLE test.consumer; + DROP TABLE test.view; + """ + ) + + kafka_check_result(result, True) + + +@pytest.mark.parametrize( + "create_query_generator", + [generate_old_create_table_query, generate_new_create_table_query], +) +def test_kafka_lot_of_partitions_partial_commit_of_bulk( + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, +): + admin_client = KafkaAdminClient( + bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) + ) + + topic_name = "topic_with_multiple_partitions2" + get_topic_postfix( + create_query_generator + ) + with kafka_topic(admin_client, topic_name): + create_query = create_query_generator( + "kafka", + "key UInt64, value UInt64", + topic_list=topic_name, + consumer_group=topic_name, + settings={ + "kafka_max_block_size": 211, + "kafka_flush_interval_ms": 500, + }, + ) + instance.query( + f""" + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.consumer; + {create_query}; + CREATE TABLE test.view (key UInt64, value UInt64) + ENGINE = MergeTree() + ORDER BY key; + CREATE MATERIALIZED VIEW test.consumer TO test.view AS + SELECT * FROM test.kafka; + """ + ) + + messages = [] + count = 0 + for dummy_msg in range(1000): + rows = [] + for dummy_row in range(random.randrange(3, 10)): + count = count + 1 + rows.append(json.dumps({"key": count, "value": count})) + messages.append("\n".join(rows)) + kafka_produce(kafka_cluster, topic_name, messages) + + instance.wait_for_log_line("kafka.*Stalled", repetitions=5) + + result = instance.query( + "SELECT count(), uniqExact(key), max(key) FROM test.view" + ) + logging.debug(result) + assert TSV(result) == TSV("{0}\t{0}\t{0}".format(count)) + + instance.query( + """ + DROP TABLE test.consumer; + DROP TABLE test.view; + """ + ) + + +# https://github.com/ClickHouse/ClickHouse/issues/26643 +@pytest.mark.parametrize( + "create_query_generator", + [generate_old_create_table_query, generate_new_create_table_query], +) +def test_issue26643( + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, +): + producer = KafkaProducer( + bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port), + value_serializer=producer_serializer, + ) + topic_name = "test_issue26643" + get_topic_postfix(create_query_generator) + thread_per_consumer = must_use_thread_per_consumer(create_query_generator) + + with kafka_topic(get_admin_client(kafka_cluster), topic_name): + msg = message_with_repeated_pb2.Message( + tnow=1629000000, 
+ server="server1", + clien="host1", + sPort=443, + cPort=50000, + r=[ + message_with_repeated_pb2.dd( + name="1", type=444, ttl=123123, data=b"adsfasd" + ), + message_with_repeated_pb2.dd(name="2"), + ], + method="GET", + ) + + data = b"" + serialized_msg = msg.SerializeToString() + data = data + _VarintBytes(len(serialized_msg)) + serialized_msg + + msg = message_with_repeated_pb2.Message(tnow=1629000002) + + serialized_msg = msg.SerializeToString() + data = data + _VarintBytes(len(serialized_msg)) + serialized_msg + + producer.send(topic_name, value=data) + + data = _VarintBytes(len(serialized_msg)) + serialized_msg + producer.send(topic_name, value=data) + producer.flush() + + create_query = create_query_generator( + "test_queue", + """`tnow` UInt32, + `server` String, + `client` String, + `sPort` UInt16, + `cPort` UInt16, + `r.name` Array(String), + `r.class` Array(UInt16), + `r.type` Array(UInt16), + `r.ttl` Array(UInt32), + `r.data` Array(String), + `method` String""", + topic_list=topic_name, + consumer_group=f"{topic_name}_group", + format="Protobuf", + settings={ + "kafka_schema": "message_with_repeated.proto:Message", + "kafka_skip_broken_messages": 10000, + "kafka_thread_per_consumer": thread_per_consumer, + }, + ) + + instance.query( + f""" + {create_query}; + + SET allow_suspicious_low_cardinality_types=1; + + CREATE TABLE test.log + ( + `tnow` DateTime('Asia/Istanbul') CODEC(DoubleDelta, LZ4), + `server` LowCardinality(String), + `client` LowCardinality(String), + `sPort` LowCardinality(UInt16), + `cPort` UInt16 CODEC(T64, LZ4), + `r.name` Array(String), + `r.class` Array(LowCardinality(UInt16)), + `r.type` Array(LowCardinality(UInt16)), + `r.ttl` Array(LowCardinality(UInt32)), + `r.data` Array(String), + `method` LowCardinality(String) + ) + ENGINE = MergeTree + PARTITION BY toYYYYMMDD(tnow) + ORDER BY (tnow, server) + TTL toDate(tnow) + toIntervalMonth(1000) + SETTINGS index_granularity = 16384, merge_with_ttl_timeout = 7200; + + CREATE MATERIALIZED VIEW test.test_consumer TO test.log AS + SELECT + toDateTime(a.tnow) AS tnow, + a.server AS server, + a.client AS client, + a.sPort AS sPort, + a.cPort AS cPort, + a.`r.name` AS `r.name`, + a.`r.class` AS `r.class`, + a.`r.type` AS `r.type`, + a.`r.ttl` AS `r.ttl`, + a.`r.data` AS `r.data`, + a.method AS method + FROM test.test_queue AS a; + """ + ) + + instance.wait_for_log_line("Committed offset") + result = instance.query("SELECT * FROM test.log") + + expected = """\ + 2021-08-15 07:00:00 server1 443 50000 ['1','2'] [0,0] [444,0] [123123,0] ['adsfasd',''] GET + 2021-08-15 07:00:02 0 0 [] [] [] [] [] + 2021-08-15 07:00:02 0 0 [] [] [] [] [] + """ + assert TSV(result) == TSV(expected) + + +def test_kafka_csv_with_thread_per_consumer( + kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance +): + instance.query( + """ + CREATE TABLE test.kafka (key UInt64, value UInt64) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'csv_with_thread_per_consumer', + kafka_group_name = 'csv_with_thread_per_consumer', + kafka_format = 'CSV', + kafka_row_delimiter = '\\n', + kafka_num_consumers = 4, + kafka_commit_on_select = 1, + kafka_thread_per_consumer = 1; + """ + ) + + messages = [] + for i in range(50): + messages.append("{i}, {i}".format(i=i)) + kafka_produce(kafka_cluster, "csv_with_thread_per_consumer", messages) + + result = "" + while True: + result += instance.query("SELECT * FROM test.kafka", ignore_error=True) + if kafka_check_result(result): + break + + kafka_check_result(result, True) 
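+
+
+# Illustrative sketch only (this helper and its `deadline_seconds` parameter are
+# assumptions, not part of the test suite): a bounded variant of the
+# SELECT-polling loops used above, which otherwise spin forever if a message is
+# lost.
+def select_until_kafka_result(instance, query, deadline_seconds=120):
+    # Accumulate SELECT output until it matches the reference file, or fail
+    # loudly once the deadline expires.
+    result = ""
+    start = time.monotonic()
+    while time.monotonic() - start < deadline_seconds:
+        result += instance.query(query, ignore_error=True)
+        if kafka_check_result(result):
+            return result
+        time.sleep(0.5)
+    raise TimeoutError(f"no matching result for {query!r} within {deadline_seconds}s")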
+ + +def test_kafka_select_empty( + kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance +): + admin_client = get_admin_client(kafka_cluster) + topic_name = "empty" + kafka_create_topic(admin_client, topic_name) + + instance.query( + f""" + CREATE TABLE test.kafka (key UInt64) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = '{topic_name}', + kafka_commit_on_select = 1, + kafka_group_name = '{topic_name}', + kafka_format = 'TSV', + kafka_row_delimiter = '\\n'; + """ + ) + + assert int(instance.query("SELECT count() FROM test.kafka")) == 0 + kafka_delete_topic(admin_client, topic_name) + + +def test_kafka_issue11308( + kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance +): + # Check that matview does respect Kafka SETTINGS + kafka_produce( + kafka_cluster, + "issue11308", + [ + '{"t": 123, "e": {"x": "woof"} }', + '{"t": 123, "e": {"x": "woof"} }', + '{"t": 124, "e": {"x": "test"} }', + ], + ) + + instance.query( + """ + CREATE TABLE test.persistent_kafka ( + time UInt64, + some_string String + ) + ENGINE = MergeTree() + ORDER BY time; + + CREATE TABLE test.kafka (t UInt64, `e.x` String) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'issue11308', + kafka_group_name = 'issue11308', + kafka_format = 'JSONEachRow', + kafka_row_delimiter = '\\n', + kafka_flush_interval_ms=1000, + input_format_import_nested_json = 1; + + CREATE MATERIALIZED VIEW test.persistent_kafka_mv TO test.persistent_kafka AS + SELECT + `t` AS `time`, + `e.x` AS `some_string` + FROM test.kafka; + """ + ) + + while int(instance.query("SELECT count() FROM test.persistent_kafka")) < 3: + time.sleep(1) + + result = instance.query("SELECT * FROM test.persistent_kafka ORDER BY time;") + + instance.query( + """ + DROP TABLE test.persistent_kafka; + DROP TABLE test.persistent_kafka_mv; + """ + ) + + expected = """\ +123 woof +123 woof +124 test +""" + assert TSV(result) == TSV(expected) + + +def test_kafka_protobuf(kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance): + kafka_produce_protobuf_messages(kafka_cluster, "pb", 0, 20) + kafka_produce_protobuf_messages(kafka_cluster, "pb", 20, 1) + kafka_produce_protobuf_messages(kafka_cluster, "pb", 21, 29) + + instance.query( + """ + CREATE TABLE test.kafka (key UInt64, value String) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'pb', + kafka_group_name = 'pb', + kafka_format = 'Protobuf', + kafka_commit_on_select = 1, + kafka_schema = 'kafka.proto:KeyValuePair'; + """ + ) + + result = "" + while True: + result += instance.query("SELECT * FROM test.kafka", ignore_error=True) + if kafka_check_result(result): + break + + kafka_check_result(result, True) + + +def test_kafka_predefined_configuration( + kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance +): + admin_client = KafkaAdminClient( + bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) + ) + topic_name = "conf" + kafka_create_topic(admin_client, topic_name) + + messages = [] + for i in range(50): + messages.append("{i}, {i}".format(i=i)) + kafka_produce(kafka_cluster, topic_name, messages) + + instance.query( + f""" + CREATE TABLE test.kafka (key UInt64, value UInt64) ENGINE = Kafka(kafka1, kafka_format='CSV'); + """ + ) + + result = "" + while True: + result += instance.query("SELECT * FROM test.kafka", ignore_error=True) + if kafka_check_result(result): + break + kafka_check_result(result, True) + + +@pytest.mark.parametrize( + "create_query_generator", + 
[generate_old_create_table_query, generate_new_create_table_query], +) +def test_kafka_insert_avro( + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, +): + admin_client = KafkaAdminClient( + bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) + ) + topic_config = { + # default retention, since predefined timestamp_ms is used. + "retention.ms": "-1", + } + topic_name = "avro1" + get_topic_postfix(create_query_generator) + with kafka_topic(admin_client, topic_name, config=topic_config): + create_query = create_query_generator( + "kafka", + "key UInt64, value UInt64, _timestamp DateTime('UTC')", + topic_list=topic_name, + consumer_group=topic_name, + format="Avro", + ) + instance.query( + f""" + DROP TABLE IF EXISTS test.kafka; + {create_query} + """ + ) + + instance.query( + "INSERT INTO test.kafka select number*10 as key, number*100 as value, 1636505534 as _timestamp from numbers(4) SETTINGS output_format_avro_rows_in_file = 2, output_format_avro_codec = 'deflate'" + ) + + message_count = 2 + messages = kafka_consume_with_retry( + kafka_cluster, + topic_name, + message_count, + need_decode=False, + timestamp=1636505534, + ) + + result = "" + for a_message in messages: + result += decode_avro(a_message) + "\n" + + expected_result = """{'key': 0, 'value': 0, '_timestamp': 1636505534} +{'key': 10, 'value': 100, '_timestamp': 1636505534} + +{'key': 20, 'value': 200, '_timestamp': 1636505534} +{'key': 30, 'value': 300, '_timestamp': 1636505534} + +""" + assert result == expected_result + + +def test_exception_from_destructor( + kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance +): + instance.query( + """ + CREATE TABLE test.kafka (key UInt64, value String) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'xyz', + kafka_group_name = '', + kafka_commit_on_select = 1, + kafka_format = 'JSONEachRow'; + """ + ) + instance.query_and_get_error( + """ + SELECT * FROM test.kafka; + """ + ) + instance.query( + """ + DROP TABLE test.kafka; + """ + ) + + instance.query( + """ + CREATE TABLE test.kafka (key UInt64, value String) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'xyz', + kafka_group_name = '', + kafka_format = 'JSONEachRow'; + """ + ) + instance.query( + """ + DROP TABLE test.kafka; + """ + ) + + # kafka_cluster.open_bash_shell('instance') + assert TSV(instance.query("SELECT 1")) == TSV("1") + + +if __name__ == "__main__": + init_cluster_and_instance() + conftest_cluster.start() + input("Cluster created, press any key to destroy...") + conftest_cluster.shutdown() diff --git a/tests/integration/test_storage_kafka/test_2.py b/tests/integration/test_storage_kafka/test_2.py new file mode 100644 index 00000000000..8dc45126312 --- /dev/null +++ b/tests/integration/test_storage_kafka/test_2.py @@ -0,0 +1,1232 @@ +import io +import json +import logging +import math +import random +import time + +import pytest +from helpers.cluster import ClickHouseCluster, ClickHouseInstance, is_arm +from helpers.test_tools import TSV +from kafka import KafkaAdminClient, KafkaProducer +from test_storage_kafka.conftest import ( + KAFKA_CONSUMER_GROUP_NEW, + KAFKA_CONSUMER_GROUP_OLD, + KAFKA_TOPIC_OLD, + conftest_cluster, + init_cluster_and_instance, +) +from test_storage_kafka.kafka_tests_utils import ( + describe_consumer_group, + existing_kafka_topic, + generate_new_create_table_query, + generate_old_create_table_query, + get_admin_client, + get_topic_postfix, + 
insert_with_retry,
+    kafka_check_result,
+    kafka_consume_with_retry,
+    kafka_create_topic,
+    kafka_delete_topic,
+    kafka_produce,
+    kafka_produce_protobuf_social,
+    kafka_topic,
+    must_use_thread_per_consumer,
+    producer_serializer,
+)
+
+# Skip all tests on ARM
+if is_arm():
+    pytestmark = pytest.mark.skip
+
+
+def gen_normal_json():
+    return '{"i":1000, "s":"ABC123abc"}'
+
+
+def gen_malformed_json():
+    return '{"i":"n1000", "s":"1000"}'
+
+
+def gen_message_with_jsons(jsons=10, malformed=0):
+    s = io.StringIO()
+
+    # We don't care at which position the error is added (we skip the whole
+    # broken message anyway), but we need to be sure that at least one error
+    # is added; otherwise the test will fail.
+    error_pos = random.randint(0, jsons - 1)
+
+    for i in range(jsons):
+        if malformed and i == error_pos:
+            s.write(gen_malformed_json())
+        else:
+            s.write(gen_normal_json())
+        s.write(" ")
+    return s.getvalue()
+
+
+@pytest.mark.parametrize(
+    "create_query_generator, do_direct_read",
+    [(generate_old_create_table_query, True), (generate_new_create_table_query, False)],
+)
+def test_kafka_unavailable(
+    kafka_cluster: ClickHouseCluster,
+    instance: ClickHouseInstance,
+    create_query_generator,
+    do_direct_read,
+):
+    number_of_messages = 20000
+    topic_name = "test_bad_reschedule" + get_topic_postfix(create_query_generator)
+    messages = [
+        json.dumps({"key": j + 1, "value": j + 1}) for j in range(number_of_messages)
+    ]
+    kafka_produce(kafka_cluster, topic_name, messages)
+
+    with existing_kafka_topic(get_admin_client(kafka_cluster), topic_name):
+        kafka_cluster.pause_container("kafka1")
+
+        create_query = create_query_generator(
+            "test_bad_reschedule",
+            "key UInt64, value UInt64",
+            topic_list=topic_name,
+            consumer_group=topic_name,
+            settings={"kafka_max_block_size": 1000},
+        )
+        instance.query(
+            f"""
+            {create_query};
+
+            CREATE MATERIALIZED VIEW test.destination_unavailable ENGINE=MergeTree ORDER BY tuple() AS
+            SELECT
+                key,
+                now() as consume_ts,
+                value,
+                _topic,
+                _key,
+                _offset,
+                _partition,
+                _timestamp
+            FROM test.test_bad_reschedule;
+        """
+        )
+
+        if do_direct_read:
+            instance.query("SELECT * FROM test.test_bad_reschedule")
+        instance.query("SELECT count() FROM test.destination_unavailable")
+
+        # long enough to trigger the issue
+        time.sleep(30)
+        kafka_cluster.unpause_container("kafka1")
+
+        result = instance.query_with_retry(
+            "SELECT count() FROM test.destination_unavailable",
+            sleep_time=1,
+            check_callback=lambda res: int(res) == number_of_messages,
+        )
+
+        assert int(result) == number_of_messages
+
+
+@pytest.mark.parametrize(
+    "create_query_generator",
+    [generate_old_create_table_query, generate_new_create_table_query],
+)
+def test_bad_reschedule(
+    kafka_cluster: ClickHouseCluster,
+    instance: ClickHouseInstance,
+    create_query_generator,
+):
+    topic_name = "test_bad_reschedule" + get_topic_postfix(create_query_generator)
+
+    messages = [json.dumps({"key": j + 1, "value": j + 1}) for j in range(20000)]
+    kafka_produce(kafka_cluster, topic_name, messages)
+
+    create_query = create_query_generator(
+        "kafka",
+        "key UInt64, value UInt64",
+        topic_list=topic_name,
+        consumer_group=topic_name,
+        settings={
+            "kafka_max_block_size": 1000,
+            "kafka_flush_interval_ms": 1000,
+        },
+    )
+    instance.query(
+        f"""
+        {create_query};
+
+        CREATE MATERIALIZED VIEW test.destination ENGINE=MergeTree ORDER BY tuple() AS
+        SELECT
+            key,
+            now() as consume_ts,
+            value,
+            _topic,
+            _key,
+            _offset,
+            _partition,
+            _timestamp
+        FROM test.kafka;
+    """
+    )
+
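+    # All 20000 messages were produced up front, so a single "Committed offset
+    # 20000" log line means the consumer caught up in one pass; the assertion
+    # below then checks that consumption was not smeared out by bad rescheduling
+    # (the spread of consume_ts must stay under 8 seconds).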
+    instance.wait_for_log_line("Committed offset 20000")
+
+    assert (
+        int(
+            instance.query(
+                "SELECT max(consume_ts) - min(consume_ts) FROM test.destination"
+            )
+        )
+        < 8
+    )
+
+
+@pytest.mark.parametrize(
+    "create_query_generator, log_line",
+    [
+        (generate_old_create_table_query, "Committed offset {offset}"),
+        (
+            generate_new_create_table_query,
+            r"kafka.*Saved offset [0-9]+ for topic-partition \[{topic}:[0-9]+\]",
+        ),
+    ],
+)
+def test_librdkafka_compression(
+    kafka_cluster: ClickHouseCluster,
+    instance: ClickHouseInstance,
+    create_query_generator,
+    log_line,
+):
+    """
+    Regression test for UB in snappy-c (which is used in librdkafka); the
+    backport PR is [1].
+
+    [1]: https://github.com/ClickHouse-Extras/librdkafka/pull/3
+
+    Example of corruption:
+
+        2020.12.10 09:59:56.831507 [ 20 ] {} void DB::StorageKafka::threadFunc(size_t): Code: 27. DB::Exception: Cannot parse input: expected '"' before: 'foo"}': (while reading the value of key value): (at row 1)
+
+    To trigger this regression there should be duplicated messages.
+
+    The original reproducer is:
+    $ gcc --version |& fgrep gcc
+    gcc (GCC) 10.2.0
+    $ yes foobarbaz | fold -w 80 | head -n10 >| in-…
+    $ make clean && make CFLAGS='-Wall -g -O2 -ftree-loop-vectorize -DNDEBUG=1 -DSG=1 -fPIC'
+    $ ./verify in
+    final comparision of in failed at 20 of 100
+
+    """
+
+    supported_compression_types = ["gzip", "snappy", "lz4", "zstd", "uncompressed"]
+
+    messages = []
+    expected = []
+
+    value = "foobarbaz" * 10
+    number_of_messages = 50
+    for i in range(number_of_messages):
+        messages.append(json.dumps({"key": i, "value": value}))
+        expected.append(f"{i}\t{value}")
+
+    expected = "\n".join(expected)
+
+    admin_client = get_admin_client(kafka_cluster)
+
+    for compression_type in supported_compression_types:
+        logging.debug("Check compression {}".format(compression_type))
+
+        topic_name = "test_librdkafka_compression_{}".format(compression_type)
+        topic_config = {"compression.type": compression_type}
+        with kafka_topic(admin_client, topic_name, config=topic_config):
+            instance.query(
+                """{create_query};
+
+                CREATE TABLE test.view (key UInt64, value String)
+                    ENGINE = MergeTree()
+                    ORDER BY key;
+
+                CREATE MATERIALIZED VIEW test.consumer TO test.view AS
+                    SELECT * FROM test.kafka;
+            """.format(
+                    create_query=create_query_generator(
+                        "kafka",
+                        "key UInt64, value String",
+                        topic_list=topic_name,
+                        format="JSONEachRow",
+                        settings={"kafka_flush_interval_ms": 1000},
+                    ),
+                )
+            )
+
+            kafka_produce(kafka_cluster, topic_name, messages)
+
+            instance.wait_for_log_line(
+                log_line.format(offset=number_of_messages, topic=topic_name)
+            )
+            result = instance.query("SELECT * FROM test.view")
+            assert TSV(result) == TSV(expected)
+
+            instance.query("DROP TABLE test.kafka SYNC")
+            instance.query("DROP TABLE test.consumer SYNC")
+            instance.query("DROP TABLE test.view SYNC")
+
+
+@pytest.mark.parametrize(
+    "create_query_generator",
+    [generate_old_create_table_query, generate_new_create_table_query],
+)
+def test_multiple_read_in_materialized_views(
+    kafka_cluster: ClickHouseCluster,
+    instance: ClickHouseInstance,
+    create_query_generator,
+):
+    topic_name = "multiple_read_from_mv" + get_topic_postfix(create_query_generator)
+
+    with kafka_topic(get_admin_client(kafka_cluster), topic_name):
+        create_query = create_query_generator(
+            "kafka_multiple_read_input",
+            "id Int64",
+            topic_list=topic_name,
+            consumer_group=topic_name,
+        )
+        instance.query(
+            f"""
+            DROP TABLE IF EXISTS test.kafka_multiple_read_input SYNC;
+            DROP TABLE IF EXISTS 
test.kafka_multiple_read_table; + DROP TABLE IF EXISTS test.kafka_multiple_read_mv; + + {create_query}; + + CREATE TABLE test.kafka_multiple_read_table (id Int64) + ENGINE = MergeTree + ORDER BY id; + + + CREATE MATERIALIZED VIEW test.kafka_multiple_read_mv TO test.kafka_multiple_read_table AS + SELECT id + FROM test.kafka_multiple_read_input + WHERE id NOT IN ( + SELECT id + FROM test.kafka_multiple_read_table + WHERE id IN ( + SELECT id + FROM test.kafka_multiple_read_input + ) + ); + """ + ) + + kafka_produce( + kafka_cluster, topic_name, [json.dumps({"id": 42}), json.dumps({"id": 43})] + ) + + expected_result = "42\n43\n" + res = instance.query_with_retry( + f"SELECT id FROM test.kafka_multiple_read_table ORDER BY id", + check_callback=lambda res: res == expected_result, + ) + assert res == expected_result + + # Verify that the query deduplicates the records as it meant to be + messages = [] + for _ in range(0, 10): + messages.append(json.dumps({"id": 42})) + messages.append(json.dumps({"id": 43})) + + messages.append(json.dumps({"id": 44})) + + kafka_produce(kafka_cluster, topic_name, messages) + + expected_result = "42\n43\n44\n" + res = instance.query_with_retry( + f"SELECT id FROM test.kafka_multiple_read_table ORDER BY id", + check_callback=lambda res: res == expected_result, + ) + assert res == expected_result + + instance.query( + f""" + DROP TABLE test.kafka_multiple_read_input; + DROP TABLE test.kafka_multiple_read_table; + DROP TABLE test.kafka_multiple_read_mv; + """ + ) + + +@pytest.mark.parametrize( + "create_query_generator, log_line", + [ + (generate_old_create_table_query, "kafka.*Committed offset 2.*virt2_[01]"), + ( + generate_new_create_table_query, + r"kafka.*Saved offset 2 for topic-partition \[virt2_[01]:[0-9]+", + ), + ], +) +def test_kafka_virtual_columns2( + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, + log_line, +): + admin_client = get_admin_client(kafka_cluster) + + topic_config = { + # default retention, since predefined timestamp_ms is used. 
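+        # (retention.ms = -1 disables time-based log retention, so records that
+        # carry old hardcoded timestamps are not expired before the test reads them)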
+ "retention.ms": "-1", + } + thread_per_consumer = must_use_thread_per_consumer(create_query_generator) + topic_name_0 = "virt2_0" + topic_name_1 = "virt2_1" + consumer_group = "virt2" + get_topic_postfix(create_query_generator) + with kafka_topic(admin_client, topic_name_0, num_partitions=2, config=topic_config): + with kafka_topic( + admin_client, topic_name_1, num_partitions=2, config=topic_config + ): + create_query = create_query_generator( + "kafka", + "value UInt64", + topic_list=f"{topic_name_0},{topic_name_1}", + consumer_group=consumer_group, + settings={ + "kafka_num_consumers": 2, + "kafka_thread_per_consumer": thread_per_consumer, + }, + ) + + instance.query( + f""" + {create_query}; + + CREATE MATERIALIZED VIEW test.view ENGINE=MergeTree ORDER BY tuple() AS + SELECT value, _key, _topic, _partition, _offset, toUnixTimestamp(_timestamp), toUnixTimestamp64Milli(_timestamp_ms), _headers.name, _headers.value FROM test.kafka; + """ + ) + + producer = KafkaProducer( + bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port), + value_serializer=producer_serializer, + key_serializer=producer_serializer, + ) + + producer.send( + topic=topic_name_0, + value=json.dumps({"value": 1}), + partition=0, + key="k1", + timestamp_ms=1577836801001, + headers=[("content-encoding", b"base64")], + ) + producer.send( + topic=topic_name_0, + value=json.dumps({"value": 2}), + partition=0, + key="k2", + timestamp_ms=1577836802002, + headers=[ + ("empty_value", b""), + ("", b"empty name"), + ("", b""), + ("repetition", b"1"), + ("repetition", b"2"), + ], + ) + producer.flush() + + producer.send( + topic=topic_name_0, + value=json.dumps({"value": 3}), + partition=1, + key="k3", + timestamp_ms=1577836803003, + headers=[("b", b"b"), ("a", b"a")], + ) + producer.send( + topic=topic_name_0, + value=json.dumps({"value": 4}), + partition=1, + key="k4", + timestamp_ms=1577836804004, + headers=[("a", b"a"), ("b", b"b")], + ) + producer.flush() + + producer.send( + topic=topic_name_1, + value=json.dumps({"value": 5}), + partition=0, + key="k5", + timestamp_ms=1577836805005, + ) + producer.send( + topic=topic_name_1, + value=json.dumps({"value": 6}), + partition=0, + key="k6", + timestamp_ms=1577836806006, + ) + producer.flush() + + producer.send( + topic=topic_name_1, + value=json.dumps({"value": 7}), + partition=1, + key="k7", + timestamp_ms=1577836807007, + ) + producer.send( + topic=topic_name_1, + value=json.dumps({"value": 8}), + partition=1, + key="k8", + timestamp_ms=1577836808008, + ) + producer.flush() + + instance.wait_for_log_line(log_line, repetitions=4, look_behind_lines=6000) + + describe_consumer_group(kafka_cluster, consumer_group) + # pprint.pprint(members) + # members[0]['client_id'] = 'ClickHouse-instance-test-kafka-0' + # members[1]['client_id'] = 'ClickHouse-instance-test-kafka-1' + + result = instance.query( + "SELECT * FROM test.view ORDER BY value", ignore_error=True + ) + + expected = f"""\ + 1 k1 {topic_name_0} 0 0 1577836801 1577836801001 ['content-encoding'] ['base64'] + 2 k2 {topic_name_0} 0 1 1577836802 1577836802002 ['empty_value','','','repetition','repetition'] ['','empty name','','1','2'] + 3 k3 {topic_name_0} 1 0 1577836803 1577836803003 ['b','a'] ['b','a'] + 4 k4 {topic_name_0} 1 1 1577836804 1577836804004 ['a','b'] ['a','b'] + 5 k5 {topic_name_1} 0 0 1577836805 1577836805005 [] [] + 6 k6 {topic_name_1} 0 1 1577836806 1577836806006 [] [] + 7 k7 {topic_name_1} 1 0 1577836807 1577836807007 [] [] + 8 k8 {topic_name_1} 1 1 1577836808 1577836808008 [] [] + """ + + assert 
TSV(result) == TSV(expected) + + instance.query( + """ + DROP TABLE test.kafka; + DROP TABLE test.view; + """ + ) + instance.rotate_logs() + + +@pytest.mark.parametrize( + "create_query_generator, log_line", + [ + (generate_new_create_table_query, "Saved offset 5"), + (generate_old_create_table_query, "Committed offset 5"), + ], +) +def test_kafka_produce_key_timestamp( + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, + log_line, +): + topic_name = "insert3" + topic_config = { + # default retention, since predefined timestamp_ms is used. + "retention.ms": "-1", + } + + with kafka_topic(get_admin_client(kafka_cluster), topic_name, config=topic_config): + writer_create_query = create_query_generator( + "kafka_writer", + "key UInt64, value UInt64, _key String, _timestamp DateTime('UTC')", + topic_list=topic_name, + consumer_group=topic_name, + format="TSV", + ) + reader_create_query = create_query_generator( + "kafka", + "key UInt64, value UInt64, inserted_key String, inserted_timestamp DateTime('UTC')", + topic_list=topic_name, + consumer_group=topic_name, + format="TSV", + ) + + instance.query( + f""" + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.consumer; + {writer_create_query}; + {reader_create_query}; + CREATE MATERIALIZED VIEW test.view ENGINE=MergeTree ORDER BY tuple() AS + SELECT key, value, inserted_key, toUnixTimestamp(inserted_timestamp), _key, _topic, _partition, _offset, toUnixTimestamp(_timestamp) FROM test.kafka; + """ + ) + + instance.query( + "INSERT INTO test.kafka_writer VALUES ({},{},'{}',toDateTime({}))".format( + 1, 1, "k1", 1577836801 + ) + ) + instance.query( + "INSERT INTO test.kafka_writer VALUES ({},{},'{}',toDateTime({}))".format( + 2, 2, "k2", 1577836802 + ) + ) + instance.query( + "INSERT INTO test.kafka_writer VALUES ({},{},'{}',toDateTime({})),({},{},'{}',toDateTime({}))".format( + 3, 3, "k3", 1577836803, 4, 4, "k4", 1577836804 + ) + ) + instance.query( + "INSERT INTO test.kafka_writer VALUES ({},{},'{}',toDateTime({}))".format( + 5, 5, "k5", 1577836805 + ) + ) + + # instance.wait_for_log_line(log_line) + + expected = """\ + 1 1 k1 1577836801 k1 insert3 0 0 1577836801 + 2 2 k2 1577836802 k2 insert3 0 1 1577836802 + 3 3 k3 1577836803 k3 insert3 0 2 1577836803 + 4 4 k4 1577836804 k4 insert3 0 3 1577836804 + 5 5 k5 1577836805 k5 insert3 0 4 1577836805 + """ + + result = instance.query_with_retry( + "SELECT * FROM test.view ORDER BY value", + ignore_error=True, + retry_count=5, + sleep_time=1, + check_callback=lambda res: TSV(res) == TSV(expected), + ) + + assert TSV(result) == TSV(expected) + + +@pytest.mark.parametrize( + "create_query_generator", + [generate_old_create_table_query, generate_new_create_table_query], +) +def test_kafka_engine_put_errors_to_stream_with_random_malformed_json( + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, +): + topic_name = ( + "kafka_engine_put_errors_to_stream_with_random_malformed_json" + + get_topic_postfix(create_query_generator) + ) + create_query = create_query_generator( + "kafka", + "i Int64, s String", + topic_list=topic_name, + consumer_group=topic_name, + settings={ + "kafka_max_block_size": 100, + "kafka_poll_max_batch_size": 1, + "kafka_handle_error_mode": "stream", + }, + ) + + instance.query( + f""" + DROP TABLE IF EXISTS test.kafka; + DROP TABLE IF EXISTS test.kafka_data; + DROP TABLE IF EXISTS test.kafka_errors; + {create_query}; + CREATE MATERIALIZED VIEW test.kafka_data (i Int64, s String) + ENGINE = 
MergeTree + ORDER BY i + AS SELECT i, s FROM test.kafka WHERE length(_error) == 0; + CREATE MATERIALIZED VIEW test.kafka_errors (topic String, partition Int64, offset Int64, raw String, error String) + ENGINE = MergeTree + ORDER BY (topic, offset) + AS SELECT + _topic AS topic, + _partition AS partition, + _offset AS offset, + _raw_message AS raw, + _error AS error + FROM test.kafka WHERE length(_error) > 0; + """ + ) + + messages = [] + for i in range(128): + if i % 2 == 0: + messages.append(gen_message_with_jsons(10, 1)) + else: + messages.append(gen_message_with_jsons(10, 0)) + + kafka_produce(kafka_cluster, topic_name, messages) + with existing_kafka_topic(get_admin_client(kafka_cluster), topic_name): + instance.wait_for_log_line("Committed offset 128") + # 64 good messages, each containing 10 rows + assert TSV(instance.query("SELECT count() FROM test.kafka_data")) == TSV("640") + # 64 bad messages, each containing some broken row + assert TSV(instance.query("SELECT count() FROM test.kafka_errors")) == TSV("64") + + instance.query( + """ + DROP TABLE test.kafka; + DROP TABLE test.kafka_data; + DROP TABLE test.kafka_errors; + """ + ) + + +@pytest.mark.parametrize( + "create_query_generator", + [generate_old_create_table_query, generate_new_create_table_query], +) +def test_kafka_produce_consume_avro( + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, +): + topic_name = "insert_avro" + get_topic_postfix(create_query_generator) + with kafka_topic(get_admin_client(kafka_cluster), topic_name): + num_rows = 75 + + writer_create_query = create_query_generator( + "kafka_writer", + "key UInt64, value UInt64", + topic_list=topic_name, + consumer_group=topic_name, + format="Avro", + ) + + reader_create_query = create_query_generator( + "kafka", + "key UInt64, value UInt64", + topic_list=topic_name, + consumer_group=topic_name, + format="Avro", + ) + + instance.query( + f""" + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.kafka; + DROP TABLE IF EXISTS test.kafka_writer; + + {writer_create_query}; + {reader_create_query}; + + CREATE MATERIALIZED VIEW test.view ENGINE=MergeTree ORDER BY tuple() AS + SELECT key, value FROM test.kafka; + """ + ) + + instance.query( + "INSERT INTO test.kafka_writer select number*10 as key, number*100 as value from numbers({num_rows}) SETTINGS output_format_avro_rows_in_file = 7".format( + num_rows=num_rows + ) + ) + + instance.wait_for_log_line( + "Committed offset {offset}".format(offset=math.ceil(num_rows / 7)) + ) + + expected_num_rows = instance.query( + "SELECT COUNT(1) FROM test.view", ignore_error=True + ) + assert int(expected_num_rows) == num_rows + + expected_max_key = instance.query( + "SELECT max(key) FROM test.view", ignore_error=True + ) + assert int(expected_max_key) == (num_rows - 1) * 10 + + +@pytest.mark.parametrize( + "create_query_generator", + [generate_old_create_table_query, generate_new_create_table_query], +) +def test_kafka_virtual_columns_with_materialized_view( + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, +): + topic_config = { + # default retention, since predefined timestamp_ms is used. 
+        "retention.ms": "-1",
+    }
+    # the topic name is hardcoded in the reference file; it isn't worth creating
+    # two reference files just to have separate topics,
+    # as the context manager will always clean up the topic
+    topic_name = "virt2"
+    create_query = create_query_generator(
+        "kafka",
+        "key UInt64, value UInt64",
+        topic_list=topic_name,
+        consumer_group=f"{topic_name}-group",
+    )
+    with kafka_topic(get_admin_client(kafka_cluster), topic_name, config=topic_config):
+        instance.query(
+            f"""
+            DROP TABLE IF EXISTS test.view;
+            DROP TABLE IF EXISTS test.consumer;
+            {create_query};
+            CREATE TABLE test.view (key UInt64, value UInt64, kafka_key String, topic String, offset UInt64, partition UInt64, timestamp Nullable(DateTime('UTC')))
+                ENGINE = MergeTree()
+                ORDER BY key;
+            CREATE MATERIALIZED VIEW test.consumer TO test.view AS
+                SELECT *, _key as kafka_key, _topic as topic, _offset as offset, _partition as partition, _timestamp = 0 ? '0000-00-00 00:00:00' : toString(_timestamp) as timestamp FROM test.kafka;
+        """
+        )
+
+        messages = []
+        for i in range(50):
+            messages.append(json.dumps({"key": i, "value": i}))
+        kafka_produce(kafka_cluster, topic_name, messages, 0)
+
+        def check_callback(result):
+            return kafka_check_result(result, False, "test_kafka_virtual2.reference")
+
+        result = instance.query_with_retry(
+            "SELECT kafka_key, key, topic, value, offset, partition, timestamp FROM test.view ORDER BY kafka_key, key",
+            check_callback=check_callback,
+        )
+
+        kafka_check_result(result, True, "test_kafka_virtual2.reference")
+
+    instance.query(
+        """
+        DROP TABLE test.consumer;
+        DROP TABLE test.view;
+    """
+    )
+
+
+def test_system_kafka_consumers_rebalance_mv(
+    kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance, max_retries=15
+):
+    admin_client = KafkaAdminClient(
+        bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port)
+    )
+
+    producer = KafkaProducer(
+        bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port),
+        value_serializer=producer_serializer,
+        key_serializer=producer_serializer,
+    )
+
+    topic = "system_kafka_cons_mv"
+    kafka_create_topic(admin_client, topic, num_partitions=2)
+
+    instance.query(
+        f"""
+        DROP TABLE IF EXISTS test.kafka;
+        DROP TABLE IF EXISTS test.kafka2;
+        DROP TABLE IF EXISTS test.kafka_persistent;
+        DROP TABLE IF EXISTS test.kafka_persistent2;
+
+        CREATE TABLE test.kafka (key UInt64, value UInt64)
+            ENGINE = Kafka
+            SETTINGS kafka_broker_list = 'kafka1:19092',
+                     kafka_topic_list = '{topic}',
+                     kafka_group_name = '{topic}',
+                     kafka_commit_on_select = 1,
+                     kafka_format = 'JSONEachRow';
+
+        CREATE TABLE test.kafka2 (key UInt64, value UInt64)
+            ENGINE = Kafka
+            SETTINGS kafka_broker_list = 'kafka1:19092',
+                     kafka_topic_list = '{topic}',
+                     kafka_commit_on_select = 1,
+                     kafka_group_name = '{topic}',
+                     kafka_format = 'JSONEachRow';
+
+        CREATE TABLE test.kafka_persistent (key UInt64, value UInt64)
+            ENGINE = MergeTree()
+            ORDER BY key;
+        CREATE TABLE test.kafka_persistent2 (key UInt64, value UInt64)
+            ENGINE = MergeTree()
+            ORDER BY key;
+
+        CREATE MATERIALIZED VIEW test.persistent_kafka_mv TO test.kafka_persistent AS
+        SELECT key, value
+        FROM test.kafka;
+
+        CREATE MATERIALIZED VIEW test.persistent_kafka_mv2 TO test.kafka_persistent2 AS
+        SELECT key, value
+        FROM test.kafka2;
+        """
+    )
+
+    producer.send(topic=topic, value=json.dumps({"key": 1, "value": 1}), partition=0)
+    producer.send(topic=topic, value=json.dumps({"key": 11, "value": 11}), partition=1)
+    time.sleep(3)
+
+    retries = 0
+    result_rdkafka_stat = ""
+    while True:
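+        # rdkafka_stat is filled in asynchronously from librdkafka's periodic
+        # statistics callback, so poll until the reported JSON type becomes
+        # 'consumer' or the retry budget runs out.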
result_rdkafka_stat = instance.query( + """ + SELECT table, JSONExtractString(rdkafka_stat, 'type') + FROM system.kafka_consumers WHERE database='test' and table = 'kafka' format Vertical; + """ + ) + if result_rdkafka_stat.find("consumer") or retries > max_retries: + break + retries += 1 + time.sleep(1) + + assert ( + result_rdkafka_stat + == """Row 1: +────── +table: kafka +JSONExtractString(rdkafka_stat, 'type'): consumer +""" + ) + + instance.query("DROP TABLE test.kafka") + instance.query("DROP TABLE test.kafka2") + instance.query("DROP TABLE test.kafka_persistent") + instance.query("DROP TABLE test.kafka_persistent2") + instance.query("DROP TABLE test.persistent_kafka_mv") + instance.query("DROP TABLE test.persistent_kafka_mv2") + + kafka_delete_topic(admin_client, topic) + + +def test_system_kafka_consumers( + kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance +): + admin_client = KafkaAdminClient( + bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) + ) + + topic = "system_kafka_cons" + kafka_create_topic(admin_client, topic) + + # Check that format_csv_delimiter parameter works now - as part of all available format settings. + kafka_produce( + kafka_cluster, + topic, + ["1|foo", "2|bar", "42|answer", "100|multi\n101|row\n103|message"], + ) + + instance.query( + f""" + DROP TABLE IF EXISTS test.kafka; + + CREATE TABLE test.kafka (a UInt64, b String) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = '{topic}', + kafka_group_name = '{topic}', + kafka_commit_on_select = 1, + kafka_format = 'CSV', + kafka_row_delimiter = '\\n', + format_csv_delimiter = '|'; + """ + ) + + result = instance.query("SELECT * FROM test.kafka ORDER BY a;") + + result_system_kafka_consumers = instance.query( + """ + create or replace function stable_timestamp as + (d)->multiIf(d==toDateTime('1970-01-01 00:00:00'), 'never', abs(dateDiff('second', d, now())) < 30, 'now', toString(d)); + + SELECT database, table, length(consumer_id), assignments.topic, assignments.partition_id, + assignments.current_offset, + if(length(exceptions.time)>0, exceptions.time[1]::String, 'never') as last_exception_time_, + if(length(exceptions.text)>0, exceptions.text[1], 'no exception') as last_exception_, + stable_timestamp(last_poll_time) as last_poll_time_, num_messages_read, stable_timestamp(last_commit_time) as last_commit_time_, + num_commits, stable_timestamp(last_rebalance_time) as last_rebalance_time_, + num_rebalance_revocations, num_rebalance_assignments, is_currently_used + FROM system.kafka_consumers WHERE database='test' and table='kafka' format Vertical; + """ + ) + logging.debug(f"result_system_kafka_consumers: {result_system_kafka_consumers}") + assert ( + result_system_kafka_consumers + == """Row 1: +────── +database: test +table: kafka +length(consumer_id): 67 +assignments.topic: ['system_kafka_cons'] +assignments.partition_id: [0] +assignments.current_offset: [4] +last_exception_time_: never +last_exception_: no exception +last_poll_time_: now +num_messages_read: 4 +last_commit_time_: now +num_commits: 1 +last_rebalance_time_: never +num_rebalance_revocations: 0 +num_rebalance_assignments: 1 +is_currently_used: 0 +""" + ) + + instance.query("DROP TABLE test.kafka") + kafka_delete_topic(admin_client, topic) + + +@pytest.mark.parametrize( + "create_query_generator", + [generate_old_create_table_query, generate_new_create_table_query], +) +def test_kafka_insert( + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, 
+): + topic_name = "insert1" + get_topic_postfix(create_query_generator) + + instance.query( + create_query_generator( + "kafka", + "key UInt64, value UInt64", + topic_list=topic_name, + consumer_group=topic_name, + format="TSV", + ) + ) + + message_count = 50 + values = [] + for i in range(message_count): + values.append("({i}, {i})".format(i=i)) + values = ",".join(values) + + with existing_kafka_topic(get_admin_client(kafka_cluster), topic_name): + insert_with_retry(instance, values) + + messages = kafka_consume_with_retry(kafka_cluster, topic_name, message_count) + result = "\n".join(messages) + kafka_check_result(result, True) + + +def test_kafka_settings_old_syntax( + kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance +): + assert TSV( + instance.query( + "SELECT * FROM system.macros WHERE macro like 'kafka%' ORDER BY macro", + ignore_error=True, + ) + ) == TSV( + f"""kafka_broker kafka1 +kafka_client_id instance +kafka_format_json_each_row JSONEachRow +kafka_group_name_new {KAFKA_CONSUMER_GROUP_NEW} +kafka_group_name_old {KAFKA_CONSUMER_GROUP_OLD} +kafka_topic_new new_t +kafka_topic_old old_t +""" + ) + + instance.query( + """ + CREATE TABLE test.kafka (key UInt64, value UInt64) + ENGINE = Kafka('{kafka_broker}:19092', '{kafka_topic_old}', '{kafka_group_name_old}', '{kafka_format_json_each_row}', '\\n') + SETTINGS kafka_commit_on_select = 1; + """ + ) + + # Don't insert malformed messages since old settings syntax + # doesn't support skipping of broken messages. + messages = [] + for i in range(50): + messages.append(json.dumps({"key": i, "value": i})) + kafka_produce(kafka_cluster, KAFKA_TOPIC_OLD, messages) + + result = "" + while True: + result += instance.query("SELECT * FROM test.kafka", ignore_error=True) + if kafka_check_result(result): + break + + kafka_check_result(result, True) + + members = describe_consumer_group(kafka_cluster, KAFKA_CONSUMER_GROUP_OLD) + assert members[0]["client_id"] == "ClickHouse-instance-test-kafka" + # text_desc = kafka_cluster.exec_in_container(kafka_cluster.get_container_id('kafka1'),"kafka-consumer-groups --bootstrap-server localhost:9092 --describe --members --group old --verbose")) + + +def test_kafka_json_without_delimiter( + kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance +): + messages = "" + for i in range(25): + messages += json.dumps({"key": i, "value": i}) + "\n" + kafka_produce(kafka_cluster, "json", [messages]) + + messages = "" + for i in range(25, 50): + messages += json.dumps({"key": i, "value": i}) + "\n" + kafka_produce(kafka_cluster, "json", [messages]) + + instance.query( + """ + CREATE TABLE test.kafka (key UInt64, value UInt64) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'json', + kafka_group_name = 'json', + kafka_commit_on_select = 1, + kafka_format = 'JSONEachRow'; + """ + ) + + result = "" + while True: + result += instance.query("SELECT * FROM test.kafka", ignore_error=True) + if kafka_check_result(result): + break + + kafka_check_result(result, True) + + +def test_kafka_string_field_on_first_position_in_protobuf( + kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance +): + # https://github.com/ClickHouse/ClickHouse/issues/12615 + kafka_produce_protobuf_social( + kafka_cluster, "string_field_on_first_position_in_protobuf", 0, 20 + ) + kafka_produce_protobuf_social( + kafka_cluster, "string_field_on_first_position_in_protobuf", 20, 1 + ) + kafka_produce_protobuf_social( + kafka_cluster, "string_field_on_first_position_in_protobuf", 21, 29 + ) + 
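+    # The 50 users are produced in three batches (20, 1 and 29) so that the consumer
+    # has to handle message boundaries that do not coincide with a single produce
+    # call; this guards against the string-in-first-position regression from the
+    # issue referenced above.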
+ instance.query( + """ +CREATE TABLE test.kafka ( + username String, + timestamp Int32 + ) ENGINE = Kafka() +SETTINGS + kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'string_field_on_first_position_in_protobuf', + kafka_group_name = 'string_field_on_first_position_in_protobuf', + kafka_format = 'Protobuf', + kafka_commit_on_select = 1, + kafka_schema = 'social:User'; + """ + ) + + result = instance.query("SELECT * FROM test.kafka", ignore_error=True) + expected = """\ +John Doe 0 1000000 +John Doe 1 1000001 +John Doe 2 1000002 +John Doe 3 1000003 +John Doe 4 1000004 +John Doe 5 1000005 +John Doe 6 1000006 +John Doe 7 1000007 +John Doe 8 1000008 +John Doe 9 1000009 +John Doe 10 1000010 +John Doe 11 1000011 +John Doe 12 1000012 +John Doe 13 1000013 +John Doe 14 1000014 +John Doe 15 1000015 +John Doe 16 1000016 +John Doe 17 1000017 +John Doe 18 1000018 +John Doe 19 1000019 +John Doe 20 1000020 +John Doe 21 1000021 +John Doe 22 1000022 +John Doe 23 1000023 +John Doe 24 1000024 +John Doe 25 1000025 +John Doe 26 1000026 +John Doe 27 1000027 +John Doe 28 1000028 +John Doe 29 1000029 +John Doe 30 1000030 +John Doe 31 1000031 +John Doe 32 1000032 +John Doe 33 1000033 +John Doe 34 1000034 +John Doe 35 1000035 +John Doe 36 1000036 +John Doe 37 1000037 +John Doe 38 1000038 +John Doe 39 1000039 +John Doe 40 1000040 +John Doe 41 1000041 +John Doe 42 1000042 +John Doe 43 1000043 +John Doe 44 1000044 +John Doe 45 1000045 +John Doe 46 1000046 +John Doe 47 1000047 +John Doe 48 1000048 +John Doe 49 1000049 +""" + assert TSV(result) == TSV(expected) + + +@pytest.mark.parametrize( + "create_query_generator", + [generate_old_create_table_query, generate_new_create_table_query], +) +def test_block_based_formats_1( + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, +): + topic_name = "pretty_space" + get_topic_postfix(create_query_generator) + + with kafka_topic(get_admin_client(kafka_cluster), topic_name): + create_query = create_query_generator( + "kafka", + "key UInt64, value UInt64", + topic_list=topic_name, + consumer_group=topic_name, + format="PrettySpace", + ) + instance.query( + f""" + DROP TABLE IF EXISTS test.kafka; + + {create_query}; + + INSERT INTO test.kafka SELECT number * 10 as key, number * 100 as value FROM numbers(5) settings max_block_size=2, optimize_trivial_insert_select=0, output_format_pretty_color=1, output_format_pretty_row_numbers=0; + """ + ) + + message_count = 3 + messages = kafka_consume_with_retry(kafka_cluster, topic_name, message_count) + assert len(messages) == 3 + + data = [] + for message in messages: + splitted = message.split("\n") + assert splitted[0] == " \x1b[1mkey\x1b[0m \x1b[1mvalue\x1b[0m" + assert splitted[1] == "" + assert splitted[-1] == "" + data += [line.split() for line in splitted[2:-1]] + + assert data == [ + ["0", "0"], + ["10", "100"], + ["20", "200"], + ["30", "300"], + ["40", "400"], + ] + + +if __name__ == "__main__": + init_cluster_and_instance() + conftest_cluster.start() + input("Cluster created, press any key to destroy...") + conftest_cluster.shutdown() diff --git a/tests/integration/test_storage_kafka/test_3.py b/tests/integration/test_storage_kafka/test_3.py new file mode 100644 index 00000000000..8a43014a5de --- /dev/null +++ b/tests/integration/test_storage_kafka/test_3.py @@ -0,0 +1,1088 @@ +import json +import logging +import random +import string +import threading +import time + +import kafka.errors +import pytest +from helpers.cluster import ClickHouseCluster, ClickHouseInstance, is_arm 
+from helpers.network import PartitionManager +from helpers.test_tools import TSV +from kafka import KafkaAdminClient +from test_storage_kafka.conftest import ( + KAFKA_CONSUMER_GROUP_NEW, + KAFKA_TOPIC_NEW, + conftest_cluster, + init_cluster_and_instance, +) +from test_storage_kafka.kafka_tests_utils import ( + describe_consumer_group, + existing_kafka_topic, + generate_new_create_table_query, + generate_old_create_table_query, + get_admin_client, + get_topic_postfix, + kafka_check_result, + kafka_consume_with_retry, + kafka_create_topic, + kafka_delete_topic, + kafka_produce, + kafka_topic, + must_use_thread_per_consumer, +) + +# Skip all tests on ARM +if is_arm(): + pytestmark = pytest.mark.skip + + +def random_string(size=8): + return "".join(random.choices(string.ascii_uppercase + string.digits, k=size)) + + +def test_formats_errors(kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance): + admin_client = KafkaAdminClient( + bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) + ) + + formats_to_test = [ + "Template", + "Regexp", + "TSV", + "TSVWithNamesAndTypes", + "TSKV", + "CSV", + "CSVWithNames", + "CSVWithNamesAndTypes", + "CustomSeparated", + "CustomSeparatedWithNames", + "CustomSeparatedWithNamesAndTypes", + "Values", + "JSON", + "JSONEachRow", + "JSONStringsEachRow", + "JSONCompactEachRow", + "JSONCompactEachRowWithNamesAndTypes", + "JSONObjectEachRow", + "Avro", + "RowBinary", + "RowBinaryWithNamesAndTypes", + "MsgPack", + "JSONColumns", + "JSONCompactColumns", + "JSONColumnsWithMetadata", + "BSONEachRow", + "Native", + "Arrow", + "Parquet", + "ORC", + "Npy", + "ParquetMetadata", + "CapnProto", + "Protobuf", + "ProtobufSingle", + "ProtobufList", + "DWARF", + "HiveText", + "MySQLDump", + ] + + for format_name in formats_to_test: + kafka_create_topic(admin_client, format_name) + table_name = f"kafka_{format_name}" + + instance.query( + f""" + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.{table_name}; + + CREATE TABLE test.{table_name} (key UInt64, value UInt64) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = '{format_name}', + kafka_group_name = '{format_name}', + kafka_format = '{format_name}', + kafka_max_rows_per_message = 5, + format_template_row='template_row.format', + format_regexp='id: (.+?)', + input_format_with_names_use_header=0, + format_schema='key_value_message:Message'; + + CREATE MATERIALIZED VIEW test.{table_name}_view ENGINE=MergeTree ORDER BY (key, value) AS + SELECT key, value FROM test.{table_name}; + """ + ) + + kafka_produce( + kafka_cluster, + format_name, + ["Broken message\nBroken message\nBroken message\n"], + ) + + for format_name in formats_to_test: + table_name = f"kafka_{format_name}" + num_errors = int( + instance.query_with_retry( + f"SELECT length(exceptions.text) from system.kafka_consumers where database = 'test' and table = '{table_name}'", + check_callback=lambda res: int(res) > 0, + ) + ) + + assert num_errors > 0, f"Failed {format_name}" + + instance.query(f"DROP TABLE test.{table_name}") + instance.query(f"DROP TABLE test.{table_name}_view") + kafka_delete_topic(admin_client, format_name) + + +@pytest.mark.parametrize( + "create_query_generator", + [generate_old_create_table_query, generate_new_create_table_query], +) +def test_block_based_formats_2( + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, +): + admin_client = get_admin_client(kafka_cluster) + num_rows = 100 + message_count = 9 + + for format_name in [ + 
"JSONColumns", + "Native", + "Arrow", + "Parquet", + "ORC", + "JSONCompactColumns", + ]: + topic_name = format_name + get_topic_postfix(create_query_generator) + table_name = f"kafka_{format_name}" + logging.debug(f"Checking format {format_name}") + with kafka_topic(admin_client, topic_name): + create_query = create_query_generator( + table_name, + "key UInt64, value UInt64", + topic_list=topic_name, + consumer_group=topic_name, + format=format_name, + ) + + instance.query( + f""" + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.{table_name}; + + {create_query}; + + CREATE MATERIALIZED VIEW test.view ENGINE=MergeTree ORDER BY (key, value) AS + SELECT key, value FROM test.{table_name}; + + INSERT INTO test.{table_name} SELECT number * 10 as key, number * 100 as value FROM numbers({num_rows}) settings max_block_size=12, optimize_trivial_insert_select=0; + """ + ) + messages = kafka_consume_with_retry( + kafka_cluster, topic_name, message_count, need_decode=False + ) + assert len(messages) == message_count + + rows = int( + instance.query_with_retry( + "SELECT count() FROM test.view", + check_callback=lambda res: int(res) == num_rows, + ) + ) + + assert rows == num_rows + + result = instance.query("SELECT * FROM test.view ORDER by key") + expected = "" + for i in range(num_rows): + expected += str(i * 10) + "\t" + str(i * 100) + "\n" + assert result == expected + + +@pytest.mark.parametrize( + "create_query_generator", + [generate_old_create_table_query, generate_new_create_table_query], +) +def test_kafka_flush_on_big_message( + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, +): + # Create batches of messages of size ~100Kb + kafka_messages = 1000 + batch_messages = 1000 + topic_name = "flush" + get_topic_postfix(create_query_generator) + messages = [ + json.dumps({"key": i, "value": "x" * 100}) * batch_messages + for i in range(kafka_messages) + ] + kafka_produce(kafka_cluster, topic_name, messages) + + admin_client = get_admin_client(kafka_cluster) + + with existing_kafka_topic(admin_client, topic_name): + create_query = create_query_generator( + "kafka", + "key UInt64, value String", + topic_list=topic_name, + consumer_group=topic_name, + settings={"kafka_max_block_size": 10}, + ) + + instance.query( + f""" + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.consumer; + {create_query}; + CREATE TABLE test.view (key UInt64, value String) + ENGINE = MergeTree + ORDER BY key; + CREATE MATERIALIZED VIEW test.consumer TO test.view AS + SELECT * FROM test.kafka; + """ + ) + + received = False + while not received: + try: + offsets = admin_client.list_consumer_group_offsets(topic_name) + for topic, offset in list(offsets.items()): + if topic.topic == topic_name and offset.offset == kafka_messages: + received = True + break + except kafka.errors.GroupCoordinatorNotAvailableError: + continue + + while True: + result = instance.query("SELECT count() FROM test.view") + if int(result) == kafka_messages * batch_messages: + break + + instance.query( + """ + DROP TABLE test.consumer; + DROP TABLE test.view; + """ + ) + + assert ( + int(result) == kafka_messages * batch_messages + ), "ClickHouse lost some messages: {}".format(result) + + +@pytest.mark.parametrize( + "create_query_generator, log_line", + [ + ( + generate_new_create_table_query, + r"kafka.*Saved offset [0-9]+ for topic-partition \[recreate_kafka_table:[0-9]+", + ), + ( + generate_old_create_table_query, + "kafka.*Committed offset [0-9]+.*recreate_kafka_table", + ), + ], +) 
+def test_kafka_recreate_kafka_table(
+    kafka_cluster: ClickHouseCluster,
+    instance: ClickHouseInstance,
+    create_query_generator,
+    log_line,
+):
+    """
+    Checks that the materialized view works properly after dropping and recreating the Kafka table.
+    """
+    topic_name = "recreate_kafka_table"
+    thread_per_consumer = must_use_thread_per_consumer(create_query_generator)
+
+    with kafka_topic(get_admin_client(kafka_cluster), topic_name, num_partitions=6):
+        create_query = create_query_generator(
+            "kafka",
+            "key UInt64, value UInt64",
+            topic_list=topic_name,
+            consumer_group="recreate_kafka_table_group",
+            settings={
+                "kafka_num_consumers": 4,
+                "kafka_flush_interval_ms": 1000,
+                "kafka_skip_broken_messages": 1048577,
+                "kafka_thread_per_consumer": thread_per_consumer,
+            },
+        )
+
+        instance.query(
+            f"""
+            DROP TABLE IF EXISTS test.view;
+            DROP TABLE IF EXISTS test.consumer;
+            {create_query};
+
+            CREATE TABLE test.view (key UInt64, value UInt64)
+                ENGINE = MergeTree()
+                ORDER BY key;
+            CREATE MATERIALIZED VIEW test.consumer TO test.view AS
+                SELECT * FROM test.kafka;
+            """
+        )
+
+        messages = []
+        for i in range(120):
+            messages.append(json.dumps({"key": i, "value": i}))
+        kafka_produce(kafka_cluster, "recreate_kafka_table", messages)
+
+        instance.wait_for_log_line(
+            log_line,
+            repetitions=6,
+            look_behind_lines=100,
+        )
+
+        instance.query(
+            """
+            DROP TABLE test.kafka;
+            """
+        )
+
+        instance.rotate_logs()
+
+        kafka_produce(kafka_cluster, "recreate_kafka_table", messages)
+
+        instance.query(create_query)
+
+        instance.wait_for_log_line(
+            log_line,
+            repetitions=6,
+            look_behind_lines=100,
+        )
+
+        # Both batches of 120 messages should have reached the view by now.
+        assert int(instance.query("SELECT count() FROM test.view")) == 240
+
+        instance.query(
+            """
+            DROP TABLE test.consumer;
+            DROP TABLE test.kafka;
+            DROP TABLE test.view;
+            """
+        )
+
+
+@pytest.mark.parametrize(
+    "create_query_generator",
+    [generate_old_create_table_query, generate_new_create_table_query],
+)
+def test_kafka_commit_on_block_write(
+    kafka_cluster: ClickHouseCluster,
+    instance: ClickHouseInstance,
+    create_query_generator,
+):
+    topic_name = "block" + get_topic_postfix(create_query_generator)
+    create_query = create_query_generator(
+        "kafka",
+        "key UInt64, value UInt64",
+        topic_list=topic_name,
+        consumer_group=topic_name,
+        settings={"kafka_max_block_size": 100},
+    )
+    instance.query(
+        f"""
+        DROP TABLE IF EXISTS test.view;
+        DROP TABLE IF EXISTS test.consumer;
+        {create_query};
+        CREATE TABLE test.view (key UInt64, value UInt64)
+            ENGINE = MergeTree()
+            ORDER BY key;
+        CREATE MATERIALIZED VIEW test.consumer TO test.view AS
+            SELECT * FROM test.kafka;
+        """
+    )
+
+    cancel = threading.Event()
+
+    # Python integers are immutable, so the producer thread cannot update a plain
+    # int in a way this function would observe. Wrapping the counter in a
+    # one-element list lets the thread mutate the shared object in place, as the
+    # sketch below illustrates.
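+    #
+    # A minimal illustration of the pattern (names here are hypothetical, not part
+    # of this test):
+    #
+    #     counter = [0]
+    #
+    #     def worker():
+    #         counter[0] += 1  # mutates the shared list element in place
+    #
+    # Writing `counter = 0` with `counter += 1` inside worker() would instead raise
+    # UnboundLocalError, because the assignment makes `counter` local to worker().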
+    i = [0]
+
+    def produce(i):
+        while not cancel.is_set():
+            messages = []
+            for _ in range(101):
+                messages.append(json.dumps({"key": i[0], "value": i[0]}))
+                i[0] += 1
+            kafka_produce(kafka_cluster, topic_name, messages)
+
+    kafka_thread = threading.Thread(target=produce, args=[i])
+    kafka_thread.start()
+
+    instance.query_with_retry(
+        "SELECT count() FROM test.view",
+        sleep_time=1,
+        check_callback=lambda res: int(res) >= 100,
+    )
+
+    cancel.set()
+
+    instance.query("DROP TABLE test.kafka SYNC")
+
+    instance.query(create_query)
+    kafka_thread.join()
+
+    instance.query_with_retry(
+        "SELECT uniqExact(key) FROM test.view",
+        sleep_time=1,
+        check_callback=lambda res: int(res) >= i[0],
+    )
+
+    result = int(instance.query("SELECT count() == uniqExact(key) FROM test.view"))
+
+    instance.query(
+        """
+        DROP TABLE test.consumer;
+        DROP TABLE test.view;
+        """
+    )
+
+    kafka_thread.join()
+
+    assert result == 1, "Messages from kafka get duplicated!"
+
+
+# TODO(antaljanosbenjamin): find another way to make insertion fail
+@pytest.mark.parametrize(
+    "create_query_generator",
+    [
+        generate_old_create_table_query,
+        # generate_new_create_table_query,
+    ],
+)
+def test_kafka_no_holes_when_write_suffix_failed(
+    kafka_cluster: ClickHouseCluster,
+    instance: ClickHouseInstance,
+    create_query_generator,
+):
+    admin_client = KafkaAdminClient(
+        bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port)
+    )
+    topic_name = "no_holes_when_write_suffix_failed" + get_topic_postfix(
+        create_query_generator
+    )
+
+    with kafka_topic(admin_client, topic_name):
+        messages = [json.dumps({"key": j + 1, "value": "x" * 300}) for j in range(22)]
+        kafka_produce(kafka_cluster, topic_name, messages)
+
+        create_query = create_query_generator(
+            "kafka",
+            "key UInt64, value String",
+            topic_list=topic_name,
+            consumer_group=topic_name,
+            settings={
+                "kafka_max_block_size": 20,
+                "kafka_flush_interval_ms": 2000,
+            },
+        )
+        instance.query(
+            f"""
+            DROP TABLE IF EXISTS test.view SYNC;
+            DROP TABLE IF EXISTS test.consumer;
+
+            {create_query};
+
+            CREATE TABLE test.view (key UInt64, value String)
+                ENGINE = ReplicatedMergeTree('/clickhouse/kafkatest/tables/{topic_name}', 'node1')
+                ORDER BY key;
+            """
+        )
+
+        # Initialize the PartitionManager early, since it starts its container.
+        pm = PartitionManager()
+
+        instance.query(
+            """
+            CREATE MATERIALIZED VIEW test.consumer TO test.view AS
+                SELECT * FROM test.kafka
+                WHERE NOT sleepEachRow(0.25);
+            """
+        )
+
+        instance.wait_for_log_line("Polled batch of 20 messages")
+        # The tricky part: the disconnect has to happen after the write prefix but
+        # before the write suffix. sleepEachRow(0.25) * 20 rows gives a ~5 second
+        # window after "Polled batch of 20 messages" during which the materialized
+        # view is still working, so we inject the ZooKeeper failure within it.
+        pm.drop_instance_zk_connections(instance)
+        instance.wait_for_log_line(
+            "Error.*(Connection loss|Coordination::Exception).*while pushing to view"
+        )
+        pm.heal_all()
+        instance.wait_for_log_line("Committed offset 22")
+
+        result = instance.query(
+            "SELECT count(), uniqExact(key), max(key) FROM test.view"
+        )
+        logging.debug(result)
+
+        # kafka_cluster.open_bash_shell('instance')
+
+        instance.query(
+            """
+            DROP TABLE test.consumer;
+            DROP TABLE test.view SYNC;
+            """
+        )
+
+        assert TSV(result) == TSV("22\t22\t22")
+
+
+@pytest.mark.parametrize(
+    "create_query_generator",
+    [generate_old_create_table_query, generate_new_create_table_query],
+)
+def test_kafka_flush_by_block_size(
+    kafka_cluster: ClickHouseCluster,
+    instance: ClickHouseInstance,
+    create_query_generator,
+):
+    topic_name = "flush_by_block_size" + get_topic_postfix(create_query_generator)
+
+    cancel = threading.Event()
+
+    def produce():
+        while not cancel.is_set():
+            messages = []
+            messages.append(json.dumps({"key": 0, "value": 0}))
+            kafka_produce(kafka_cluster, topic_name, messages)
+
+    kafka_thread = threading.Thread(target=produce)
+
+    with kafka_topic(get_admin_client(kafka_cluster), topic_name):
+        kafka_thread.start()
+
+        create_query = create_query_generator(
+            "kafka",
+            "key UInt64, value UInt64",
+            topic_list=topic_name,
+            consumer_group=topic_name,
+            settings={
+                "kafka_max_block_size": 100,
+                "kafka_poll_max_batch_size": 1,
+                "kafka_flush_interval_ms": 120000,
+            },
+        )
+
+        instance.query(
+            f"""
+            DROP TABLE IF EXISTS test.view;
+            DROP TABLE IF EXISTS test.consumer;
+
+            {create_query};
+
+            CREATE TABLE test.view (key UInt64, value UInt64)
+                ENGINE = MergeTree()
+                ORDER BY key;
+
+            CREATE MATERIALIZED VIEW test.consumer TO test.view AS
+                SELECT * FROM test.kafka;
+            """
+        )
+
+        # Wait for the Kafka engine to consume this data
+        while int(
+            instance.query(
+                "SELECT count() FROM system.parts WHERE database = 'test' AND table = 'view' AND name = 'all_1_1_0'"
+            )
+        ) != 1:
+            time.sleep(0.5)
+
+        cancel.set()
+        kafka_thread.join()
+
+        # More flushes can happen during the test; we only need to check the result
+        # of the first flush (the part named all_1_1_0).
+        result = instance.query("SELECT count() FROM test.view WHERE _part='all_1_1_0'")
+        # logging.debug(result)
+
+        instance.query(
+            """
+            DROP TABLE test.consumer;
+            DROP TABLE test.view;
+            """
+        )
+
+        # The first poll should return exactly 100 rows (kafka_max_block_size), and
+        # the block should be flushed as soon as it is formed instead of waiting for
+        # stream_flush_interval_ms.
+        assert (
+            int(result) == 100
+        ), "Messages from kafka should be flushed when block of size kafka_max_block_size is formed!"
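+
+
+# A helper sketch that factors out the busy-wait on system.parts used in
+# test_kafka_flush_by_block_size above. It is illustrative only: the helper name,
+# parameters and timeout are assumptions, not an API used elsewhere in this suite.
+def wait_for_first_part(instance, table, part_name="all_1_1_0", timeout=60):
+    """Poll system.parts until the given part of a table in the `test` database appears."""
+    deadline = time.monotonic() + timeout
+    while time.monotonic() < deadline:
+        count = int(
+            instance.query(
+                f"SELECT count() FROM system.parts "
+                f"WHERE database = 'test' AND table = '{table}' AND name = '{part_name}'"
+            )
+        )
+        if count == 1:
+            return
+        time.sleep(0.5)
+    raise TimeoutError(f"Part {part_name} of test.{table} did not appear within {timeout}s")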
+ + +@pytest.mark.parametrize( + "create_query_generator", + [generate_old_create_table_query, generate_new_create_table_query], +) +def test_max_rows_per_message( + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, +): + topic_name = "custom_max_rows_per_message" + get_topic_postfix( + create_query_generator + ) + + with kafka_topic(get_admin_client(kafka_cluster), topic_name): + num_rows = 5 + + create_query = create_query_generator( + "kafka", + "key UInt64, value UInt64", + topic_list=topic_name, + consumer_group=topic_name, + format="CustomSeparated", + settings={ + "format_custom_result_before_delimiter": "\n", + "format_custom_result_after_delimiter": "\n", + "kafka_max_rows_per_message": 3, + }, + ) + instance.query( + f""" + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.kafka; + {create_query}; + + CREATE MATERIALIZED VIEW test.view ENGINE=MergeTree ORDER BY (key, value) AS + SELECT key, value FROM test.kafka; + """ + ) + + instance.query( + f"INSERT INTO test.kafka select number*10 as key, number*100 as value from numbers({num_rows}) settings format_custom_result_before_delimiter='\n', format_custom_result_after_delimiter='\n'" + ) + + message_count = 2 + messages = kafka_consume_with_retry(kafka_cluster, topic_name, message_count) + + assert len(messages) == message_count + + assert ( + "".join(messages) + == "\n0\t0\n10\t100\n20\t200\n\n\n30\t300\n40\t400\n\n" + ) + + instance.query_with_retry( + "SELECT count() FROM test.view", + check_callback=lambda res: int(res) == num_rows, + ) + + result = instance.query("SELECT * FROM test.view") + assert result == "0\t0\n10\t100\n20\t200\n30\t300\n40\t400\n" + + +@pytest.mark.parametrize( + "create_query_generator", + [generate_old_create_table_query, generate_new_create_table_query], +) +def test_kafka_many_materialized_views( + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, +): + topic_name = f"mmv-{get_topic_postfix(create_query_generator)}" + create_query = create_query_generator( + "kafka", + "key UInt64, value UInt64", + topic_list=topic_name, + consumer_group=f"{topic_name}-group", + ) + + instance.query( + f""" + DROP TABLE IF EXISTS test.view1; + DROP TABLE IF EXISTS test.view2; + DROP TABLE IF EXISTS test.consumer1; + DROP TABLE IF EXISTS test.consumer2; + {create_query}; + CREATE TABLE test.view1 (key UInt64, value UInt64) + ENGINE = MergeTree() + ORDER BY key; + CREATE TABLE test.view2 (key UInt64, value UInt64) + ENGINE = MergeTree() + ORDER BY key; + CREATE MATERIALIZED VIEW test.consumer1 TO test.view1 AS + SELECT * FROM test.kafka; + CREATE MATERIALIZED VIEW test.consumer2 TO test.view2 AS + SELECT * FROM test.kafka; + """ + ) + + messages = [] + for i in range(50): + messages.append(json.dumps({"key": i, "value": i})) + kafka_produce(kafka_cluster, topic_name, messages) + + with existing_kafka_topic(get_admin_client(kafka_cluster), topic_name): + result1 = instance.query_with_retry( + "SELECT * FROM test.view1", check_callback=kafka_check_result + ) + result2 = instance.query_with_retry( + "SELECT * FROM test.view2", check_callback=kafka_check_result + ) + + instance.query( + """ + DROP TABLE test.consumer1; + DROP TABLE test.consumer2; + DROP TABLE test.view1; + DROP TABLE test.view2; + """ + ) + + kafka_check_result(result1, True) + kafka_check_result(result2, True) + + +@pytest.mark.parametrize( + "create_query_generator", + [generate_old_create_table_query, generate_new_create_table_query], +) +def 
test_kafka_engine_put_errors_to_stream( + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, +): + topic_name = "kafka_engine_put_errors_to_stream" + get_topic_postfix( + create_query_generator + ) + create_query = create_query_generator( + "kafka", + "i Int64, s String", + topic_list=topic_name, + consumer_group=topic_name, + settings={ + "kafka_max_block_size": 128, + "kafka_handle_error_mode": "stream", + }, + ) + instance.query( + f""" + DROP TABLE IF EXISTS test.kafka; + DROP TABLE IF EXISTS test.kafka_data; + DROP TABLE IF EXISTS test.kafka_errors; + {create_query}; + CREATE MATERIALIZED VIEW test.kafka_data (i Int64, s String) + ENGINE = MergeTree + ORDER BY i + AS SELECT i, s FROM test.kafka WHERE length(_error) == 0; + CREATE MATERIALIZED VIEW test.kafka_errors (topic String, partition Int64, offset Int64, raw String, error String) + ENGINE = MergeTree + ORDER BY (topic, offset) + AS SELECT + _topic AS topic, + _partition AS partition, + _offset AS offset, + _raw_message AS raw, + _error AS error + FROM test.kafka WHERE length(_error) > 0; + """ + ) + + messages = [] + for i in range(128): + if i % 2 == 0: + messages.append(json.dumps({"i": i, "s": random_string(8)})) + else: + # Unexpected json content for table test.kafka. + messages.append( + json.dumps({"i": "n_" + random_string(4), "s": random_string(8)}) + ) + + kafka_produce(kafka_cluster, topic_name, messages) + with existing_kafka_topic(get_admin_client(kafka_cluster), topic_name): + instance.wait_for_log_line("Committed offset 128") + + assert TSV(instance.query("SELECT count() FROM test.kafka_data")) == TSV("64") + assert TSV(instance.query("SELECT count() FROM test.kafka_errors")) == TSV("64") + + instance.query( + """ + DROP TABLE test.kafka; + DROP TABLE test.kafka_data; + DROP TABLE test.kafka_errors; + """ + ) + + +@pytest.mark.parametrize( + "create_query_generator, do_direct_read", + [(generate_old_create_table_query, True), (generate_new_create_table_query, False)], +) +def test_kafka_producer_consumer_separate_settings( + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, + do_direct_read, +): + instance.rotate_logs() + instance.query( + create_query_generator( + "test_kafka", + "key UInt64", + topic_list="separate_settings", + consumer_group="test", + ) + ) + + if do_direct_read: + instance.query("SELECT * FROM test.test_kafka") + instance.query("INSERT INTO test.test_kafka VALUES (1)") + + assert instance.contains_in_log("Kafka producer created") + assert instance.contains_in_log("Created #0 consumer") + + kafka_conf_warnings = instance.grep_in_log("rdk:CONFWARN") + + assert kafka_conf_warnings is not None + + for warn in kafka_conf_warnings.strip().split("\n"): + # this setting was applied via old syntax and applied on both consumer + # and producer configurations + assert "heartbeat.interval.ms" in warn + + kafka_consumer_applied_properties = instance.grep_in_log("Consumer set property") + kafka_producer_applied_properties = instance.grep_in_log("Producer set property") + + assert kafka_consumer_applied_properties is not None + assert kafka_producer_applied_properties is not None + + # global settings should be applied for consumer and producer + global_settings = { + "debug": "topic,protocol,cgrp,consumer", + "statistics.interval.ms": "600", + } + + for name, value in global_settings.items(): + property_in_log = f"{name}:{value}" + assert property_in_log in kafka_consumer_applied_properties + assert property_in_log in 
kafka_producer_applied_properties
+
+    settings_topic__separate_settings__consumer = {"session.timeout.ms": "6001"}
+
+    for name, value in settings_topic__separate_settings__consumer.items():
+        property_in_log = f"{name}:{value}"
+        assert property_in_log in kafka_consumer_applied_properties
+        assert property_in_log not in kafka_producer_applied_properties
+
+    producer_settings = {"transaction.timeout.ms": "60001"}
+
+    for name, value in producer_settings.items():
+        property_in_log = f"{name}:{value}"
+        assert property_in_log not in kafka_consumer_applied_properties
+        assert property_in_log in kafka_producer_applied_properties
+
+    # Should be ignored, because it is inside the producer tag
+    producer_legacy_syntax__topic_separate_settings = {"message.timeout.ms": "300001"}
+
+    for name, value in producer_legacy_syntax__topic_separate_settings.items():
+        property_in_log = f"{name}:{value}"
+        assert property_in_log not in kafka_consumer_applied_properties
+        assert property_in_log not in kafka_producer_applied_properties
+
+    # Old syntax, applied to both consumer and producer
+    legacy_syntax__topic_separated_settings = {"heartbeat.interval.ms": "302"}
+
+    for name, value in legacy_syntax__topic_separated_settings.items():
+        property_in_log = f"{name}:{value}"
+        assert property_in_log in kafka_consumer_applied_properties
+        assert property_in_log in kafka_producer_applied_properties
+
+
+def test_kafka_virtual_columns(
+    kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance
+):
+    topic_config = {
+        # default retention, since predefined timestamp_ms is used.
+        "retention.ms": "-1",
+    }
+    with kafka_topic(get_admin_client(kafka_cluster), "virt1", config=topic_config):
+        instance.query(
+            """
+            CREATE TABLE test.kafka (key UInt64, value UInt64)
+                ENGINE = Kafka
+                SETTINGS kafka_broker_list = 'kafka1:19092',
+                         kafka_topic_list = 'virt1',
+                         kafka_group_name = 'virt1',
+                         kafka_commit_on_select = 1,
+                         kafka_format = 'JSONEachRow';
+            """
+        )
+
+        messages = ""
+        for i in range(25):
+            messages += json.dumps({"key": i, "value": i}) + "\n"
+        kafka_produce(kafka_cluster, "virt1", [messages], 0)
+
+        messages = ""
+        for i in range(25, 50):
+            messages += json.dumps({"key": i, "value": i}) + "\n"
+        kafka_produce(kafka_cluster, "virt1", [messages], 0)
+
+        result = ""
+        while True:
+            result += instance.query(
+                """SELECT _key, key, _topic, value, _offset, _partition, _timestamp = 0 ? '0000-00-00 00:00:00' : toString(_timestamp) AS _timestamp FROM test.kafka""",
+                ignore_error=True,
+            )
+            if kafka_check_result(result, False, "test_kafka_virtual1.reference"):
+                break
+
+        kafka_check_result(result, True, "test_kafka_virtual1.reference")
+
+
+def test_kafka_settings_new_syntax(
+    kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance
+):
+    instance.query(
+        """
+        CREATE TABLE test.kafka (key UInt64, value UInt64)
+            ENGINE = Kafka
+            SETTINGS kafka_broker_list = '{kafka_broker}:19092',
+                     kafka_topic_list = '{kafka_topic_new}',
+                     kafka_group_name = '{kafka_group_name_new}',
+                     kafka_format = '{kafka_format_json_each_row}',
+                     kafka_row_delimiter = '\\n',
+                     kafka_commit_on_select = 1,
+                     kafka_client_id = '{kafka_client_id} test 1234',
+                     kafka_skip_broken_messages = 1;
+        """
+    )
+
+    messages = []
+    for i in range(25):
+        messages.append(json.dumps({"key": i, "value": i}))
+    kafka_produce(kafka_cluster, KAFKA_TOPIC_NEW, messages)
+
+    # Insert a couple of malformed messages.
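+    # kafka_skip_broken_messages = 1 in the CREATE TABLE above lets the engine skip
+    # up to one unparsable message per consumed block instead of failing the read,
+    # which is why the two broken payloads below never show up in the results.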
+ kafka_produce(kafka_cluster, KAFKA_TOPIC_NEW, ["}{very_broken_message,"]) + kafka_produce(kafka_cluster, KAFKA_TOPIC_NEW, ["}another{very_broken_message,"]) + + messages = [] + for i in range(25, 50): + messages.append(json.dumps({"key": i, "value": i})) + kafka_produce(kafka_cluster, KAFKA_TOPIC_NEW, messages) + + result = "" + while True: + result += instance.query("SELECT * FROM test.kafka", ignore_error=True) + if kafka_check_result(result): + break + + kafka_check_result(result, True) + + members = describe_consumer_group(kafka_cluster, KAFKA_CONSUMER_GROUP_NEW) + assert members[0]["client_id"] == "instance test 1234" + + +def test_kafka_tsv_with_delimiter( + kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance +): + messages = [] + for i in range(50): + messages.append("{i}\t{i}".format(i=i)) + kafka_produce(kafka_cluster, "tsv", messages) + + instance.query( + """ + CREATE TABLE test.kafka (key UInt64, value UInt64) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'tsv', + kafka_commit_on_select = 1, + kafka_group_name = 'tsv', + kafka_format = 'TSV'; + """ + ) + + result = "" + while True: + result += instance.query("SELECT * FROM test.kafka", ignore_error=True) + if kafka_check_result(result): + break + + kafka_check_result(result, True) + + +def test_kafka_csv_with_delimiter( + kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance +): + messages = [] + for i in range(50): + messages.append("{i}, {i}".format(i=i)) + kafka_produce(kafka_cluster, "csv", messages) + + instance.query( + """ + CREATE TABLE test.kafka (key UInt64, value UInt64) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'csv', + kafka_commit_on_select = 1, + kafka_group_name = 'csv', + kafka_format = 'CSV'; + """ + ) + + result = "" + while True: + result += instance.query("SELECT * FROM test.kafka", ignore_error=True) + if kafka_check_result(result): + break + + kafka_check_result(result, True) + + +@pytest.mark.parametrize( + "create_query_generator", + [generate_old_create_table_query, generate_new_create_table_query], +) +def test_format_with_prefix_and_suffix( + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, +): + topic_name = "custom" + get_topic_postfix(create_query_generator) + + with kafka_topic(get_admin_client(kafka_cluster), topic_name): + create_query = create_query_generator( + "kafka", + "key UInt64, value UInt64", + topic_list=topic_name, + consumer_group=topic_name, + format="CustomSeparated", + ) + instance.query( + f""" + DROP TABLE IF EXISTS test.kafka; + {create_query}; + """ + ) + + instance.query( + "INSERT INTO test.kafka select number*10 as key, number*100 as value from numbers(2) settings format_custom_result_before_delimiter='\n', format_custom_result_after_delimiter='\n'" + ) + + message_count = 2 + messages = kafka_consume_with_retry(kafka_cluster, topic_name, message_count) + + assert len(messages) == 2 + + assert ( + "".join(messages) + == "\n0\t0\n\n\n10\t100\n\n" + ) + + +if __name__ == "__main__": + init_cluster_and_instance() + conftest_cluster.start() + input("Cluster created, press any key to destroy...") + conftest_cluster.shutdown() diff --git a/tests/integration/test_storage_kafka/test.py b/tests/integration/test_storage_kafka/test_4.py similarity index 57% rename from tests/integration/test_storage_kafka/test.py rename to tests/integration/test_storage_kafka/test_4.py index 0783b50caae..c72d2e01e0a 100644 --- 
a/tests/integration/test_storage_kafka/test.py +++ b/tests/integration/test_storage_kafka/test_4.py @@ -1,312 +1,45 @@ import json -import os.path as p -import random -import socket +import logging import threading import time -import logging -import io -import string -import math + import pytest -import avro.schema -import avro.io -import avro.datafile from confluent_kafka.avro.cached_schema_registry_client import ( CachedSchemaRegistryClient, ) -import kafka.errors -from kafka import KafkaAdminClient, KafkaProducer, BrokerConnection -from kafka.protocol.admin import DescribeGroupsRequest_v1 -from kafka.protocol.group import MemberAssignment -from google.protobuf.internal.encoder import _VarintBytes - - from helpers.client import QueryRuntimeException -from helpers.cluster import is_arm, ClickHouseCluster, ClickHouseInstance -from helpers.network import PartitionManager +from helpers.cluster import ClickHouseCluster, ClickHouseInstance, is_arm from helpers.test_tools import TSV -from test_storage_kafka.kafka_tests_utils import ( - kafka_create_topic, - kafka_delete_topic, - get_kafka_producer, - generate_old_create_table_query, - generate_new_create_table_query, - kafka_produce, - producer_serializer, - avro_message, - avro_confluent_message, - get_admin_client, - kafka_produce_protobuf_messages, - kafka_produce_protobuf_social, - kafka_produce_protobuf_messages_no_delimiters, - kafka_topic, - existing_kafka_topic, - must_use_thread_per_consumer, - get_topic_postfix, - kafka_consume_with_retry, -) +from kafka import KafkaAdminClient, KafkaProducer from test_storage_kafka.conftest import ( + KAFKA_TOPIC_NEW, conftest_cluster, init_cluster_and_instance, - KAFKA_TOPIC_OLD, - KAFKA_CONSUMER_GROUP_OLD, - KAFKA_TOPIC_NEW, - KAFKA_CONSUMER_GROUP_NEW, ) - -from test_storage_kafka import message_with_repeated_pb2 +from test_storage_kafka.kafka_tests_utils import ( + avro_confluent_message, + avro_message, + describe_consumer_group, + existing_kafka_topic, + generate_new_create_table_query, + generate_old_create_table_query, + get_admin_client, + get_kafka_producer, + get_topic_postfix, + kafka_check_result, + kafka_create_topic, + kafka_delete_topic, + kafka_produce, + kafka_produce_protobuf_messages_no_delimiters, + kafka_topic, + must_use_thread_per_consumer, + producer_serializer, +) # Skip all tests on ARM if is_arm(): pytestmark = pytest.mark.skip -# TODO: add test for run-time offset update in CH, if we manually update it on Kafka side. -# TODO: add test for SELECT LIMIT is working. - - -# Tests -@pytest.mark.parametrize( - "create_query_generator, do_direct_read", - [(generate_old_create_table_query, True), (generate_new_create_table_query, False)], -) -def test_kafka_column_types( - kafka_cluster: ClickHouseCluster, - instance: ClickHouseInstance, - create_query_generator, - do_direct_read, -): - def assert_returned_exception(e): - assert e.value.returncode == 36 - assert ( - "KafkaEngine doesn't support DEFAULT/MATERIALIZED/EPHEMERAL expressions for columns." 
- in str(e.value) - ) - - # check column with DEFAULT expression - with pytest.raises(QueryRuntimeException) as exception: - instance.query(create_query_generator("kafka", "a Int, b Int DEFAULT 0")) - assert_returned_exception(exception) - - # check EPHEMERAL - with pytest.raises(QueryRuntimeException) as exception: - instance.query(create_query_generator("kafka", "a Int, b Int EPHEMERAL")) - assert_returned_exception(exception) - - # check MATERIALIZED - with pytest.raises(QueryRuntimeException) as exception: - instance.query( - """ - CREATE TABLE test.kafka (a Int, b String MATERIALIZED toString(a)) - ENGINE = Kafka('{kafka_broker}:19092', '{kafka_topic_new}', '{kafka_group_name_new}', '{kafka_format_json_each_row}', '\\n') - """ - ) - assert_returned_exception(exception) - - if do_direct_read: - # check ALIAS - instance.query( - create_query_generator( - "kafka", - "a Int, b String Alias toString(a)", - settings={"kafka_commit_on_select": True}, - ) - ) - messages = [] - for i in range(5): - messages.append(json.dumps({"a": i})) - kafka_produce(kafka_cluster, KAFKA_TOPIC_NEW, messages) - result = "" - expected = TSV( - """ - 0\t0 - 1\t1 - 2\t2 - 3\t3 - 4\t4 - """ - ) - retries = 50 - while retries > 0: - result += instance.query("SELECT a, b FROM test.kafka", ignore_error=True) - if TSV(result) == expected: - break - retries -= 1 - time.sleep(0.5) - - assert TSV(result) == expected - - instance.query("DROP TABLE test.kafka SYNC") - - -def test_kafka_settings_old_syntax( - kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance -): - assert TSV( - instance.query( - "SELECT * FROM system.macros WHERE macro like 'kafka%' ORDER BY macro", - ignore_error=True, - ) - ) == TSV( - f"""kafka_broker kafka1 -kafka_client_id instance -kafka_format_json_each_row JSONEachRow -kafka_group_name_new {KAFKA_CONSUMER_GROUP_NEW} -kafka_group_name_old {KAFKA_CONSUMER_GROUP_OLD} -kafka_topic_new new_t -kafka_topic_old old_t -""" - ) - - instance.query( - """ - CREATE TABLE test.kafka (key UInt64, value UInt64) - ENGINE = Kafka('{kafka_broker}:19092', '{kafka_topic_old}', '{kafka_group_name_old}', '{kafka_format_json_each_row}', '\\n') - SETTINGS kafka_commit_on_select = 1; - """ - ) - - # Don't insert malformed messages since old settings syntax - # doesn't support skipping of broken messages. 
- messages = [] - for i in range(50): - messages.append(json.dumps({"key": i, "value": i})) - kafka_produce(kafka_cluster, KAFKA_TOPIC_OLD, messages) - - result = "" - while True: - result += instance.query("SELECT * FROM test.kafka", ignore_error=True) - if kafka_check_result(result): - break - - kafka_check_result(result, True) - - members = describe_consumer_group(kafka_cluster, KAFKA_CONSUMER_GROUP_OLD) - assert members[0]["client_id"] == "ClickHouse-instance-test-kafka" - # text_desc = kafka_cluster.exec_in_container(kafka_cluster.get_container_id('kafka1'),"kafka-consumer-groups --bootstrap-server localhost:9092 --describe --members --group old --verbose")) - - -def test_kafka_settings_new_syntax( - kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance -): - instance.query( - """ - CREATE TABLE test.kafka (key UInt64, value UInt64) - ENGINE = Kafka - SETTINGS kafka_broker_list = '{kafka_broker}:19092', - kafka_topic_list = '{kafka_topic_new}', - kafka_group_name = '{kafka_group_name_new}', - kafka_format = '{kafka_format_json_each_row}', - kafka_row_delimiter = '\\n', - kafka_commit_on_select = 1, - kafka_client_id = '{kafka_client_id} test 1234', - kafka_skip_broken_messages = 1; - """ - ) - - messages = [] - for i in range(25): - messages.append(json.dumps({"key": i, "value": i})) - kafka_produce(kafka_cluster, KAFKA_TOPIC_NEW, messages) - - # Insert couple of malformed messages. - kafka_produce(kafka_cluster, KAFKA_TOPIC_NEW, ["}{very_broken_message,"]) - kafka_produce(kafka_cluster, KAFKA_TOPIC_NEW, ["}another{very_broken_message,"]) - - messages = [] - for i in range(25, 50): - messages.append(json.dumps({"key": i, "value": i})) - kafka_produce(kafka_cluster, KAFKA_TOPIC_NEW, messages) - - result = "" - while True: - result += instance.query("SELECT * FROM test.kafka", ignore_error=True) - if kafka_check_result(result): - break - - kafka_check_result(result, True) - - members = describe_consumer_group(kafka_cluster, KAFKA_CONSUMER_GROUP_NEW) - assert members[0]["client_id"] == "instance test 1234" - - -def test_kafka_settings_predefined_macros( - kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance -): - instance.query( - """ - CREATE TABLE test.kafka (key UInt64, value UInt64) - ENGINE = Kafka - SETTINGS kafka_broker_list = '{kafka_broker}:19092', - kafka_topic_list = '{database}_{table}_topic', - kafka_group_name = '{database}_{table}_group', - kafka_format = '{kafka_format_json_each_row}', - kafka_row_delimiter = '\\n', - kafka_commit_on_select = 1, - kafka_client_id = '{database}_{table} test 1234', - kafka_skip_broken_messages = 1; - """ - ) - - messages = [] - for i in range(50): - messages.append(json.dumps({"key": i, "value": i})) - kafka_produce(kafka_cluster, "test_kafka_topic", messages) - - result = instance.query("SELECT * FROM test.kafka", ignore_error=True) - kafka_check_result(result, True) - - members = describe_consumer_group(kafka_cluster, "test_kafka_group") - assert members[0]["client_id"] == "test_kafka test 1234" - - -def test_kafka_json_as_string( - kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance -): - kafka_produce( - kafka_cluster, - "kafka_json_as_string", - [ - '{"t": 123, "e": {"x": "woof"} }', - "", - '{"t": 124, "e": {"x": "test"} }', - '{"F1":"V1","F2":{"F21":"V21","F22":{},"F23":"V23","F24":"2019-12-24T16:28:04"},"F3":"V3"}', - ], - ) - - # 'tombstone' record (null value) = marker of deleted record - producer = KafkaProducer( - bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port), - 
value_serializer=producer_serializer, - key_serializer=producer_serializer, - ) - producer.send(topic="kafka_json_as_string", key="xxx") - producer.flush() - - instance.query( - """ - CREATE TABLE test.kafka (field String) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'kafka_json_as_string', - kafka_group_name = 'kafka_json_as_string', - kafka_commit_on_select = 1, - kafka_format = 'JSONAsString', - kafka_flush_interval_ms=1000; - """ - ) - - result = instance.query("SELECT * FROM test.kafka;") - expected = """\ -{"t": 123, "e": {"x": "woof"} } -{"t": 124, "e": {"x": "test"} } -{"F1":"V1","F2":{"F21":"V21","F22":{},"F23":"V23","F24":"2019-12-24T16:28:04"},"F3":"V3"} -""" - assert TSV(result) == TSV(expected) - assert instance.contains_in_log( - "Parsing of message (topic: kafka_json_as_string, partition: 0, offset: [0-9]*) return no rows" - ) - @pytest.mark.parametrize( "create_query_generator", @@ -728,2071 +461,6 @@ def test_kafka_formats( kafka_delete_topic(get_admin_client(kafka_cluster), topic_name) -# Since everything is async and shaky when receiving messages from Kafka, -# we may want to try and check results multiple times in a loop. -def kafka_check_result(result, check=False, ref_file="test_kafka_json.reference"): - fpath = p.join(p.dirname(__file__), ref_file) - with open(fpath) as reference: - if check: - assert TSV(result) == TSV(reference) - else: - return TSV(result) == TSV(reference) - - -def decode_avro(message): - b = io.BytesIO(message) - ret = avro.datafile.DataFileReader(b, avro.io.DatumReader()) - - output = io.StringIO() - for record in ret: - print(record, file=output) - return output.getvalue() - - -# https://stackoverflow.com/a/57692111/1555175 -def describe_consumer_group(kafka_cluster, name): - client = BrokerConnection("localhost", kafka_cluster.kafka_port, socket.AF_INET) - client.connect_blocking() - - list_members_in_groups = DescribeGroupsRequest_v1(groups=[name]) - future = client.send(list_members_in_groups) - while not future.is_done: - for resp, f in client.recv(): - f.success(resp) - - ( - error_code, - group_id, - state, - protocol_type, - protocol, - members, - ) = future.value.groups[0] - - res = [] - for member in members: - (member_id, client_id, client_host, member_metadata, member_assignment) = member - member_info = {} - member_info["member_id"] = member_id - member_info["client_id"] = client_id - member_info["client_host"] = client_host - member_topics_assignment = [] - for topic, partitions in MemberAssignment.decode(member_assignment).assignment: - member_topics_assignment.append({"topic": topic, "partitions": partitions}) - member_info["assignment"] = member_topics_assignment - res.append(member_info) - return res - - -# Tests -def test_kafka_issue11308( - kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance -): - # Check that matview does respect Kafka SETTINGS - kafka_produce( - kafka_cluster, - "issue11308", - [ - '{"t": 123, "e": {"x": "woof"} }', - '{"t": 123, "e": {"x": "woof"} }', - '{"t": 124, "e": {"x": "test"} }', - ], - ) - - instance.query( - """ - CREATE TABLE test.persistent_kafka ( - time UInt64, - some_string String - ) - ENGINE = MergeTree() - ORDER BY time; - - CREATE TABLE test.kafka (t UInt64, `e.x` String) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'issue11308', - kafka_group_name = 'issue11308', - kafka_format = 'JSONEachRow', - kafka_row_delimiter = '\\n', - kafka_flush_interval_ms=1000, - input_format_import_nested_json = 1; - 
- CREATE MATERIALIZED VIEW test.persistent_kafka_mv TO test.persistent_kafka AS - SELECT - `t` AS `time`, - `e.x` AS `some_string` - FROM test.kafka; - """ - ) - - while int(instance.query("SELECT count() FROM test.persistent_kafka")) < 3: - time.sleep(1) - - result = instance.query("SELECT * FROM test.persistent_kafka ORDER BY time;") - - instance.query( - """ - DROP TABLE test.persistent_kafka; - DROP TABLE test.persistent_kafka_mv; - """ - ) - - expected = """\ -123 woof -123 woof -124 test -""" - assert TSV(result) == TSV(expected) - - -def test_kafka_issue4116( - kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance -): - # Check that format_csv_delimiter parameter works now - as part of all available format settings. - kafka_produce( - kafka_cluster, - "issue4116", - ["1|foo", "2|bar", "42|answer", "100|multi\n101|row\n103|message"], - ) - - instance.query( - """ - CREATE TABLE test.kafka (a UInt64, b String) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'issue4116', - kafka_group_name = 'issue4116', - kafka_commit_on_select = 1, - kafka_format = 'CSV', - kafka_row_delimiter = '\\n', - format_csv_delimiter = '|'; - """ - ) - - result = instance.query("SELECT * FROM test.kafka ORDER BY a;") - - expected = """\ -1 foo -2 bar -42 answer -100 multi -101 row -103 message -""" - assert TSV(result) == TSV(expected) - - -def test_kafka_consumer_hang( - kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance -): - admin_client = get_admin_client(kafka_cluster) - - topic_name = "consumer_hang" - kafka_create_topic(admin_client, topic_name, num_partitions=8) - - instance.query( - f""" - DROP TABLE IF EXISTS test.kafka; - DROP TABLE IF EXISTS test.view; - DROP TABLE IF EXISTS test.consumer; - - CREATE TABLE test.kafka (key UInt64, value UInt64) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = '{topic_name}', - kafka_group_name = '{topic_name}', - kafka_format = 'JSONEachRow', - kafka_num_consumers = 8; - CREATE TABLE test.view (key UInt64, value UInt64) ENGINE = Memory(); - CREATE MATERIALIZED VIEW test.consumer TO test.view AS SELECT * FROM test.kafka; - """ - ) - - instance.wait_for_log_line("kafka.*Stalled", repetitions=20) - - # This should trigger heartbeat fail, - # which will trigger REBALANCE_IN_PROGRESS, - # and which can lead to consumer hang. 
- kafka_cluster.pause_container("kafka1") - instance.wait_for_log_line("heartbeat error") - kafka_cluster.unpause_container("kafka1") - - # logging.debug("Attempt to drop") - instance.query("DROP TABLE test.kafka") - - # kafka_cluster.open_bash_shell('instance') - - instance.query( - """ - DROP TABLE test.consumer; - DROP TABLE test.view; - """ - ) - - # original problem appearance was a sequence of the following messages in librdkafka logs: - # BROKERFAIL -> |ASSIGN| -> REBALANCE_IN_PROGRESS -> "waiting for rebalance_cb" (repeated forever) - # so it was waiting forever while the application will execute queued rebalance callback - - # from a user perspective: we expect no hanging 'drop' queries - # 'dr'||'op' to avoid self matching - assert ( - int( - instance.query( - "select count() from system.processes where position(lower(query),'dr'||'op')>0" - ) - ) - == 0 - ) - - # cleanup unread messages so kafka will not wait reading consumers to delete topic - instance.query( - f""" - CREATE TABLE test.kafka (key UInt64) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = '{topic_name}', - kafka_commit_on_select = 1, - kafka_group_name = '{topic_name}', - kafka_format = 'JSONEachRow', - kafka_num_consumers = 8; - """ - ) - - num_read = int(instance.query("SELECT count() FROM test.kafka")) - logging.debug(f"read {num_read} from {topic_name} before delete") - instance.query("DROP TABLE test.kafka") - kafka_delete_topic(admin_client, topic_name) - - -def test_kafka_consumer_hang2( - kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance -): - admin_client = get_admin_client(kafka_cluster) - - topic_name = "consumer_hang2" - kafka_create_topic(admin_client, topic_name) - - instance.query( - """ - DROP TABLE IF EXISTS test.kafka; - DROP TABLE IF EXISTS test.kafka2; - - CREATE TABLE test.kafka (key UInt64, value UInt64) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'consumer_hang2', - kafka_group_name = 'consumer_hang2', - kafka_commit_on_select = 1, - kafka_format = 'JSONEachRow'; - - CREATE TABLE test.kafka2 (key UInt64, value UInt64) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'consumer_hang2', - kafka_commit_on_select = 1, - kafka_group_name = 'consumer_hang2', - kafka_format = 'JSONEachRow'; - """ - ) - - # first consumer subscribe the topic, try to poll some data, and go to rest - instance.query("SELECT * FROM test.kafka") - - # second consumer do the same leading to rebalance in the first - # consumer, try to poll some data - instance.query("SELECT * FROM test.kafka2") - - # echo 'SELECT * FROM test.kafka; SELECT * FROM test.kafka2; DROP TABLE test.kafka;' | clickhouse client -mn & - # kafka_cluster.open_bash_shell('instance') - - # first consumer has pending rebalance callback unprocessed (no poll after select) - # one of those queries was failing because of - # https://github.com/edenhill/librdkafka/issues/2077 - # https://github.com/edenhill/librdkafka/issues/2898 - instance.query("DROP TABLE test.kafka") - instance.query("DROP TABLE test.kafka2") - - # from a user perspective: we expect no hanging 'drop' queries - # 'dr'||'op' to avoid self matching - assert ( - int( - instance.query( - "select count() from system.processes where position(lower(query),'dr'||'op')>0" - ) - ) - == 0 - ) - kafka_delete_topic(admin_client, topic_name) - - -# sequential read from different consumers leads to breaking lot of kafka invariants -# (first consumer will get all partitions 
initially, and may have problems doing polls every 60 sec) -def test_kafka_read_consumers_in_parallel( - kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance -): - admin_client = get_admin_client(kafka_cluster) - - topic_name = "read_consumers_in_parallel" - kafka_create_topic(admin_client, topic_name, num_partitions=8) - - cancel = threading.Event() - - def produce(): - while not cancel.is_set(): - messages = [] - for _ in range(100): - messages.append(json.dumps({"key": 0, "value": 0})) - kafka_produce(kafka_cluster, "read_consumers_in_parallel", messages) - time.sleep(1) - - kafka_thread = threading.Thread(target=produce) - kafka_thread.start() - - # when we have more than 1 consumer in a single table - # and kafka_thread_per_consumer=0, - # all the consumers should be read in parallel, not in sequence. - # then, reading 8 consumers in parallel with a 1 second kafka_poll_timeout_ms and less than a 1 sec limit, - # we should have exactly 1 poll per consumer (i.e. 8 polls) every second (from different threads) - # in case parallel consuming is not working, we will have only 1 poll every second (from the same thread). - instance.query( - f""" - DROP TABLE IF EXISTS test.kafka; - DROP TABLE IF EXISTS test.view; - DROP TABLE IF EXISTS test.consumer; - - CREATE TABLE test.kafka (key UInt64, value UInt64) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = '{topic_name}', - kafka_group_name = '{topic_name}', - kafka_format = 'JSONEachRow', - kafka_num_consumers = 8, - kafka_thread_per_consumer = 0, - kafka_poll_timeout_ms = 1000, - kafka_flush_interval_ms = 999; - CREATE TABLE test.view (key UInt64, value UInt64) ENGINE = Memory(); - CREATE MATERIALIZED VIEW test.consumer TO test.view AS SELECT * FROM test.kafka; - """ - ) - - instance.wait_for_log_line( - "kafka.*Polled batch of [0-9]+.*read_consumers_in_parallel", - repetitions=64, - look_behind_lines=100, - timeout=30, # we should get 64 polls in ~8 seconds, but when read sequentially it will take more than 64 sec - ) - - cancel.set() - kafka_thread.join() - - instance.query( - """ - DROP TABLE test.consumer; - DROP TABLE test.view; - DROP TABLE test.kafka; - """ - ) - kafka_delete_topic(admin_client, topic_name) - - -def test_kafka_csv_with_delimiter( - kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance -): - messages = [] - for i in range(50): - messages.append("{i}, {i}".format(i=i)) - kafka_produce(kafka_cluster, "csv", messages) - - instance.query( - """ - CREATE TABLE test.kafka (key UInt64, value UInt64) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'csv', - kafka_commit_on_select = 1, - kafka_group_name = 'csv', - kafka_format = 'CSV'; - """ - ) - - result = "" - while True: - result += instance.query("SELECT * FROM test.kafka", ignore_error=True) - if kafka_check_result(result): - break - - kafka_check_result(result, True) - - -def test_kafka_tsv_with_delimiter( - kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance -): - messages = [] - for i in range(50): - messages.append("{i}\t{i}".format(i=i)) - kafka_produce(kafka_cluster, "tsv", messages) - - instance.query( - """ - CREATE TABLE test.kafka (key UInt64, value UInt64) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'tsv', - kafka_commit_on_select = 1, - kafka_group_name = 'tsv', - kafka_format = 'TSV'; - """ - ) - - result = "" - while True: - result += instance.query("SELECT * FROM test.kafka", ignore_error=True) - if
kafka_check_result(result): - break - - kafka_check_result(result, True) - - -def test_kafka_select_empty( - kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance -): - admin_client = get_admin_client(kafka_cluster) - topic_name = "empty" - kafka_create_topic(admin_client, topic_name) - - instance.query( - f""" - CREATE TABLE test.kafka (key UInt64) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = '{topic_name}', - kafka_commit_on_select = 1, - kafka_group_name = '{topic_name}', - kafka_format = 'TSV', - kafka_row_delimiter = '\\n'; - """ - ) - - assert int(instance.query("SELECT count() FROM test.kafka")) == 0 - kafka_delete_topic(admin_client, topic_name) - - -def test_kafka_json_without_delimiter( - kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance -): - messages = "" - for i in range(25): - messages += json.dumps({"key": i, "value": i}) + "\n" - kafka_produce(kafka_cluster, "json", [messages]) - - messages = "" - for i in range(25, 50): - messages += json.dumps({"key": i, "value": i}) + "\n" - kafka_produce(kafka_cluster, "json", [messages]) - - instance.query( - """ - CREATE TABLE test.kafka (key UInt64, value UInt64) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'json', - kafka_group_name = 'json', - kafka_commit_on_select = 1, - kafka_format = 'JSONEachRow'; - """ - ) - - result = "" - while True: - result += instance.query("SELECT * FROM test.kafka", ignore_error=True) - if kafka_check_result(result): - break - - kafka_check_result(result, True) - - -def test_kafka_protobuf(kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance): - kafka_produce_protobuf_messages(kafka_cluster, "pb", 0, 20) - kafka_produce_protobuf_messages(kafka_cluster, "pb", 20, 1) - kafka_produce_protobuf_messages(kafka_cluster, "pb", 21, 29) - - instance.query( - """ - CREATE TABLE test.kafka (key UInt64, value String) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'pb', - kafka_group_name = 'pb', - kafka_format = 'Protobuf', - kafka_commit_on_select = 1, - kafka_schema = 'kafka.proto:KeyValuePair'; - """ - ) - - result = "" - while True: - result += instance.query("SELECT * FROM test.kafka", ignore_error=True) - if kafka_check_result(result): - break - - kafka_check_result(result, True) - - -def test_kafka_string_field_on_first_position_in_protobuf( - kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance -): - # https://github.com/ClickHouse/ClickHouse/issues/12615 - kafka_produce_protobuf_social( - kafka_cluster, "string_field_on_first_position_in_protobuf", 0, 20 - ) - kafka_produce_protobuf_social( - kafka_cluster, "string_field_on_first_position_in_protobuf", 20, 1 - ) - kafka_produce_protobuf_social( - kafka_cluster, "string_field_on_first_position_in_protobuf", 21, 29 - ) - - instance.query( - """ -CREATE TABLE test.kafka ( - username String, - timestamp Int32 - ) ENGINE = Kafka() -SETTINGS - kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'string_field_on_first_position_in_protobuf', - kafka_group_name = 'string_field_on_first_position_in_protobuf', - kafka_format = 'Protobuf', - kafka_commit_on_select = 1, - kafka_schema = 'social:User'; - """ - ) - - result = instance.query("SELECT * FROM test.kafka", ignore_error=True) - expected = """\ -John Doe 0 1000000 -John Doe 1 1000001 -John Doe 2 1000002 -John Doe 3 1000003 -John Doe 4 1000004 -John Doe 5 1000005 -John Doe 6 1000006 -John Doe 7 1000007 -John Doe 8 1000008 -John Doe 9 1000009 -John 
Doe 10 1000010 -John Doe 11 1000011 -John Doe 12 1000012 -John Doe 13 1000013 -John Doe 14 1000014 -John Doe 15 1000015 -John Doe 16 1000016 -John Doe 17 1000017 -John Doe 18 1000018 -John Doe 19 1000019 -John Doe 20 1000020 -John Doe 21 1000021 -John Doe 22 1000022 -John Doe 23 1000023 -John Doe 24 1000024 -John Doe 25 1000025 -John Doe 26 1000026 -John Doe 27 1000027 -John Doe 28 1000028 -John Doe 29 1000029 -John Doe 30 1000030 -John Doe 31 1000031 -John Doe 32 1000032 -John Doe 33 1000033 -John Doe 34 1000034 -John Doe 35 1000035 -John Doe 36 1000036 -John Doe 37 1000037 -John Doe 38 1000038 -John Doe 39 1000039 -John Doe 40 1000040 -John Doe 41 1000041 -John Doe 42 1000042 -John Doe 43 1000043 -John Doe 44 1000044 -John Doe 45 1000045 -John Doe 46 1000046 -John Doe 47 1000047 -John Doe 48 1000048 -John Doe 49 1000049 -""" - assert TSV(result) == TSV(expected) - - -def test_kafka_protobuf_no_delimiter( - kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance -): - instance.query( - """ - CREATE TABLE test.kafka (key UInt64, value String) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'pb_no_delimiter', - kafka_group_name = 'pb_no_delimiter', - kafka_format = 'ProtobufSingle', - kafka_commit_on_select = 1, - kafka_schema = 'kafka.proto:KeyValuePair'; - """ - ) - - kafka_produce_protobuf_messages_no_delimiters( - kafka_cluster, "pb_no_delimiter", 0, 20 - ) - kafka_produce_protobuf_messages_no_delimiters( - kafka_cluster, "pb_no_delimiter", 20, 1 - ) - kafka_produce_protobuf_messages_no_delimiters( - kafka_cluster, "pb_no_delimiter", 21, 29 - ) - - result = "" - while True: - result += instance.query("SELECT * FROM test.kafka", ignore_error=True) - if kafka_check_result(result): - break - - kafka_check_result(result, True) - - instance.query( - """ - CREATE TABLE test.kafka_writer (key UInt64, value String) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'pb_no_delimiter', - kafka_group_name = 'pb_no_delimiter', - kafka_format = 'ProtobufSingle', - kafka_commit_on_select = 1, - kafka_schema = 'kafka.proto:KeyValuePair'; - """ - ) - - instance.query( - "INSERT INTO test.kafka_writer VALUES (13,'Friday'),(42,'Answer to the Ultimate Question of Life, the Universe, and Everything'), (110, 'just a number')" - ) - - time.sleep(1) - - result = instance.query("SELECT * FROM test.kafka ORDER BY key", ignore_error=True) - - expected = """\ -13 Friday -42 Answer to the Ultimate Question of Life, the Universe, and Everything -110 just a number -""" - assert TSV(result) == TSV(expected) - - -@pytest.mark.parametrize( - "create_query_generator", - [generate_old_create_table_query, generate_new_create_table_query], -) -def test_kafka_materialized_view( - kafka_cluster: ClickHouseCluster, - instance: ClickHouseInstance, - create_query_generator, -): - topic_name = "mv" - - instance.query( - f""" - DROP TABLE IF EXISTS test.view; - DROP TABLE IF EXISTS test.consumer; - DROP TABLE IF EXISTS test.kafka; - - {create_query_generator("kafka", "key UInt64, value UInt64", topic_list=topic_name, consumer_group="mv")}; - CREATE TABLE test.view (key UInt64, value UInt64) - ENGINE = MergeTree() - ORDER BY key; - CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT * FROM test.kafka; - """ - ) - - messages = [] - for i in range(50): - messages.append(json.dumps({"key": i, "value": i})) - kafka_produce(kafka_cluster, topic_name, messages) - - with existing_kafka_topic(get_admin_client(kafka_cluster), topic_name): - result = 
instance.query_with_retry( - "SELECT * FROM test.view", check_callback=kafka_check_result - ) - - kafka_check_result(result, True) - - instance.query( - """ - DROP TABLE test.consumer; - DROP TABLE test.view; - DROP TABLE test.kafka; - """ - ) - - -@pytest.mark.parametrize( - "create_query_generator, log_line", - [ - ( - generate_new_create_table_query, - r"kafka.*Saved offset [0-9]+ for topic-partition \[recreate_kafka_table:[0-9]+", - ), - ( - generate_old_create_table_query, - "kafka.*Committed offset [0-9]+.*recreate_kafka_table", - ), - ], -) -def test_kafka_recreate_kafka_table( - kafka_cluster: ClickHouseCluster, - instance: ClickHouseInstance, - create_query_generator, - log_line, -): - """ - Checks that the materialized view works properly after dropping and recreating the Kafka table. - """ - topic_name = "recreate_kafka_table" - thread_per_consumer = must_use_thread_per_consumer(create_query_generator) - - with kafka_topic(get_admin_client(kafka_cluster), topic_name, num_partitions=6): - create_query = create_query_generator( - "kafka", - "key UInt64, value UInt64", - topic_list=topic_name, - consumer_group="recreate_kafka_table_group", - settings={ - "kafka_num_consumers": 4, - "kafka_flush_interval_ms": 1000, - "kafka_skip_broken_messages": 1048577, - "kafka_thread_per_consumer": thread_per_consumer, - }, - ) - - instance.query( - f""" - DROP TABLE IF EXISTS test.view; - DROP TABLE IF EXISTS test.consumer; - {create_query}; - - CREATE TABLE test.view (key UInt64, value UInt64) - ENGINE = MergeTree() - ORDER BY key; - CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT * FROM test.kafka; - """ - ) - - messages = [] - for i in range(120): - messages.append(json.dumps({"key": i, "value": i})) - kafka_produce(kafka_cluster, "recreate_kafka_table", messages) - - instance.wait_for_log_line( - log_line, - repetitions=6, - look_behind_lines=100, - ) - - instance.query( - """ - DROP TABLE test.kafka; - """ - ) - - instance.rotate_logs() - - kafka_produce(kafka_cluster, "recreate_kafka_table", messages) - - instance.query(create_query) - - instance.wait_for_log_line( - log_line, - repetitions=6, - look_behind_lines=100, - ) - - # data was not flushed yet (it will be flushed 7.5 sec after creating MV) - assert int(instance.query("SELECT count() FROM test.view")) == 240 - - instance.query( - """ - DROP TABLE test.consumer; - DROP TABLE test.kafka; - DROP TABLE test.view; - """ - ) - - -@pytest.mark.parametrize( - "create_query_generator, log_line", - [ - (generate_old_create_table_query, "Committed offset {offset}"), - ( - generate_new_create_table_query, - r"kafka.*Saved offset [0-9]+ for topic-partition \[{topic}:[0-9]+\]", - ), - ], -) -def test_librdkafka_compression( - kafka_cluster: ClickHouseCluster, - instance: ClickHouseInstance, - create_query_generator, - log_line, -): - """ - Regression for UB in snappy-c (that is used in librdkafka), - backport PR is [1]. - - [1]: https://github.com/ClickHouse-Extras/librdkafka/pull/3 - - Example of corruption: - - 2020.12.10 09:59:56.831507 [ 20 ] {} void DB::StorageKafka::threadFunc(size_t): Code: 27.
DB::Exception: Cannot parse input: expected '"' before: 'foo"}': (while reading the value of key value): (at row 1) - - To trigger this regression there should be duplicated messages - - Original reproducer is: - $ gcc --version |& fgrep gcc - gcc (GCC) 10.2.0 - $ yes foobarbaz | fold -w 80 | head -n10 >| in-… - $ make clean && make CFLAGS='-Wall -g -O2 -ftree-loop-vectorize -DNDEBUG=1 -DSG=1 -fPIC' - $ ./verify in - final comparision of in failed at 20 of 100 - - """ - - supported_compression_types = ["gzip", "snappy", "lz4", "zstd", "uncompressed"] - - messages = [] - expected = [] - - value = "foobarbaz" * 10 - number_of_messages = 50 - for i in range(number_of_messages): - messages.append(json.dumps({"key": i, "value": value})) - expected.append(f"{i}\t{value}") - - expected = "\n".join(expected) - - admin_client = get_admin_client(kafka_cluster) - - for compression_type in supported_compression_types: - logging.debug(("Check compression {}".format(compression_type))) - - topic_name = "test_librdkafka_compression_{}".format(compression_type) - topic_config = {"compression.type": compression_type} - with kafka_topic(admin_client, topic_name, config=topic_config): - instance.query( - """{create_query}; - - CREATE TABLE test.view (key UInt64, value String) - ENGINE = MergeTree() - ORDER BY key; - - CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT * FROM test.kafka; - """.format( - create_query=create_query_generator( - "kafka", - "key UInt64, value String", - topic_list=topic_name, - format="JSONEachRow", - settings={"kafka_flush_interval_ms": 1000}, - ), - ) - ) - - kafka_produce(kafka_cluster, topic_name, messages) - - instance.wait_for_log_line( - log_line.format(offset=number_of_messages, topic=topic_name) - ) - result = instance.query("SELECT * FROM test.view") - assert TSV(result) == TSV(expected) - - instance.query("DROP TABLE test.kafka SYNC") - instance.query("DROP TABLE test.consumer SYNC") - instance.query("DROP TABLE test.view SYNC") - - -@pytest.mark.parametrize( - "create_query_generator", - [generate_new_create_table_query, generate_old_create_table_query], -) -def test_kafka_materialized_view_with_subquery( - kafka_cluster: ClickHouseCluster, - instance: ClickHouseInstance, - create_query_generator, -): - topic_name = "mysq" - logging.debug(f"Using topic {topic_name}") - - create_query = create_query_generator( - "kafka", - "key UInt64, value UInt64", - topic_list=topic_name, - consumer_group=topic_name, - ) - instance.query( - f""" - DROP TABLE IF EXISTS test.kafka; - DROP TABLE IF EXISTS test.view; - DROP TABLE IF EXISTS test.consumer; - - {create_query}; - CREATE TABLE test.view (key UInt64, value UInt64) - ENGINE = MergeTree() - ORDER BY key; - CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT * FROM (SELECT * FROM test.kafka); - """ - ) - - messages = [] - for i in range(50): - messages.append(json.dumps({"key": i, "value": i})) - kafka_produce(kafka_cluster, topic_name, messages) - - with existing_kafka_topic(get_admin_client(kafka_cluster), topic_name): - result = instance.query_with_retry( - "SELECT * FROM test.view", - check_callback=kafka_check_result, - retry_count=40, - sleep_time=0.75, - ) - - instance.query( - """ - DROP TABLE test.consumer; - DROP TABLE test.view; - """ - ) - - kafka_check_result(result, True) - - -@pytest.mark.parametrize( - "create_query_generator", - [generate_old_create_table_query, generate_new_create_table_query], -) -def test_kafka_many_materialized_views( - kafka_cluster: ClickHouseCluster, - instance:
ClickHouseInstance, - create_query_generator, -): - topic_name = f"mmv-{get_topic_postfix(create_query_generator)}" - create_query = create_query_generator( - "kafka", - "key UInt64, value UInt64", - topic_list=topic_name, - consumer_group=f"{topic_name}-group", - ) - - instance.query( - f""" - DROP TABLE IF EXISTS test.view1; - DROP TABLE IF EXISTS test.view2; - DROP TABLE IF EXISTS test.consumer1; - DROP TABLE IF EXISTS test.consumer2; - {create_query}; - CREATE TABLE test.view1 (key UInt64, value UInt64) - ENGINE = MergeTree() - ORDER BY key; - CREATE TABLE test.view2 (key UInt64, value UInt64) - ENGINE = MergeTree() - ORDER BY key; - CREATE MATERIALIZED VIEW test.consumer1 TO test.view1 AS - SELECT * FROM test.kafka; - CREATE MATERIALIZED VIEW test.consumer2 TO test.view2 AS - SELECT * FROM test.kafka; - """ - ) - - messages = [] - for i in range(50): - messages.append(json.dumps({"key": i, "value": i})) - kafka_produce(kafka_cluster, topic_name, messages) - - with existing_kafka_topic(get_admin_client(kafka_cluster), topic_name): - result1 = instance.query_with_retry( - "SELECT * FROM test.view1", check_callback=kafka_check_result - ) - result2 = instance.query_with_retry( - "SELECT * FROM test.view2", check_callback=kafka_check_result - ) - - instance.query( - """ - DROP TABLE test.consumer1; - DROP TABLE test.consumer2; - DROP TABLE test.view1; - DROP TABLE test.view2; - """ - ) - - kafka_check_result(result1, True) - kafka_check_result(result2, True) - - -@pytest.mark.parametrize( - "create_query_generator", - [generate_old_create_table_query, generate_new_create_table_query], -) -def test_kafka_flush_on_big_message( - kafka_cluster: ClickHouseCluster, - instance: ClickHouseInstance, - create_query_generator, -): - # Create batches of messages of size ~100Kb - kafka_messages = 1000 - batch_messages = 1000 - topic_name = "flush" + get_topic_postfix(create_query_generator) - messages = [ - json.dumps({"key": i, "value": "x" * 100}) * batch_messages - for i in range(kafka_messages) - ] - kafka_produce(kafka_cluster, topic_name, messages) - - admin_client = get_admin_client(kafka_cluster) - - with existing_kafka_topic(admin_client, topic_name): - create_query = create_query_generator( - "kafka", - "key UInt64, value String", - topic_list=topic_name, - consumer_group=topic_name, - settings={"kafka_max_block_size": 10}, - ) - - instance.query( - f""" - DROP TABLE IF EXISTS test.view; - DROP TABLE IF EXISTS test.consumer; - {create_query}; - CREATE TABLE test.view (key UInt64, value String) - ENGINE = MergeTree - ORDER BY key; - CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT * FROM test.kafka; - """ - ) - - received = False - while not received: - try: - offsets = admin_client.list_consumer_group_offsets(topic_name) - for topic, offset in list(offsets.items()): - if topic.topic == topic_name and offset.offset == kafka_messages: - received = True - break - except kafka.errors.GroupCoordinatorNotAvailableError: - continue - - while True: - result = instance.query("SELECT count() FROM test.view") - if int(result) == kafka_messages * batch_messages: - break - - instance.query( - """ - DROP TABLE test.consumer; - DROP TABLE test.view; - """ - ) - - assert ( - int(result) == kafka_messages * batch_messages - ), "ClickHouse lost some messages: {}".format(result) - - -def test_kafka_virtual_columns( - kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance -): - topic_config = { - # default retention, since predefined timestamp_ms is used. 
- "retention.ms": "-1", - } - with kafka_topic(get_admin_client(kafka_cluster), "virt1", config=topic_config): - instance.query( - """ - CREATE TABLE test.kafka (key UInt64, value UInt64) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'virt1', - kafka_group_name = 'virt1', - kafka_commit_on_select = 1, - kafka_format = 'JSONEachRow'; - """ - ) - - messages = "" - for i in range(25): - messages += json.dumps({"key": i, "value": i}) + "\n" - kafka_produce(kafka_cluster, "virt1", [messages], 0) - - messages = "" - for i in range(25, 50): - messages += json.dumps({"key": i, "value": i}) + "\n" - kafka_produce(kafka_cluster, "virt1", [messages], 0) - - result = "" - while True: - result += instance.query( - """SELECT _key, key, _topic, value, _offset, _partition, _timestamp = 0 ? '0000-00-00 00:00:00' : toString(_timestamp) AS _timestamp FROM test.kafka""", - ignore_error=True, - ) - if kafka_check_result(result, False, "test_kafka_virtual1.reference"): - break - - kafka_check_result(result, True, "test_kafka_virtual1.reference") - - -@pytest.mark.parametrize( - "create_query_generator", - [generate_old_create_table_query, generate_new_create_table_query], -) -def test_kafka_virtual_columns_with_materialized_view( - kafka_cluster: ClickHouseCluster, - instance: ClickHouseInstance, - create_query_generator, -): - topic_config = { - # default retention, since predefined timestamp_ms is used. - "retention.ms": "-1", - } - # the topic name is hardcoded in reference, it doesn't worth to create two reference files to have separate topics, - # as the context manager will always clean up the topic - topic_name = "virt2" - create_query = create_query_generator( - "kafka", - "key UInt64, value UInt64", - topic_list=topic_name, - consumer_group=f"{topic_name}-group", - ) - with kafka_topic(get_admin_client(kafka_cluster), topic_name, config=topic_config): - instance.query( - f""" - DROP TABLE IF EXISTS test.view; - DROP TABLE IF EXISTS test.consumer; - {create_query}; - CREATE TABLE test.view (key UInt64, value UInt64, kafka_key String, topic String, offset UInt64, partition UInt64, timestamp Nullable(DateTime('UTC'))) - ENGINE = MergeTree() - ORDER BY key; - CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT *, _key as kafka_key, _topic as topic, _offset as offset, _partition as partition, _timestamp = 0 ? '0000-00-00 00:00:00' : toString(_timestamp) as timestamp FROM test.kafka; - """ - ) - - messages = [] - for i in range(50): - messages.append(json.dumps({"key": i, "value": i})) - kafka_produce(kafka_cluster, topic_name, messages, 0) - - def check_callback(result): - return kafka_check_result(result, False, "test_kafka_virtual2.reference") - - result = instance.query_with_retry( - "SELECT kafka_key, key, topic, value, offset, partition, timestamp FROM test.view ORDER BY kafka_key, key", - check_callback=check_callback, - ) - - kafka_check_result(result, True, "test_kafka_virtual2.reference") - - instance.query( - """ - DROP TABLE test.consumer; - DROP TABLE test.view; - """ - ) - - -def insert_with_retry(instance, values, table_name="kafka", max_try_count=5): - try_count = 0 - while True: - logging.debug(f"Inserting, try_count is {try_count}") - try: - try_count += 1 - instance.query(f"INSERT INTO test.{table_name} VALUES {values}") - break - except QueryRuntimeException as e: - if "Local: Timed out." 
in str(e) and try_count < max_try_count: - continue - else: - raise - - -@pytest.mark.parametrize( - "create_query_generator", - [generate_old_create_table_query, generate_new_create_table_query], -) -def test_kafka_insert( - kafka_cluster: ClickHouseCluster, - instance: ClickHouseInstance, - create_query_generator, -): - topic_name = "insert1" + get_topic_postfix(create_query_generator) - - instance.query( - create_query_generator( - "kafka", - "key UInt64, value UInt64", - topic_list=topic_name, - consumer_group=topic_name, - format="TSV", - ) - ) - - message_count = 50 - values = [] - for i in range(message_count): - values.append("({i}, {i})".format(i=i)) - values = ",".join(values) - - with existing_kafka_topic(get_admin_client(kafka_cluster), topic_name): - insert_with_retry(instance, values) - - messages = kafka_consume_with_retry(kafka_cluster, topic_name, message_count) - result = "\n".join(messages) - kafka_check_result(result, True) - - -@pytest.mark.parametrize( - "create_query_generator", - [generate_old_create_table_query, generate_new_create_table_query], -) -def test_kafka_produce_consume( - kafka_cluster: ClickHouseCluster, - instance: ClickHouseInstance, - create_query_generator, -): - topic_name = "insert2" + get_topic_postfix(create_query_generator) - - create_query = create_query_generator( - "kafka", - "key UInt64, value UInt64", - topic_list=topic_name, - consumer_group=topic_name, - format="TSV", - ) - instance.query( - f""" - DROP TABLE IF EXISTS test.view; - DROP TABLE IF EXISTS test.consumer; - {create_query}; - CREATE TABLE test.view (key UInt64, value UInt64) - ENGINE = MergeTree - ORDER BY key; - CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT * FROM test.kafka; - """ - ) - - messages_num = 10000 - - def insert(): - values = [] - for i in range(messages_num): - values.append("({i}, {i})".format(i=i)) - values = ",".join(values) - - insert_with_retry(instance, values) - - threads = [] - threads_num = 16 - for _ in range(threads_num): - threads.append(threading.Thread(target=insert)) - for thread in threads: - time.sleep(random.uniform(0, 1)) - thread.start() - - with existing_kafka_topic(get_admin_client(kafka_cluster), topic_name): - expected_row_count = messages_num * threads_num - result = instance.query_with_retry( - "SELECT count() FROM test.view", - sleep_time=1, - retry_count=20, - check_callback=lambda result: int(result) == expected_row_count, - ) - - instance.query( - """ - DROP TABLE test.consumer; - DROP TABLE test.view; - """ - ) - - for thread in threads: - thread.join() - - assert ( - int(result) == expected_row_count - ), "ClickHouse lost some messages: {}".format(result) - - -@pytest.mark.parametrize( - "create_query_generator", - [generate_old_create_table_query, generate_new_create_table_query], -) -def test_kafka_commit_on_block_write( - kafka_cluster: ClickHouseCluster, - instance: ClickHouseInstance, - create_query_generator, -): - topic_name = "block" + get_topic_postfix(create_query_generator) - create_query = create_query_generator( - "kafka", - "key UInt64, value UInt64", - topic_list=topic_name, - consumer_group=topic_name, - settings={"kafka_max_block_size": 100}, - ) - instance.query( - f""" - DROP TABLE IF EXISTS test.view; - DROP TABLE IF EXISTS test.consumer; - {create_query}; - CREATE TABLE test.view (key UInt64, value UInt64) - ENGINE = MergeTree() - ORDER BY key; - CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT * FROM test.kafka; - """ - ) - - cancel = threading.Event() - - # We need to pass i 
as a reference. Simple integers are passed by value. - # Making an array is probably the easiest way to "force pass by reference". - i = [0] - - def produce(i): - while not cancel.is_set(): - messages = [] - for _ in range(101): - messages.append(json.dumps({"key": i[0], "value": i[0]})) - i[0] += 1 - kafka_produce(kafka_cluster, topic_name, messages) - - kafka_thread = threading.Thread(target=produce, args=[i]) - kafka_thread.start() - - instance.query_with_retry( - "SELECT count() FROM test.view", - sleep_time=1, - check_callback=lambda res: int(res) >= 100, - ) - - cancel.set() - - instance.query("DROP TABLE test.kafka SYNC") - - instance.query(create_query) - kafka_thread.join() - - instance.query_with_retry( - "SELECT uniqExact(key) FROM test.view", - sleep_time=1, - check_callback=lambda res: int(res) >= i[0], - ) - - result = int(instance.query("SELECT count() == uniqExact(key) FROM test.view")) - - instance.query( - """ - DROP TABLE test.consumer; - DROP TABLE test.view; - """ - ) - - kafka_thread.join() - - assert result == 1, "Messages from kafka get duplicated!" - - -@pytest.mark.parametrize( - "create_query_generator, log_line", - [ - (generate_old_create_table_query, "kafka.*Committed offset 2.*virt2_[01]"), - ( - generate_new_create_table_query, - r"kafka.*Saved offset 2 for topic-partition \[virt2_[01]:[0-9]+", - ), - ], -) -def test_kafka_virtual_columns2( - kafka_cluster: ClickHouseCluster, - instance: ClickHouseInstance, - create_query_generator, - log_line, -): - admin_client = get_admin_client(kafka_cluster) - - topic_config = { - # default retention, since predefined timestamp_ms is used. - "retention.ms": "-1", - } - thread_per_consumer = must_use_thread_per_consumer(create_query_generator) - topic_name_0 = "virt2_0" - topic_name_1 = "virt2_1" - consumer_group = "virt2" + get_topic_postfix(create_query_generator) - with kafka_topic(admin_client, topic_name_0, num_partitions=2, config=topic_config): - with kafka_topic( - admin_client, topic_name_1, num_partitions=2, config=topic_config - ): - create_query = create_query_generator( - "kafka", - "value UInt64", - topic_list=f"{topic_name_0},{topic_name_1}", - consumer_group=consumer_group, - settings={ - "kafka_num_consumers": 2, - "kafka_thread_per_consumer": thread_per_consumer, - }, - ) - - instance.query( - f""" - {create_query}; - - CREATE MATERIALIZED VIEW test.view ENGINE=MergeTree ORDER BY tuple() AS - SELECT value, _key, _topic, _partition, _offset, toUnixTimestamp(_timestamp), toUnixTimestamp64Milli(_timestamp_ms), _headers.name, _headers.value FROM test.kafka; - """ - ) - - producer = KafkaProducer( - bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port), - value_serializer=producer_serializer, - key_serializer=producer_serializer, - ) - - producer.send( - topic=topic_name_0, - value=json.dumps({"value": 1}), - partition=0, - key="k1", - timestamp_ms=1577836801001, - headers=[("content-encoding", b"base64")], - ) - producer.send( - topic=topic_name_0, - value=json.dumps({"value": 2}), - partition=0, - key="k2", - timestamp_ms=1577836802002, - headers=[ - ("empty_value", b""), - ("", b"empty name"), - ("", b""), - ("repetition", b"1"), - ("repetition", b"2"), - ], - ) - producer.flush() - - producer.send( - topic=topic_name_0, - value=json.dumps({"value": 3}), - partition=1, - key="k3", - timestamp_ms=1577836803003, - headers=[("b", b"b"), ("a", b"a")], - ) - producer.send( - topic=topic_name_0, - value=json.dumps({"value": 4}), - partition=1, - key="k4", - timestamp_ms=1577836804004, - 
headers=[("a", b"a"), ("b", b"b")], - ) - producer.flush() - - producer.send( - topic=topic_name_1, - value=json.dumps({"value": 5}), - partition=0, - key="k5", - timestamp_ms=1577836805005, - ) - producer.send( - topic=topic_name_1, - value=json.dumps({"value": 6}), - partition=0, - key="k6", - timestamp_ms=1577836806006, - ) - producer.flush() - - producer.send( - topic=topic_name_1, - value=json.dumps({"value": 7}), - partition=1, - key="k7", - timestamp_ms=1577836807007, - ) - producer.send( - topic=topic_name_1, - value=json.dumps({"value": 8}), - partition=1, - key="k8", - timestamp_ms=1577836808008, - ) - producer.flush() - - instance.wait_for_log_line(log_line, repetitions=4, look_behind_lines=6000) - - members = describe_consumer_group(kafka_cluster, consumer_group) - # pprint.pprint(members) - # members[0]['client_id'] = 'ClickHouse-instance-test-kafka-0' - # members[1]['client_id'] = 'ClickHouse-instance-test-kafka-1' - - result = instance.query( - "SELECT * FROM test.view ORDER BY value", ignore_error=True - ) - - expected = f"""\ - 1 k1 {topic_name_0} 0 0 1577836801 1577836801001 ['content-encoding'] ['base64'] - 2 k2 {topic_name_0} 0 1 1577836802 1577836802002 ['empty_value','','','repetition','repetition'] ['','empty name','','1','2'] - 3 k3 {topic_name_0} 1 0 1577836803 1577836803003 ['b','a'] ['b','a'] - 4 k4 {topic_name_0} 1 1 1577836804 1577836804004 ['a','b'] ['a','b'] - 5 k5 {topic_name_1} 0 0 1577836805 1577836805005 [] [] - 6 k6 {topic_name_1} 0 1 1577836806 1577836806006 [] [] - 7 k7 {topic_name_1} 1 0 1577836807 1577836807007 [] [] - 8 k8 {topic_name_1} 1 1 1577836808 1577836808008 [] [] - """ - - assert TSV(result) == TSV(expected) - - instance.query( - """ - DROP TABLE test.kafka; - DROP TABLE test.view; - """ - ) - instance.rotate_logs() - - -@pytest.mark.parametrize( - "create_query_generator, do_direct_read", - [(generate_old_create_table_query, True), (generate_new_create_table_query, False)], -) -def test_kafka_producer_consumer_separate_settings( - kafka_cluster: ClickHouseCluster, - instance: ClickHouseInstance, - create_query_generator, - do_direct_read, -): - instance.rotate_logs() - instance.query( - create_query_generator( - "test_kafka", - "key UInt64", - topic_list="separate_settings", - consumer_group="test", - ) - ) - - if do_direct_read: - instance.query("SELECT * FROM test.test_kafka") - instance.query("INSERT INTO test.test_kafka VALUES (1)") - - assert instance.contains_in_log("Kafka producer created") - assert instance.contains_in_log("Created #0 consumer") - - kafka_conf_warnings = instance.grep_in_log("rdk:CONFWARN") - - assert kafka_conf_warnings is not None - - for warn in kafka_conf_warnings.strip().split("\n"): - # this setting was applied via old syntax and applied on both consumer - # and producer configurations - assert "heartbeat.interval.ms" in warn - - kafka_consumer_applied_properties = instance.grep_in_log("Consumer set property") - kafka_producer_applied_properties = instance.grep_in_log("Producer set property") - - assert kafka_consumer_applied_properties is not None - assert kafka_producer_applied_properties is not None - - # global settings should be applied for consumer and producer - global_settings = { - "debug": "topic,protocol,cgrp,consumer", - "statistics.interval.ms": "600", - } - - for name, value in global_settings.items(): - property_in_log = f"{name}:{value}" - assert property_in_log in kafka_consumer_applied_properties - assert property_in_log in kafka_producer_applied_properties - - 
settings_topic__separate_settings__consumer = {"session.timeout.ms": "6001"} - - for name, value in settings_topic__separate_settings__consumer.items(): - property_in_log = f"{name}:{value}" - assert property_in_log in kafka_consumer_applied_properties - assert property_in_log not in kafka_producer_applied_properties - - producer_settings = {"transaction.timeout.ms": "60001"} - - for name, value in producer_settings.items(): - property_in_log = f"{name}:{value}" - assert property_in_log not in kafka_consumer_applied_properties - assert property_in_log in kafka_producer_applied_properties - - # Should be ignored, because it is inside producer tag - producer_legacy_syntax__topic_separate_settings = {"message.timeout.ms": "300001"} - - for name, value in producer_legacy_syntax__topic_separate_settings.items(): - property_in_log = f"{name}:{value}" - assert property_in_log not in kafka_consumer_applied_properties - assert property_in_log not in kafka_producer_applied_properties - - # Old syntax, applied on consumer and producer - legacy_syntax__topic_separated_settings = {"heartbeat.interval.ms": "302"} - - for name, value in legacy_syntax__topic_separated_settings.items(): - property_in_log = f"{name}:{value}" - assert property_in_log in kafka_consumer_applied_properties - assert property_in_log in kafka_producer_applied_properties - - -@pytest.mark.parametrize( - "create_query_generator, log_line", - [ - (generate_new_create_table_query, "Saved offset 5"), - (generate_old_create_table_query, "Committed offset 5"), - ], -) -def test_kafka_produce_key_timestamp( - kafka_cluster: ClickHouseCluster, - instance: ClickHouseInstance, - create_query_generator, - log_line, -): - topic_name = "insert3" - topic_config = { - # default retention, since predefined timestamp_ms is used. 
- "retention.ms": "-1", - } - - with kafka_topic(get_admin_client(kafka_cluster), topic_name, config=topic_config): - writer_create_query = create_query_generator( - "kafka_writer", - "key UInt64, value UInt64, _key String, _timestamp DateTime('UTC')", - topic_list=topic_name, - consumer_group=topic_name, - format="TSV", - ) - reader_create_query = create_query_generator( - "kafka", - "key UInt64, value UInt64, inserted_key String, inserted_timestamp DateTime('UTC')", - topic_list=topic_name, - consumer_group=topic_name, - format="TSV", - ) - - instance.query( - f""" - DROP TABLE IF EXISTS test.view; - DROP TABLE IF EXISTS test.consumer; - {writer_create_query}; - {reader_create_query}; - CREATE MATERIALIZED VIEW test.view ENGINE=MergeTree ORDER BY tuple() AS - SELECT key, value, inserted_key, toUnixTimestamp(inserted_timestamp), _key, _topic, _partition, _offset, toUnixTimestamp(_timestamp) FROM test.kafka; - """ - ) - - instance.query( - "INSERT INTO test.kafka_writer VALUES ({},{},'{}',toDateTime({}))".format( - 1, 1, "k1", 1577836801 - ) - ) - instance.query( - "INSERT INTO test.kafka_writer VALUES ({},{},'{}',toDateTime({}))".format( - 2, 2, "k2", 1577836802 - ) - ) - instance.query( - "INSERT INTO test.kafka_writer VALUES ({},{},'{}',toDateTime({})),({},{},'{}',toDateTime({}))".format( - 3, 3, "k3", 1577836803, 4, 4, "k4", 1577836804 - ) - ) - instance.query( - "INSERT INTO test.kafka_writer VALUES ({},{},'{}',toDateTime({}))".format( - 5, 5, "k5", 1577836805 - ) - ) - - # instance.wait_for_log_line(log_line) - - expected = """\ - 1 1 k1 1577836801 k1 insert3 0 0 1577836801 - 2 2 k2 1577836802 k2 insert3 0 1 1577836802 - 3 3 k3 1577836803 k3 insert3 0 2 1577836803 - 4 4 k4 1577836804 k4 insert3 0 3 1577836804 - 5 5 k5 1577836805 k5 insert3 0 4 1577836805 - """ - - result = instance.query_with_retry( - "SELECT * FROM test.view ORDER BY value", - ignore_error=True, - retry_count=5, - sleep_time=1, - check_callback=lambda res: TSV(res) == TSV(expected), - ) - - assert TSV(result) == TSV(expected) - - -@pytest.mark.parametrize( - "create_query_generator", - [generate_old_create_table_query, generate_new_create_table_query], -) -def test_kafka_insert_avro( - kafka_cluster: ClickHouseCluster, - instance: ClickHouseInstance, - create_query_generator, -): - admin_client = KafkaAdminClient( - bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) - ) - topic_config = { - # default retention, since predefined timestamp_ms is used. 
- "retention.ms": "-1", - } - topic_name = "avro1" + get_topic_postfix(create_query_generator) - with kafka_topic(admin_client, topic_name, config=topic_config): - create_query = create_query_generator( - "kafka", - "key UInt64, value UInt64, _timestamp DateTime('UTC')", - topic_list=topic_name, - consumer_group=topic_name, - format="Avro", - ) - instance.query( - f""" - DROP TABLE IF EXISTS test.kafka; - {create_query} - """ - ) - - instance.query( - "INSERT INTO test.kafka select number*10 as key, number*100 as value, 1636505534 as _timestamp from numbers(4) SETTINGS output_format_avro_rows_in_file = 2, output_format_avro_codec = 'deflate'" - ) - - message_count = 2 - messages = kafka_consume_with_retry( - kafka_cluster, - topic_name, - message_count, - need_decode=False, - timestamp=1636505534, - ) - - result = "" - for a_message in messages: - result += decode_avro(a_message) + "\n" - - expected_result = """{'key': 0, 'value': 0, '_timestamp': 1636505534} -{'key': 10, 'value': 100, '_timestamp': 1636505534} - -{'key': 20, 'value': 200, '_timestamp': 1636505534} -{'key': 30, 'value': 300, '_timestamp': 1636505534} - -""" - assert result == expected_result - - -@pytest.mark.parametrize( - "create_query_generator", - [generate_old_create_table_query, generate_new_create_table_query], -) -def test_kafka_produce_consume_avro( - kafka_cluster: ClickHouseCluster, - instance: ClickHouseInstance, - create_query_generator, -): - topic_name = "insert_avro" + get_topic_postfix(create_query_generator) - with kafka_topic(get_admin_client(kafka_cluster), topic_name): - num_rows = 75 - - writer_create_query = create_query_generator( - "kafka_writer", - "key UInt64, value UInt64", - topic_list=topic_name, - consumer_group=topic_name, - format="Avro", - ) - - reader_create_query = create_query_generator( - "kafka", - "key UInt64, value UInt64", - topic_list=topic_name, - consumer_group=topic_name, - format="Avro", - ) - - instance.query( - f""" - DROP TABLE IF EXISTS test.view; - DROP TABLE IF EXISTS test.kafka; - DROP TABLE IF EXISTS test.kafka_writer; - - {writer_create_query}; - {reader_create_query}; - - CREATE MATERIALIZED VIEW test.view ENGINE=MergeTree ORDER BY tuple() AS - SELECT key, value FROM test.kafka; - """ - ) - - instance.query( - "INSERT INTO test.kafka_writer select number*10 as key, number*100 as value from numbers({num_rows}) SETTINGS output_format_avro_rows_in_file = 7".format( - num_rows=num_rows - ) - ) - - instance.wait_for_log_line( - "Committed offset {offset}".format(offset=math.ceil(num_rows / 7)) - ) - - expected_num_rows = instance.query( - "SELECT COUNT(1) FROM test.view", ignore_error=True - ) - assert int(expected_num_rows) == num_rows - - expected_max_key = instance.query( - "SELECT max(key) FROM test.view", ignore_error=True - ) - assert int(expected_max_key) == (num_rows - 1) * 10 - - -@pytest.mark.parametrize( - "create_query_generator", - [generate_old_create_table_query, generate_new_create_table_query], -) -def test_kafka_flush_by_time( - kafka_cluster: ClickHouseCluster, - instance: ClickHouseInstance, - create_query_generator, -): - admin_client = KafkaAdminClient( - bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) - ) - topic_name = "flush_by_time" + get_topic_postfix(create_query_generator) - - with kafka_topic(admin_client, topic_name): - create_query = create_query_generator( - "kafka", - "key UInt64, value UInt64", - topic_list=topic_name, - consumer_group=topic_name, - settings={ - "kafka_max_block_size": 100, - }, - ) - instance.query( - 
f""" - DROP TABLE IF EXISTS test.view; - DROP TABLE IF EXISTS test.consumer; - - {create_query}; - - CREATE TABLE test.view (key UInt64, value UInt64, ts DateTime64(3) MATERIALIZED now64(3)) - ENGINE = MergeTree() - ORDER BY key; - """ - ) - - cancel = threading.Event() - - def produce(): - while not cancel.is_set(): - messages = [json.dumps({"key": 0, "value": 0})] - kafka_produce(kafka_cluster, topic_name, messages) - time.sleep(0.8) - - kafka_thread = threading.Thread(target=produce) - kafka_thread.start() - - instance.query( - """ - CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT * FROM test.kafka; - """ - ) - - # By default the flush timeout should be 7.5 seconds => 18 seconds should be enough for 2 flushes, but not for 3 - time.sleep(18) - - result = instance.query("SELECT uniqExact(ts), count() >= 15 FROM test.view") - - cancel.set() - kafka_thread.join() - - instance.query( - """ - DROP TABLE test.consumer; - DROP TABLE test.view; - """ - ) - - assert TSV(result) == TSV("2 1") - - -@pytest.mark.parametrize( - "create_query_generator", - [generate_old_create_table_query, generate_new_create_table_query], -) -def test_kafka_flush_by_block_size( - kafka_cluster: ClickHouseCluster, - instance: ClickHouseInstance, - create_query_generator, -): - topic_name = "flush_by_block_size" + get_topic_postfix(create_query_generator) - - cancel = threading.Event() - - def produce(): - while not cancel.is_set(): - messages = [] - messages.append(json.dumps({"key": 0, "value": 0})) - kafka_produce(kafka_cluster, topic_name, messages) - - kafka_thread = threading.Thread(target=produce) - - with kafka_topic(get_admin_client(kafka_cluster), topic_name): - kafka_thread.start() - - create_query = create_query_generator( - "kafka", - "key UInt64, value UInt64", - topic_list=topic_name, - consumer_group=topic_name, - settings={ - "kafka_max_block_size": 100, - "kafka_poll_max_batch_size": 1, - "kafka_flush_interval_ms": 120000, - }, - ) - - instance.query( - f""" - DROP TABLE IF EXISTS test.view; - DROP TABLE IF EXISTS test.consumer; - - {create_query}; - - CREATE TABLE test.view (key UInt64, value UInt64) - ENGINE = MergeTree() - ORDER BY key; - - CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT * FROM test.kafka; - """ - ) - - # Wait for Kafka engine to consume this data - while 1 != int( - instance.query( - "SELECT count() FROM system.parts WHERE database = 'test' AND table = 'view' AND name = 'all_1_1_0'" - ) - ): - time.sleep(0.5) - - cancel.set() - kafka_thread.join() - - # more flushes can happens during test, we need to check only result of first flush (part named all_1_1_0). - result = instance.query("SELECT count() FROM test.view WHERE _part='all_1_1_0'") - # logging.debug(result) - - instance.query( - """ - DROP TABLE test.consumer; - DROP TABLE test.view; - """ - ) - - # 100 = first poll should return 100 messages (and rows) - # not waiting for stream_flush_interval_ms - assert ( - int(result) == 100 - ), "Messages from kafka should be flushed when block of size kafka_max_block_size is formed!" 
- - -@pytest.mark.parametrize( - "create_query_generator", - [generate_old_create_table_query, generate_new_create_table_query], -) -def test_kafka_lot_of_partitions_partial_commit_of_bulk( - kafka_cluster: ClickHouseCluster, - instance: ClickHouseInstance, - create_query_generator, -): - admin_client = KafkaAdminClient( - bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) - ) - - topic_name = "topic_with_multiple_partitions2" + get_topic_postfix( - create_query_generator - ) - with kafka_topic(admin_client, topic_name): - create_query = create_query_generator( - "kafka", - "key UInt64, value UInt64", - topic_list=topic_name, - consumer_group=topic_name, - settings={ - "kafka_max_block_size": 211, - "kafka_flush_interval_ms": 500, - }, - ) - instance.query( - f""" - DROP TABLE IF EXISTS test.view; - DROP TABLE IF EXISTS test.consumer; - {create_query}; - CREATE TABLE test.view (key UInt64, value UInt64) - ENGINE = MergeTree() - ORDER BY key; - CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT * FROM test.kafka; - """ - ) - - messages = [] - count = 0 - for dummy_msg in range(1000): - rows = [] - for dummy_row in range(random.randrange(3, 10)): - count = count + 1 - rows.append(json.dumps({"key": count, "value": count})) - messages.append("\n".join(rows)) - kafka_produce(kafka_cluster, topic_name, messages) - - instance.wait_for_log_line("kafka.*Stalled", repetitions=5) - - result = instance.query( - "SELECT count(), uniqExact(key), max(key) FROM test.view" - ) - logging.debug(result) - assert TSV(result) == TSV("{0}\t{0}\t{0}".format(count)) - - instance.query( - """ - DROP TABLE test.consumer; - DROP TABLE test.view; - """ - ) - - @pytest.mark.parametrize( "create_query_generator, log_line", [ @@ -2972,784 +640,6 @@ def test_kafka_rebalance( assert result == 1, "Messages from kafka get duplicated!" 
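The parametrized tests in this file receive `generate_old_create_table_query` / `generate_new_create_table_query` from the shared test utilities. Purely as an illustration of the call sites above (the real generators live elsewhere and may differ), an old-syntax generator compatible with calls like `create_query_generator("kafka", "key UInt64, value UInt64", topic_list=..., consumer_group=..., settings={...})` could be sketched as:

    def sketch_old_create_table_query(
        table_name,
        columns,
        topic_list,
        consumer_group,
        format="JSONEachRow",
        settings=None,
    ):
        # Hypothetical sketch only: renders the classic ENGINE = Kafka SETTINGS
        # DDL that the non-parametrized tests in this file spell out by hand.
        opts = {
            "kafka_broker_list": "'kafka1:19092'",
            "kafka_topic_list": f"'{topic_list}'",
            "kafka_group_name": f"'{consumer_group}'",
            "kafka_format": f"'{format}'",
        }
        for name, value in (settings or {}).items():
            opts[name] = str(value)
        rendered = ", ".join(f"{name} = {value}" for name, value in opts.items())
        return f"CREATE TABLE test.{table_name} ({columns}) ENGINE = Kafka SETTINGS {rendered}"

Keeping the DDL behind one generator per syntax is what lets a single test body run against both the legacy settings-based tables and the newer named-collection style without duplicating every test.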
-# TODO(antaljanosbenjamin): find another way to make insertion fail -@pytest.mark.parametrize( - "create_query_generator", - [ - generate_old_create_table_query, - # generate_new_create_table_query, - ], -) -def test_kafka_no_holes_when_write_suffix_failed( - kafka_cluster: ClickHouseCluster, - instance: ClickHouseInstance, - create_query_generator, -): - admin_client = KafkaAdminClient( - bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) - ) - topic_name = "no_holes_when_write_suffix_failed" + get_topic_postfix( - create_query_generator - ) - - with kafka_topic(admin_client, topic_name): - messages = [json.dumps({"key": j + 1, "value": "x" * 300}) for j in range(22)] - kafka_produce(kafka_cluster, topic_name, messages) - - create_query = create_query_generator( - "kafka", - "key UInt64, value String", - topic_list=topic_name, - consumer_group=topic_name, - settings={ - "kafka_max_block_size": 20, - "kafka_flush_interval_ms": 2000, - }, - ) - instance.query( - f""" - DROP TABLE IF EXISTS test.view SYNC; - DROP TABLE IF EXISTS test.consumer; - - {create_query}; - - CREATE TABLE test.view (key UInt64, value String) - ENGINE = ReplicatedMergeTree('/clickhouse/kafkatest/tables/{topic_name}', 'node1') - ORDER BY key; - """ - ) - - # init PartitionManager (it starts container) earlier - pm = PartitionManager() - - instance.query( - """ - CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT * FROM test.kafka - WHERE NOT sleepEachRow(0.25); - """ - ) - - instance.wait_for_log_line("Polled batch of 20 messages") - # the tricky part here is that disconnect should happen after write prefix, but before write suffix - # we have 0.25 (sleepEachRow) * 20 ( Rows ) = 5 sec window after "Polled batch of 20 messages" - # while materialized view is working to inject zookeeper failure - pm.drop_instance_zk_connections(instance) - instance.wait_for_log_line( - "Error.*(Connection loss|Coordination::Exception).*while pushing to view" - ) - pm.heal_all() - instance.wait_for_log_line("Committed offset 22") - - result = instance.query( - "SELECT count(), uniqExact(key), max(key) FROM test.view" - ) - logging.debug(result) - - # kafka_cluster.open_bash_shell('instance') - - instance.query( - """ - DROP TABLE test.consumer; - DROP TABLE test.view SYNC; - """ - ) - - assert TSV(result) == TSV("22\t22\t22") - - -def test_exception_from_destructor( - kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance -): - instance.query( - """ - CREATE TABLE test.kafka (key UInt64, value String) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'xyz', - kafka_group_name = '', - kafka_commit_on_select = 1, - kafka_format = 'JSONEachRow'; - """ - ) - instance.query_and_get_error( - """ - SELECT * FROM test.kafka; - """ - ) - instance.query( - """ - DROP TABLE test.kafka; - """ - ) - - instance.query( - """ - CREATE TABLE test.kafka (key UInt64, value String) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'xyz', - kafka_group_name = '', - kafka_format = 'JSONEachRow'; - """ - ) - instance.query( - """ - DROP TABLE test.kafka; - """ - ) - - # kafka_cluster.open_bash_shell('instance') - assert TSV(instance.query("SELECT 1")) == TSV("1") - - -@pytest.mark.parametrize( - "create_query_generator", - [generate_old_create_table_query, generate_new_create_table_query], -) -def test_commits_of_unprocessed_messages_on_drop( - kafka_cluster: ClickHouseCluster, - instance: ClickHouseInstance, - create_query_generator, -): - topic_name = 
"commits_of_unprocessed_messages_on_drop" + get_topic_postfix( - create_query_generator - ) - messages = [json.dumps({"key": j + 1, "value": j + 1}) for j in range(1)] - - kafka_produce(kafka_cluster, topic_name, messages) - - create_query = create_query_generator( - "kafka", - "key UInt64, value UInt64", - topic_list=topic_name, - consumer_group=f"{topic_name}_test_group", - settings={ - "kafka_max_block_size": 1000, - "kafka_flush_interval_ms": 1000, - }, - ) - instance.query( - f""" - DROP TABLE IF EXISTS test.destination SYNC; - CREATE TABLE test.destination ( - key UInt64, - value UInt64, - _topic String, - _key String, - _offset UInt64, - _partition UInt64, - _timestamp Nullable(DateTime('UTC')), - _consumed_by LowCardinality(String) - ) - ENGINE = MergeTree() - ORDER BY key; - - {create_query}; - - CREATE MATERIALIZED VIEW test.kafka_consumer TO test.destination AS - SELECT - key, - value, - _topic, - _key, - _offset, - _partition, - _timestamp - FROM test.kafka; - """ - ) - - # Waiting for test.kafka_consumer to start consume - instance.wait_for_log_line("Committed offset [0-9]+") - - cancel = threading.Event() - - i = [2] - - def produce(): - while not cancel.is_set(): - messages = [] - for _ in range(113): - messages.append(json.dumps({"key": i[0], "value": i[0]})) - i[0] += 1 - kafka_produce(kafka_cluster, topic_name, messages) - time.sleep(0.5) - - kafka_thread = threading.Thread(target=produce) - kafka_thread.start() - time.sleep(4) - - instance.query( - """ - DROP TABLE test.kafka SYNC; - """ - ) - - new_create_query = create_query_generator( - "kafka", - "key UInt64, value UInt64", - topic_list=topic_name, - consumer_group=f"{topic_name}_test_group", - settings={ - "kafka_max_block_size": 10000, - "kafka_flush_interval_ms": 1000, - }, - ) - instance.query(new_create_query) - - cancel.set() - instance.wait_for_log_line("kafka.*Stalled", repetitions=5) - - # kafka_cluster.open_bash_shell('instance') - # SELECT key, _timestamp, _offset FROM test.destination where runningDifference(key) <> 1 ORDER BY key; - - result = instance.query( - "SELECT count(), uniqExact(key), max(key) FROM test.destination" - ) - logging.debug(result) - - instance.query( - """ - DROP TABLE test.kafka_consumer SYNC; - DROP TABLE test.destination SYNC; - """ - ) - - kafka_thread.join() - assert TSV(result) == TSV("{0}\t{0}\t{0}".format(i[0] - 1)), "Missing data!" 
- - -@pytest.mark.parametrize( - "create_query_generator", - [generate_old_create_table_query, generate_new_create_table_query], -) -def test_bad_reschedule( - kafka_cluster: ClickHouseCluster, - instance: ClickHouseInstance, - create_query_generator, -): - topic_name = "test_bad_reschedule" + get_topic_postfix(create_query_generator) - - messages = [json.dumps({"key": j + 1, "value": j + 1}) for j in range(20000)] - kafka_produce(kafka_cluster, topic_name, messages) - - create_query = create_query_generator( - "kafka", - "key UInt64, value UInt64", - topic_list=topic_name, - consumer_group=topic_name, - settings={ - "kafka_max_block_size": 1000, - "kafka_flush_interval_ms": 1000, - }, - ) - instance.query( - f""" - {create_query}; - - CREATE MATERIALIZED VIEW test.destination ENGINE=MergeTree ORDER BY tuple() AS - SELECT - key, - now() as consume_ts, - value, - _topic, - _key, - _offset, - _partition, - _timestamp - FROM test.kafka; - """ - ) - - instance.wait_for_log_line("Committed offset 20000") - - assert ( - int( - instance.query( - "SELECT max(consume_ts) - min(consume_ts) FROM test.destination" - ) - ) - < 8 - ) - - -def test_kafka_duplicates_when_commit_failed( - kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance -): - messages = [json.dumps({"key": j + 1, "value": "x" * 300}) for j in range(22)] - kafka_produce(kafka_cluster, "duplicates_when_commit_failed", messages) - - instance.query( - """ - DROP TABLE IF EXISTS test.view SYNC; - DROP TABLE IF EXISTS test.consumer SYNC; - - CREATE TABLE test.kafka (key UInt64, value String) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'duplicates_when_commit_failed', - kafka_group_name = 'duplicates_when_commit_failed', - kafka_format = 'JSONEachRow', - kafka_max_block_size = 20, - kafka_flush_interval_ms = 1000; - - CREATE TABLE test.view (key UInt64, value String) - ENGINE = MergeTree() - ORDER BY key; - """ - ) - - instance.query( - """ - CREATE MATERIALIZED VIEW test.consumer TO test.view AS - SELECT * FROM test.kafka - WHERE NOT sleepEachRow(0.25); - """ - ) - - instance.wait_for_log_line("Polled batch of 20 messages") - # the tricky part here is that disconnect should happen after write prefix, but before we do commit - # we have 0.25 (sleepEachRow) * 20 ( Rows ) = 5 sec window after "Polled batch of 20 messages" - # while materialized view is working to inject zookeeper failure - kafka_cluster.pause_container("kafka1") - - # if we restore the connection too fast (<30sec) librdkafka will not report any timeout - # (alternative is to decrease the default session timeouts for librdkafka) - # - # when the delay is too long (>50sec) broker will decide to remove us from the consumer group, - # and will start answering "Broker: Unknown member" - instance.wait_for_log_line( - "Exception during commit attempt: Local: Waiting for coordinator", timeout=45 - ) - instance.wait_for_log_line("All commit attempts failed", look_behind_lines=500) - - kafka_cluster.unpause_container("kafka1") - - # kafka_cluster.open_bash_shell('instance') - instance.wait_for_log_line("Committed offset 22") - - result = instance.query("SELECT count(), uniqExact(key), max(key) FROM test.view") - logging.debug(result) - - instance.query( - """ - DROP TABLE test.consumer SYNC; - DROP TABLE test.view SYNC; - """ - ) - - # After https://github.com/edenhill/librdkafka/issues/2631 - # timeout triggers rebalance, making further commits to the topic after getting back online - # impossible. 
So we have a duplicate in that scenario, but we report that situation properly. - assert TSV(result) == TSV("42\t22\t22") - - -# if we came to partition end we will repeat polling until reaching kafka_max_block_size or flush_interval -# that behavior is a bit questionable - we can just take a bigger pauses between polls instead - -# to do more job in a single pass, and give more rest for a thread. -# But in cases of some peaky loads in kafka topic the current contract sounds more predictable and -# easier to understand, so let's keep it as is for now. -# also we can came to eof because we drained librdkafka internal queue too fast -@pytest.mark.parametrize( - "create_query_generator", - [generate_old_create_table_query, generate_new_create_table_query], -) -def test_premature_flush_on_eof( - kafka_cluster: ClickHouseCluster, - instance: ClickHouseInstance, - create_query_generator, -): - topic_name = "premature_flush_on_eof" + get_topic_postfix(create_query_generator) - create_query = create_query_generator( - "kafka", - "key UInt64, value UInt64", - topic_list=topic_name, - consumer_group=topic_name, - ) - instance.query( - f""" - {create_query}; - CREATE TABLE test.destination ( - key UInt64, - value UInt64, - _topic String, - _key String, - _offset UInt64, - _partition UInt64, - _timestamp Nullable(DateTime('UTC')), - _consumed_by LowCardinality(String) - ) - ENGINE = MergeTree() - ORDER BY key; - """ - ) - - # messages created here will be consumed immediately after MV creation - # reaching topic EOF. - # But we should not do flush immediately after reaching EOF, because - # next poll can return more data, and we should respect kafka_flush_interval_ms - # and try to form bigger block - messages = [json.dumps({"key": 1, "value": 1})] - kafka_produce(kafka_cluster, topic_name, messages) - - instance.query( - """ - CREATE MATERIALIZED VIEW test.kafka_consumer TO test.destination AS - SELECT - key, - value, - _topic, - _key, - _offset, - _partition, - _timestamp - FROM test.kafka; - """ - ) - - # all subscriptions/assignments done during select, so it start sending data to test.destination - # immediately after creation of MV - - instance.wait_for_log_line("Polled batch of 1 messages") - instance.wait_for_log_line("Stalled") - - # produce more messages after delay - kafka_produce(kafka_cluster, topic_name, messages) - - # data was not flushed yet (it will be flushed 7.5 sec after creating MV) - assert int(instance.query("SELECT count() FROM test.destination")) == 0 - - instance.wait_for_log_line("Committed offset 2") - - # it should be single part, i.e. 
single insert - result = instance.query( - "SELECT _part, count() FROM test.destination group by _part" - ) - assert TSV(result) == TSV("all_1_1_0\t2") - - instance.query( - """ - DROP TABLE test.kafka_consumer; - DROP TABLE test.destination; - """ - ) - - -@pytest.mark.parametrize( - "create_query_generator, do_direct_read", - [(generate_old_create_table_query, True), (generate_new_create_table_query, False)], -) -def test_kafka_unavailable( - kafka_cluster: ClickHouseCluster, - instance: ClickHouseInstance, - create_query_generator, - do_direct_read, -): - number_of_messages = 20000 - topic_name = "test_bad_reschedule" + get_topic_postfix(create_query_generator) - messages = [ - json.dumps({"key": j + 1, "value": j + 1}) for j in range(number_of_messages) - ] - kafka_produce(kafka_cluster, topic_name, messages) - - with existing_kafka_topic(get_admin_client(kafka_cluster), topic_name): - kafka_cluster.pause_container("kafka1") - - create_query = create_query_generator( - "test_bad_reschedule", - "key UInt64, value UInt64", - topic_list=topic_name, - consumer_group=topic_name, - settings={"kafka_max_block_size": 1000}, - ) - instance.query( - f""" - {create_query}; - - CREATE MATERIALIZED VIEW test.destination_unavailable ENGINE=MergeTree ORDER BY tuple() AS - SELECT - key, - now() as consume_ts, - value, - _topic, - _key, - _offset, - _partition, - _timestamp - FROM test.test_bad_reschedule; - """ - ) - - if do_direct_read: - instance.query("SELECT * FROM test.test_bad_reschedule") - instance.query("SELECT count() FROM test.destination_unavailable") - - # enough to trigger issue - time.sleep(30) - kafka_cluster.unpause_container("kafka1") - - result = instance.query_with_retry( - "SELECT count() FROM test.destination_unavailable", - sleep_time=1, - check_callback=lambda res: int(res) == number_of_messages, - ) - - assert int(result) == number_of_messages - - -@pytest.mark.parametrize( - "create_query_generator", - [generate_old_create_table_query, generate_new_create_table_query], -) -def test_kafka_issue14202( - kafka_cluster: ClickHouseCluster, - instance: ClickHouseInstance, - create_query_generator, -): - """ - INSERT INTO Kafka Engine from an empty SELECT sub query was leading to failure - """ - - topic_name = "issue14202" + get_topic_postfix(create_query_generator) - create_query = create_query_generator( - "kafka_q", - "t UInt64, some_string String", - topic_list=topic_name, - consumer_group=topic_name, - ) - instance.query( - f""" - CREATE TABLE test.empty_table ( - dt Date, - some_string String - ) - ENGINE = MergeTree() - PARTITION BY toYYYYMM(dt) - ORDER BY some_string; - - {create_query}; - """ - ) - - instance.query( - "INSERT INTO test.kafka_q SELECT t, some_string FROM ( SELECT dt AS t, some_string FROM test.empty_table )" - ) - # check instance is alive - assert TSV(instance.query("SELECT 1")) == TSV("1") - instance.query( - """ - DROP TABLE test.empty_table; - DROP TABLE test.kafka_q; - """ - ) - - -def test_kafka_csv_with_thread_per_consumer( - kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance -): - instance.query( - """ - CREATE TABLE test.kafka (key UInt64, value UInt64) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = 'csv_with_thread_per_consumer', - kafka_group_name = 'csv_with_thread_per_consumer', - kafka_format = 'CSV', - kafka_row_delimiter = '\\n', - kafka_num_consumers = 4, - kafka_commit_on_select = 1, - kafka_thread_per_consumer = 1; - """ - ) - - messages = [] - for i in range(50): - messages.append("{i}, 
{i}".format(i=i)) - kafka_produce(kafka_cluster, "csv_with_thread_per_consumer", messages) - - result = "" - while True: - result += instance.query("SELECT * FROM test.kafka", ignore_error=True) - if kafka_check_result(result): - break - - kafka_check_result(result, True) - - -def random_string(size=8): - return "".join(random.choices(string.ascii_uppercase + string.digits, k=size)) - - -@pytest.mark.parametrize( - "create_query_generator", - [generate_old_create_table_query, generate_new_create_table_query], -) -def test_kafka_engine_put_errors_to_stream( - kafka_cluster: ClickHouseCluster, - instance: ClickHouseInstance, - create_query_generator, -): - topic_name = "kafka_engine_put_errors_to_stream" + get_topic_postfix( - create_query_generator - ) - create_query = create_query_generator( - "kafka", - "i Int64, s String", - topic_list=topic_name, - consumer_group=topic_name, - settings={ - "kafka_max_block_size": 128, - "kafka_handle_error_mode": "stream", - }, - ) - instance.query( - f""" - DROP TABLE IF EXISTS test.kafka; - DROP TABLE IF EXISTS test.kafka_data; - DROP TABLE IF EXISTS test.kafka_errors; - {create_query}; - CREATE MATERIALIZED VIEW test.kafka_data (i Int64, s String) - ENGINE = MergeTree - ORDER BY i - AS SELECT i, s FROM test.kafka WHERE length(_error) == 0; - CREATE MATERIALIZED VIEW test.kafka_errors (topic String, partition Int64, offset Int64, raw String, error String) - ENGINE = MergeTree - ORDER BY (topic, offset) - AS SELECT - _topic AS topic, - _partition AS partition, - _offset AS offset, - _raw_message AS raw, - _error AS error - FROM test.kafka WHERE length(_error) > 0; - """ - ) - - messages = [] - for i in range(128): - if i % 2 == 0: - messages.append(json.dumps({"i": i, "s": random_string(8)})) - else: - # Unexpected json content for table test.kafka. - messages.append( - json.dumps({"i": "n_" + random_string(4), "s": random_string(8)}) - ) - - kafka_produce(kafka_cluster, topic_name, messages) - with existing_kafka_topic(get_admin_client(kafka_cluster), topic_name): - instance.wait_for_log_line("Committed offset 128") - - assert TSV(instance.query("SELECT count() FROM test.kafka_data")) == TSV("64") - assert TSV(instance.query("SELECT count() FROM test.kafka_errors")) == TSV("64") - - instance.query( - """ - DROP TABLE test.kafka; - DROP TABLE test.kafka_data; - DROP TABLE test.kafka_errors; - """ - ) - - -def gen_normal_json(): - return '{"i":1000, "s":"ABC123abc"}' - - -def gen_malformed_json(): - return '{"i":"n1000", "s":"1000"}' - - -def gen_message_with_jsons(jsons=10, malformed=0): - s = io.StringIO() - - # we don't care on which position error will be added - # (we skip whole broken message), but we need to be - # sure that at least one error will be added, - # otherwise test will fail. 
- error_pos = random.randint(0, jsons - 1) - - for i in range(jsons): - if malformed and i == error_pos: - s.write(gen_malformed_json()) - else: - s.write(gen_normal_json()) - s.write(" ") - return s.getvalue() - - -@pytest.mark.parametrize( - "create_query_generator", - [generate_old_create_table_query, generate_new_create_table_query], -) -def test_kafka_engine_put_errors_to_stream_with_random_malformed_json( - kafka_cluster: ClickHouseCluster, - instance: ClickHouseInstance, - create_query_generator, -): - topic_name = ( - "kafka_engine_put_errors_to_stream_with_random_malformed_json" - + get_topic_postfix(create_query_generator) - ) - create_query = create_query_generator( - "kafka", - "i Int64, s String", - topic_list=topic_name, - consumer_group=topic_name, - settings={ - "kafka_max_block_size": 100, - "kafka_poll_max_batch_size": 1, - "kafka_handle_error_mode": "stream", - }, - ) - - instance.query( - f""" - DROP TABLE IF EXISTS test.kafka; - DROP TABLE IF EXISTS test.kafka_data; - DROP TABLE IF EXISTS test.kafka_errors; - {create_query}; - CREATE MATERIALIZED VIEW test.kafka_data (i Int64, s String) - ENGINE = MergeTree - ORDER BY i - AS SELECT i, s FROM test.kafka WHERE length(_error) == 0; - CREATE MATERIALIZED VIEW test.kafka_errors (topic String, partition Int64, offset Int64, raw String, error String) - ENGINE = MergeTree - ORDER BY (topic, offset) - AS SELECT - _topic AS topic, - _partition AS partition, - _offset AS offset, - _raw_message AS raw, - _error AS error - FROM test.kafka WHERE length(_error) > 0; - """ - ) - - messages = [] - for i in range(128): - if i % 2 == 0: - messages.append(gen_message_with_jsons(10, 1)) - else: - messages.append(gen_message_with_jsons(10, 0)) - - kafka_produce(kafka_cluster, topic_name, messages) - with existing_kafka_topic(get_admin_client(kafka_cluster), topic_name): - instance.wait_for_log_line("Committed offset 128") - # 64 good messages, each containing 10 rows - assert TSV(instance.query("SELECT count() FROM test.kafka_data")) == TSV("640") - # 64 bad messages, each containing some broken row - assert TSV(instance.query("SELECT count() FROM test.kafka_errors")) == TSV("64") - - instance.query( - """ - DROP TABLE test.kafka; - DROP TABLE test.kafka_data; - DROP TABLE test.kafka_errors; - """ - ) - - @pytest.mark.parametrize( "create_query_generator", [generate_old_create_table_query, generate_new_create_table_query], @@ -4089,787 +979,75 @@ def test_kafka_formats_with_broken_message( @pytest.mark.parametrize( "create_query_generator", - [ - generate_old_create_table_query, - generate_new_create_table_query, - ], + [generate_old_create_table_query, generate_new_create_table_query], ) -def test_kafka_consumer_failover( +def test_kafka_flush_by_time( kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance, create_query_generator, -): - topic_name = "kafka_consumer_failover" + get_topic_postfix(create_query_generator) - - with kafka_topic(get_admin_client(kafka_cluster), topic_name, num_partitions=2): - consumer_group = f"{topic_name}_group" - create_queries = [] - for counter in range(3): - create_queries.append( - create_query_generator( - f"kafka{counter+1}", - "key UInt64, value UInt64", - topic_list=topic_name, - consumer_group=consumer_group, - settings={ - "kafka_max_block_size": 1, - "kafka_poll_timeout_ms": 200, - }, - ) - ) - - instance.query( - f""" - {create_queries[0]}; - {create_queries[1]}; - {create_queries[2]}; - - CREATE TABLE test.destination ( - key UInt64, - value UInt64, - _consumed_by LowCardinality(String) - 
) - ENGINE = MergeTree() - ORDER BY key; - - CREATE MATERIALIZED VIEW test.kafka1_mv TO test.destination AS - SELECT key, value, 'kafka1' as _consumed_by - FROM test.kafka1; - - CREATE MATERIALIZED VIEW test.kafka2_mv TO test.destination AS - SELECT key, value, 'kafka2' as _consumed_by - FROM test.kafka2; - - CREATE MATERIALIZED VIEW test.kafka3_mv TO test.destination AS - SELECT key, value, 'kafka3' as _consumed_by - FROM test.kafka3; - """ - ) - - producer = KafkaProducer( - bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port), - value_serializer=producer_serializer, - key_serializer=producer_serializer, - ) - - ## all 3 attached, 2 working - producer.send( - topic=topic_name, - value=json.dumps({"key": 1, "value": 1}), - partition=0, - ) - producer.send( - topic=topic_name, - value=json.dumps({"key": 1, "value": 1}), - partition=1, - ) - producer.flush() - - count_query = "SELECT count() FROM test.destination" - prev_count = instance.query_with_retry( - count_query, check_callback=lambda res: int(res) > 0 - ) - - ## 2 attached, 2 working - instance.query("DETACH TABLE test.kafka1") - producer.send( - topic=topic_name, - value=json.dumps({"key": 2, "value": 2}), - partition=0, - ) - producer.send( - topic=topic_name, - value=json.dumps({"key": 2, "value": 2}), - partition=1, - ) - producer.flush() - prev_count = instance.query_with_retry( - count_query, check_callback=lambda res: int(res) > prev_count - ) - - ## 1 attached, 1 working - instance.query("DETACH TABLE test.kafka2") - producer.send( - topic=topic_name, - value=json.dumps({"key": 3, "value": 3}), - partition=0, - ) - producer.send( - topic=topic_name, - value=json.dumps({"key": 3, "value": 3}), - partition=1, - ) - producer.flush() - prev_count = instance.query_with_retry( - count_query, check_callback=lambda res: int(res) > prev_count - ) - - ## 2 attached, 2 working - instance.query("ATTACH TABLE test.kafka1") - producer.send( - topic=topic_name, - value=json.dumps({"key": 4, "value": 4}), - partition=0, - ) - producer.send( - topic=topic_name, - value=json.dumps({"key": 4, "value": 4}), - partition=1, - ) - producer.flush() - prev_count = instance.query_with_retry( - count_query, check_callback=lambda res: int(res) > prev_count - ) - - ## 1 attached, 1 working - instance.query("DETACH TABLE test.kafka3") - producer.send( - topic=topic_name, - value=json.dumps({"key": 5, "value": 5}), - partition=0, - ) - producer.send( - topic=topic_name, - value=json.dumps({"key": 5, "value": 5}), - partition=1, - ) - producer.flush() - prev_count = instance.query_with_retry( - count_query, check_callback=lambda res: int(res) > prev_count - ) - - ## 2 attached, 2 working - instance.query("ATTACH TABLE test.kafka2") - producer.send( - topic=topic_name, - value=json.dumps({"key": 6, "value": 6}), - partition=0, - ) - producer.send( - topic=topic_name, - value=json.dumps({"key": 6, "value": 6}), - partition=1, - ) - producer.flush() - prev_count = instance.query_with_retry( - count_query, check_callback=lambda res: int(res) > prev_count - ) - - ## 3 attached, 2 working - instance.query("ATTACH TABLE test.kafka3") - producer.send( - topic=topic_name, - value=json.dumps({"key": 7, "value": 7}), - partition=0, - ) - producer.send( - topic=topic_name, - value=json.dumps({"key": 7, "value": 7}), - partition=1, - ) - producer.flush() - prev_count = instance.query_with_retry( - count_query, check_callback=lambda res: int(res) > prev_count - ) - - ## 2 attached, same 2 working - instance.query("DETACH TABLE test.kafka3") - producer.send( 
- topic=topic_name, - value=json.dumps({"key": 8, "value": 8}), - partition=0, - ) - producer.send( - topic=topic_name, - value=json.dumps({"key": 8, "value": 8}), - partition=1, - ) - producer.flush() - prev_count = instance.query_with_retry( - count_query, check_callback=lambda res: int(res) > prev_count - ) - - -def test_kafka_predefined_configuration( - kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance ): admin_client = KafkaAdminClient( bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) ) - topic_name = "conf" - kafka_create_topic(admin_client, topic_name) + topic_name = "flush_by_time" + get_topic_postfix(create_query_generator) - messages = [] - for i in range(50): - messages.append("{i}, {i}".format(i=i)) - kafka_produce(kafka_cluster, topic_name, messages) - - instance.query( - f""" - CREATE TABLE test.kafka (key UInt64, value UInt64) ENGINE = Kafka(kafka1, kafka_format='CSV'); - """ - ) - - result = "" - while True: - result += instance.query("SELECT * FROM test.kafka", ignore_error=True) - if kafka_check_result(result): - break - kafka_check_result(result, True) - - -# https://github.com/ClickHouse/ClickHouse/issues/26643 -@pytest.mark.parametrize( - "create_query_generator", - [generate_old_create_table_query, generate_new_create_table_query], -) -def test_issue26643( - kafka_cluster: ClickHouseCluster, - instance: ClickHouseInstance, - create_query_generator, -): - producer = KafkaProducer( - bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port), - value_serializer=producer_serializer, - ) - topic_name = "test_issue26643" + get_topic_postfix(create_query_generator) - thread_per_consumer = must_use_thread_per_consumer(create_query_generator) - - with kafka_topic(get_admin_client(kafka_cluster), topic_name): - msg = message_with_repeated_pb2.Message( - tnow=1629000000, - server="server1", - clien="host1", - sPort=443, - cPort=50000, - r=[ - message_with_repeated_pb2.dd( - name="1", type=444, ttl=123123, data=b"adsfasd" - ), - message_with_repeated_pb2.dd(name="2"), - ], - method="GET", - ) - - data = b"" - serialized_msg = msg.SerializeToString() - data = data + _VarintBytes(len(serialized_msg)) + serialized_msg - - msg = message_with_repeated_pb2.Message(tnow=1629000002) - - serialized_msg = msg.SerializeToString() - data = data + _VarintBytes(len(serialized_msg)) + serialized_msg - - producer.send(topic_name, value=data) - - data = _VarintBytes(len(serialized_msg)) + serialized_msg - producer.send(topic_name, value=data) - producer.flush() - - create_query = create_query_generator( - "test_queue", - """`tnow` UInt32, - `server` String, - `client` String, - `sPort` UInt16, - `cPort` UInt16, - `r.name` Array(String), - `r.class` Array(UInt16), - `r.type` Array(UInt16), - `r.ttl` Array(UInt32), - `r.data` Array(String), - `method` String""", - topic_list=topic_name, - consumer_group=f"{topic_name}_group", - format="Protobuf", - settings={ - "kafka_schema": "message_with_repeated.proto:Message", - "kafka_skip_broken_messages": 10000, - "kafka_thread_per_consumer": thread_per_consumer, - }, - ) - - instance.query( - f""" - {create_query}; - - SET allow_suspicious_low_cardinality_types=1; - - CREATE TABLE test.log - ( - `tnow` DateTime('Asia/Istanbul') CODEC(DoubleDelta, LZ4), - `server` LowCardinality(String), - `client` LowCardinality(String), - `sPort` LowCardinality(UInt16), - `cPort` UInt16 CODEC(T64, LZ4), - `r.name` Array(String), - `r.class` Array(LowCardinality(UInt16)), - `r.type` Array(LowCardinality(UInt16)), - `r.ttl` 
Array(LowCardinality(UInt32)), - `r.data` Array(String), - `method` LowCardinality(String) - ) - ENGINE = MergeTree - PARTITION BY toYYYYMMDD(tnow) - ORDER BY (tnow, server) - TTL toDate(tnow) + toIntervalMonth(1000) - SETTINGS index_granularity = 16384, merge_with_ttl_timeout = 7200; - - CREATE MATERIALIZED VIEW test.test_consumer TO test.log AS - SELECT - toDateTime(a.tnow) AS tnow, - a.server AS server, - a.client AS client, - a.sPort AS sPort, - a.cPort AS cPort, - a.`r.name` AS `r.name`, - a.`r.class` AS `r.class`, - a.`r.type` AS `r.type`, - a.`r.ttl` AS `r.ttl`, - a.`r.data` AS `r.data`, - a.method AS method - FROM test.test_queue AS a; - """ - ) - - instance.wait_for_log_line("Committed offset") - result = instance.query("SELECT * FROM test.log") - - expected = """\ - 2021-08-15 07:00:00 server1 443 50000 ['1','2'] [0,0] [444,0] [123123,0] ['adsfasd',''] GET - 2021-08-15 07:00:02 0 0 [] [] [] [] [] - 2021-08-15 07:00:02 0 0 [] [] [] [] [] - """ - assert TSV(result) == TSV(expected) - - -@pytest.mark.parametrize( - "create_query_generator", - [generate_old_create_table_query, generate_new_create_table_query], -) -def test_num_consumers_limit( - kafka_cluster: ClickHouseCluster, - instance: ClickHouseInstance, - create_query_generator, -): - instance.query("DROP TABLE IF EXISTS test.kafka") - - thread_per_consumer = must_use_thread_per_consumer(create_query_generator) - - create_query = create_query_generator( - "kafka", - "key UInt64, value UInt64", - settings={ - "kafka_num_consumers": 100, - "kafka_thread_per_consumer": thread_per_consumer, - }, - ) - error = instance.query_and_get_error(create_query) - - assert ( - "BAD_ARGUMENTS" in error - and "The number of consumers can not be bigger than" in error - ) - - instance.query( - f""" - SET kafka_disable_num_consumers_limit = 1; - {create_query}; - """ - ) - - instance.query("DROP TABLE test.kafka") - - -@pytest.mark.parametrize( - "create_query_generator", - [generate_old_create_table_query, generate_new_create_table_query], -) -def test_format_with_prefix_and_suffix( - kafka_cluster: ClickHouseCluster, - instance: ClickHouseInstance, - create_query_generator, -): - topic_name = "custom" + get_topic_postfix(create_query_generator) - - with kafka_topic(get_admin_client(kafka_cluster), topic_name): + with kafka_topic(admin_client, topic_name): create_query = create_query_generator( "kafka", "key UInt64, value UInt64", topic_list=topic_name, consumer_group=topic_name, - format="CustomSeparated", - ) - instance.query( - f""" - DROP TABLE IF EXISTS test.kafka; - {create_query}; - """ - ) - - instance.query( - "INSERT INTO test.kafka select number*10 as key, number*100 as value from numbers(2) settings format_custom_result_before_delimiter='\n', format_custom_result_after_delimiter='\n'" - ) - - message_count = 2 - messages = kafka_consume_with_retry(kafka_cluster, topic_name, message_count) - - assert len(messages) == 2 - - assert ( - "".join(messages) - == "\n0\t0\n\n\n10\t100\n\n" - ) - - -@pytest.mark.parametrize( - "create_query_generator", - [generate_old_create_table_query, generate_new_create_table_query], -) -def test_max_rows_per_message( - kafka_cluster: ClickHouseCluster, - instance: ClickHouseInstance, - create_query_generator, -): - topic_name = "custom_max_rows_per_message" + get_topic_postfix( - create_query_generator - ) - - with kafka_topic(get_admin_client(kafka_cluster), topic_name): - num_rows = 5 - - create_query = create_query_generator( - "kafka", - "key UInt64, value UInt64", - topic_list=topic_name, - 
consumer_group=topic_name, - format="CustomSeparated", settings={ - "format_custom_result_before_delimiter": "\n", - "format_custom_result_after_delimiter": "\n", - "kafka_max_rows_per_message": 3, + "kafka_max_block_size": 100, }, ) instance.query( f""" DROP TABLE IF EXISTS test.view; - DROP TABLE IF EXISTS test.kafka; - {create_query}; - - CREATE MATERIALIZED VIEW test.view ENGINE=MergeTree ORDER BY (key, value) AS - SELECT key, value FROM test.kafka; - """ - ) - - instance.query( - f"INSERT INTO test.kafka select number*10 as key, number*100 as value from numbers({num_rows}) settings format_custom_result_before_delimiter='\n', format_custom_result_after_delimiter='\n'" - ) - - message_count = 2 - messages = kafka_consume_with_retry(kafka_cluster, topic_name, message_count) - - assert len(messages) == message_count - - assert ( - "".join(messages) - == "\n0\t0\n10\t100\n20\t200\n\n\n30\t300\n40\t400\n\n" - ) - - instance.query_with_retry( - "SELECT count() FROM test.view", - check_callback=lambda res: int(res) == num_rows, - ) - - result = instance.query("SELECT * FROM test.view") - assert result == "0\t0\n10\t100\n20\t200\n30\t300\n40\t400\n" - - -@pytest.mark.parametrize( - "create_query_generator", - [generate_old_create_table_query, generate_new_create_table_query], -) -def test_row_based_formats( - kafka_cluster: ClickHouseCluster, - instance: ClickHouseInstance, - create_query_generator, -): - admin_client = get_admin_client(kafka_cluster) - - formats_to_test = [ - "TSV", - "TSVWithNamesAndTypes", - "TSKV", - "CSV", - "CSVWithNamesAndTypes", - "CustomSeparatedWithNamesAndTypes", - "Values", - "JSON", - "JSONEachRow", - "JSONCompactEachRow", - "JSONCompactEachRowWithNamesAndTypes", - "JSONObjectEachRow", - "Avro", - "RowBinary", - "RowBinaryWithNamesAndTypes", - "MsgPack", - ] - - num_rows = 10 - max_rows_per_message = 5 - message_count = num_rows / max_rows_per_message - expected = "" - for i in range(num_rows): - expected += str(i * 10) + "\t" + str(i * 100) + "\n" - - for format_name in formats_to_test: - logging.debug("Setting up {format_name}") - - topic_name = format_name + get_topic_postfix(create_query_generator) - table_name = f"kafka_{format_name}" - - kafka_create_topic(admin_client, topic_name) - - create_query = create_query_generator( - table_name, - "key UInt64, value UInt64", - topic_list=topic_name, - consumer_group=topic_name, - format=format_name, - settings={"kafka_max_rows_per_message": max_rows_per_message}, - ) - - instance.query( - f""" - DROP TABLE IF EXISTS test.{table_name}_view; - DROP TABLE IF EXISTS test.{table_name}; + DROP TABLE IF EXISTS test.consumer; {create_query}; - CREATE MATERIALIZED VIEW test.{table_name}_view ENGINE=MergeTree ORDER BY (key, value) AS - SELECT key, value FROM test.{table_name}; + CREATE TABLE test.view (key UInt64, value UInt64, ts DateTime64(3) MATERIALIZED now64(3)) + ENGINE = MergeTree() + ORDER BY key; """ ) - for format_name in formats_to_test: - logging.debug("Inserting to {format_name}") + cancel = threading.Event() + + def produce(): + while not cancel.is_set(): + messages = [json.dumps({"key": 0, "value": 0})] + kafka_produce(kafka_cluster, topic_name, messages) + time.sleep(0.8) + + kafka_thread = threading.Thread(target=produce) + kafka_thread.start() - topic_name = format_name + get_topic_postfix(create_query_generator) - table_name = f"kafka_{format_name}" instance.query( - f"INSERT INTO test.{table_name} SELECT number * 10 as key, number * 100 as value FROM numbers({num_rows})" - ) - - for format_name in 
formats_to_test: - topic_name = format_name + get_topic_postfix(create_query_generator) - table_name = f"kafka_{format_name}" - messages = kafka_consume_with_retry( - kafka_cluster, topic_name, message_count, need_decode=False - ) - - assert len(messages) == message_count - - result = instance.query_with_retry( - f"SELECT * FROM test.{table_name}_view", - check_callback=lambda res: res == expected, - ) - - assert result == expected - kafka_delete_topic(admin_client, topic_name) - - -@pytest.mark.parametrize( - "create_query_generator", - [generate_old_create_table_query, generate_new_create_table_query], -) -def test_block_based_formats_1( - kafka_cluster: ClickHouseCluster, - instance: ClickHouseInstance, - create_query_generator, -): - topic_name = "pretty_space" + get_topic_postfix(create_query_generator) - - with kafka_topic(get_admin_client(kafka_cluster), topic_name): - create_query = create_query_generator( - "kafka", - "key UInt64, value UInt64", - topic_list=topic_name, - consumer_group=topic_name, - format="PrettySpace", - ) - instance.query( - f""" - DROP TABLE IF EXISTS test.kafka; - - {create_query}; - - INSERT INTO test.kafka SELECT number * 10 as key, number * 100 as value FROM numbers(5) settings max_block_size=2, optimize_trivial_insert_select=0, output_format_pretty_color=1, output_format_pretty_row_numbers=0; - """ - ) - - message_count = 3 - messages = kafka_consume_with_retry(kafka_cluster, topic_name, message_count) - assert len(messages) == 3 - - data = [] - for message in messages: - splitted = message.split("\n") - assert splitted[0] == " \x1b[1mkey\x1b[0m \x1b[1mvalue\x1b[0m" - assert splitted[1] == "" - assert splitted[-1] == "" - data += [line.split() for line in splitted[2:-1]] - - assert data == [ - ["0", "0"], - ["10", "100"], - ["20", "200"], - ["30", "300"], - ["40", "400"], - ] - - -@pytest.mark.parametrize( - "create_query_generator", - [generate_old_create_table_query, generate_new_create_table_query], -) -def test_block_based_formats_2( - kafka_cluster: ClickHouseCluster, - instance: ClickHouseInstance, - create_query_generator, -): - admin_client = get_admin_client(kafka_cluster) - num_rows = 100 - message_count = 9 - - for format_name in [ - "JSONColumns", - "Native", - "Arrow", - "Parquet", - "ORC", - "JSONCompactColumns", - ]: - topic_name = format_name + get_topic_postfix(create_query_generator) - table_name = f"kafka_{format_name}" - logging.debug(f"Checking format {format_name}") - with kafka_topic(admin_client, topic_name): - create_query = create_query_generator( - table_name, - "key UInt64, value UInt64", - topic_list=topic_name, - consumer_group=topic_name, - format=format_name, - ) - - instance.query( - f""" - DROP TABLE IF EXISTS test.view; - DROP TABLE IF EXISTS test.{table_name}; - - {create_query}; - - CREATE MATERIALIZED VIEW test.view ENGINE=MergeTree ORDER BY (key, value) AS - SELECT key, value FROM test.{table_name}; - - INSERT INTO test.{table_name} SELECT number * 10 as key, number * 100 as value FROM numbers({num_rows}) settings max_block_size=12, optimize_trivial_insert_select=0; """ - ) - messages = kafka_consume_with_retry( - kafka_cluster, topic_name, message_count, need_decode=False - ) - assert len(messages) == message_count - - rows = int( - instance.query_with_retry( - "SELECT count() FROM test.view", - check_callback=lambda res: int(res) == num_rows, - ) - ) - - assert rows == num_rows - - result = instance.query("SELECT * FROM test.view ORDER by key") - expected = "" - for i in range(num_rows): - expected += str(i * 10) 
+ "\t" + str(i * 100) + "\n" - assert result == expected - - -def test_system_kafka_consumers( - kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance -): - admin_client = KafkaAdminClient( - bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) - ) - - topic = "system_kafka_cons" - kafka_create_topic(admin_client, topic) - - # Check that format_csv_delimiter parameter works now - as part of all available format settings. - kafka_produce( - kafka_cluster, - topic, - ["1|foo", "2|bar", "42|answer", "100|multi\n101|row\n103|message"], - ) - - instance.query( - f""" - DROP TABLE IF EXISTS test.kafka; - - CREATE TABLE test.kafka (a UInt64, b String) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = '{topic}', - kafka_group_name = '{topic}', - kafka_commit_on_select = 1, - kafka_format = 'CSV', - kafka_row_delimiter = '\\n', - format_csv_delimiter = '|'; + CREATE MATERIALIZED VIEW test.consumer TO test.view AS + SELECT * FROM test.kafka; """ - ) + ) - result = instance.query("SELECT * FROM test.kafka ORDER BY a;") + # By default the flush timeout should be 7.5 seconds => 18 seconds should be enough for 2 flushes, but not for 3 + time.sleep(18) - result_system_kafka_consumers = instance.query( + result = instance.query("SELECT uniqExact(ts), count() >= 15 FROM test.view") + + cancel.set() + kafka_thread.join() + + instance.query( + """ + DROP TABLE test.consumer; + DROP TABLE test.view; """ - create or replace function stable_timestamp as - (d)->multiIf(d==toDateTime('1970-01-01 00:00:00'), 'never', abs(dateDiff('second', d, now())) < 30, 'now', toString(d)); + ) - SELECT database, table, length(consumer_id), assignments.topic, assignments.partition_id, - assignments.current_offset, - if(length(exceptions.time)>0, exceptions.time[1]::String, 'never') as last_exception_time_, - if(length(exceptions.text)>0, exceptions.text[1], 'no exception') as last_exception_, - stable_timestamp(last_poll_time) as last_poll_time_, num_messages_read, stable_timestamp(last_commit_time) as last_commit_time_, - num_commits, stable_timestamp(last_rebalance_time) as last_rebalance_time_, - num_rebalance_revocations, num_rebalance_assignments, is_currently_used - FROM system.kafka_consumers WHERE database='test' and table='kafka' format Vertical; - """ - ) - logging.debug(f"result_system_kafka_consumers: {result_system_kafka_consumers}") - assert ( - result_system_kafka_consumers - == """Row 1: -────── -database: test -table: kafka -length(consumer_id): 67 -assignments.topic: ['system_kafka_cons'] -assignments.partition_id: [0] -assignments.current_offset: [4] -last_exception_time_: never -last_exception_: no exception -last_poll_time_: now -num_messages_read: 4 -last_commit_time_: now -num_commits: 1 -last_rebalance_time_: never -num_rebalance_revocations: 0 -num_rebalance_assignments: 1 -is_currently_used: 0 -""" - ) - - instance.query("DROP TABLE test.kafka") - kafka_delete_topic(admin_client, topic) + assert TSV(result) == TSV("2 1") def test_system_kafka_consumers_rebalance( @@ -4993,274 +1171,86 @@ is_currently_used: 0 kafka_delete_topic(admin_client, topic) -def test_system_kafka_consumers_rebalance_mv( - kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance, max_retries=15 +def test_kafka_consumer_hang( + kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance ): - admin_client = KafkaAdminClient( - bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) - ) + admin_client = get_admin_client(kafka_cluster) - producer = 
KafkaProducer( - bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port), - value_serializer=producer_serializer, - key_serializer=producer_serializer, - ) - - topic = "system_kafka_cons_mv" - kafka_create_topic(admin_client, topic, num_partitions=2) + topic_name = "consumer_hang" + kafka_create_topic(admin_client, topic_name, num_partitions=8) instance.query( f""" DROP TABLE IF EXISTS test.kafka; - DROP TABLE IF EXISTS test.kafka2; - DROP TABLE IF EXISTS test.kafka_persistent; - DROP TABLE IF EXISTS test.kafka_persistent2; + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.consumer; CREATE TABLE test.kafka (key UInt64, value UInt64) ENGINE = Kafka SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = '{topic}', - kafka_group_name = '{topic}', - kafka_commit_on_select = 1, - kafka_format = 'JSONEachRow'; - - CREATE TABLE test.kafka2 (key UInt64, value UInt64) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = '{topic}', - kafka_commit_on_select = 1, - kafka_group_name = '{topic}', - kafka_format = 'JSONEachRow'; - - CREATE TABLE test.kafka_persistent (key UInt64, value UInt64) - ENGINE = MergeTree() - ORDER BY key; - CREATE TABLE test.kafka_persistent2 (key UInt64, value UInt64) - ENGINE = MergeTree() - ORDER BY key; - - CREATE MATERIALIZED VIEW test.persistent_kafka_mv TO test.kafka_persistent AS - SELECT key, value - FROM test.kafka; - - CREATE MATERIALIZED VIEW test.persistent_kafka_mv2 TO test.kafka_persistent2 AS - SELECT key, value - FROM test.kafka2; + kafka_topic_list = '{topic_name}', + kafka_group_name = '{topic_name}', + kafka_format = 'JSONEachRow', + kafka_num_consumers = 8; + CREATE TABLE test.view (key UInt64, value UInt64) ENGINE = Memory(); + CREATE MATERIALIZED VIEW test.consumer TO test.view AS SELECT * FROM test.kafka; """ ) - producer.send(topic=topic, value=json.dumps({"key": 1, "value": 1}), partition=0) - producer.send(topic=topic, value=json.dumps({"key": 11, "value": 11}), partition=1) - time.sleep(3) + instance.wait_for_log_line("kafka.*Stalled", repetitions=20) - retries = 0 - result_rdkafka_stat = "" - while True: - result_rdkafka_stat = instance.query( - """ - SELECT table, JSONExtractString(rdkafka_stat, 'type') - FROM system.kafka_consumers WHERE database='test' and table = 'kafka' format Vertical; - """ - ) - if result_rdkafka_stat.find("consumer") or retries > max_retries: - break - retries += 1 - time.sleep(1) - - assert ( - result_rdkafka_stat - == """Row 1: -────── -table: kafka -JSONExtractString(rdkafka_stat, 'type'): consumer -""" - ) + # This should trigger heartbeat fail, + # which will trigger REBALANCE_IN_PROGRESS, + # and which can lead to consumer hang. 
+ kafka_cluster.pause_container("kafka1") + instance.wait_for_log_line("heartbeat error") + kafka_cluster.unpause_container("kafka1") + - # logging.debug("Attempt to drop") instance.query("DROP TABLE test.kafka") - instance.query("DROP TABLE test.kafka2") - instance.query("DROP TABLE test.kafka_persistent") - instance.query("DROP TABLE test.kafka_persistent2") - instance.query("DROP TABLE test.persistent_kafka_mv") - instance.query("DROP TABLE test.persistent_kafka_mv2") - kafka_delete_topic(admin_client, topic) + # kafka_cluster.open_bash_shell('instance') - -def test_formats_errors(kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance): - admin_client = KafkaAdminClient( - bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) + instance.query( + """ + DROP TABLE test.consumer; + DROP TABLE test.view; + """ ) - formats_to_test = [ - "Template", - "Regexp", - "TSV", - "TSVWithNamesAndTypes", - "TSKV", - "CSV", - "CSVWithNames", - "CSVWithNamesAndTypes", - "CustomSeparated", - "CustomSeparatedWithNames", - "CustomSeparatedWithNamesAndTypes", - "Values", - "JSON", - "JSONEachRow", - "JSONStringsEachRow", - "JSONCompactEachRow", - "JSONCompactEachRowWithNamesAndTypes", - "JSONObjectEachRow", - "Avro", - "RowBinary", - "RowBinaryWithNamesAndTypes", - "MsgPack", - "JSONColumns", - "JSONCompactColumns", - "JSONColumnsWithMetadata", - "BSONEachRow", - "Native", - "Arrow", - "Parquet", - "ORC", - "Npy", - "ParquetMetadata", - "CapnProto", - "Protobuf", - "ProtobufSingle", - "ProtobufList", - "DWARF", - "HiveText", - "MySQLDump", - ] + # the original problem appeared as the following sequence of messages in the librdkafka logs: + # BROKERFAIL -> |ASSIGN| -> REBALANCE_IN_PROGRESS -> "waiting for rebalance_cb" (repeated forever) + # so it waited forever for the application to execute the queued rebalance callback - for format_name in formats_to_test: - kafka_create_topic(admin_client, format_name) - table_name = f"kafka_{format_name}" - - instance.query( - f""" - DROP TABLE IF EXISTS test.view; - DROP TABLE IF EXISTS test.{table_name}; - - CREATE TABLE test.{table_name} (key UInt64, value UInt64) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = '{format_name}', - kafka_group_name = '{format_name}', - kafka_format = '{format_name}', - kafka_max_rows_per_message = 5, - format_template_row='template_row.format', - format_regexp='id: (.+?)', - input_format_with_names_use_header=0, - format_schema='key_value_message:Message'; - - CREATE MATERIALIZED VIEW test.{table_name}_view ENGINE=MergeTree ORDER BY (key, value) AS - SELECT key, value FROM test.{table_name}; - """ - ) - - kafka_produce( - kafka_cluster, - format_name, - ["Broken message\nBroken message\nBroken message\n"], - ) - - for format_name in formats_to_test: - table_name = f"kafka_{format_name}" - num_errors = int( - instance.query_with_retry( - f"SELECT length(exceptions.text) from system.kafka_consumers where database = 'test' and table = '{table_name}'", - check_callback=lambda res: int(res) > 0, ) ) + # from a user perspective: we expect no hanging 'drop' queries + # 'dr'||'op' is used to avoid matching this query itself + assert ( + int( + instance.query( + "select count() from system.processes where position(lower(query),'dr'||'op')>0" + ) + ) + == 0 + ) - assert num_errors > 0, f"Failed {format_name}" + # clean up unread messages so Kafka does not wait for reading consumers before deleting the topic + instance.query( + f""" + CREATE TABLE test.kafka (key UInt64) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', 
+ kafka_topic_list = '{topic_name}', + kafka_commit_on_select = 1, + kafka_group_name = '{topic_name}', + kafka_format = 'JSONEachRow', + kafka_num_consumers = 8; + """ + ) - instance.query(f"DROP TABLE test.{table_name}") - instance.query(f"DROP TABLE test.{table_name}_view") - kafka_delete_topic(admin_client, format_name) - - -@pytest.mark.parametrize( - "create_query_generator", - [generate_old_create_table_query, generate_new_create_table_query], -) -def test_multiple_read_in_materialized_views( - kafka_cluster: ClickHouseCluster, - instance: ClickHouseInstance, - create_query_generator, -): - topic_name = "multiple_read_from_mv" + get_topic_postfix(create_query_generator) - - with kafka_topic(get_admin_client(kafka_cluster), topic_name): - create_query = create_query_generator( - "kafka_multiple_read_input", - "id Int64", - topic_list=topic_name, - consumer_group=topic_name, - ) - instance.query( - f""" - DROP TABLE IF EXISTS test.kafka_multiple_read_input SYNC; - DROP TABLE IF EXISTS test.kafka_multiple_read_table; - DROP TABLE IF EXISTS test.kafka_multiple_read_mv; - - {create_query}; - - CREATE TABLE test.kafka_multiple_read_table (id Int64) - ENGINE = MergeTree - ORDER BY id; - - - CREATE MATERIALIZED VIEW test.kafka_multiple_read_mv TO test.kafka_multiple_read_table AS - SELECT id - FROM test.kafka_multiple_read_input - WHERE id NOT IN ( - SELECT id - FROM test.kafka_multiple_read_table - WHERE id IN ( - SELECT id - FROM test.kafka_multiple_read_input - ) - ); - """ - ) - - kafka_produce( - kafka_cluster, topic_name, [json.dumps({"id": 42}), json.dumps({"id": 43})] - ) - - expected_result = "42\n43\n" - res = instance.query_with_retry( - f"SELECT id FROM test.kafka_multiple_read_table ORDER BY id", - check_callback=lambda res: res == expected_result, - ) - assert res == expected_result - - # Verify that the query deduplicates the records as it meant to be - messages = [] - for _ in range(0, 10): - messages.append(json.dumps({"id": 42})) - messages.append(json.dumps({"id": 43})) - - messages.append(json.dumps({"id": 44})) - - kafka_produce(kafka_cluster, topic_name, messages) - - expected_result = "42\n43\n44\n" - res = instance.query_with_retry( - f"SELECT id FROM test.kafka_multiple_read_table ORDER BY id", - check_callback=lambda res: res == expected_result, - ) - assert res == expected_result - - instance.query( - f""" - DROP TABLE test.kafka_multiple_read_input; - DROP TABLE test.kafka_multiple_read_table; - DROP TABLE test.kafka_multiple_read_mv; - """ - ) + num_read = int(instance.query("SELECT count() FROM test.kafka")) + logging.debug(f"read {num_read} from {topic_name} before delete") + instance.query("DROP TABLE test.kafka") + kafka_delete_topic(admin_client, topic_name) @@ -5327,6 +1317,503 @@ def test_kafka_null_message( ) +# sequential reads from different consumers break a lot of Kafka invariants +# (the first consumer gets all partitions initially, and may have problems doing polls every 60 sec) +def test_kafka_read_consumers_in_parallel( + kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance +): + admin_client = get_admin_client(kafka_cluster) + + topic_name = "read_consumers_in_parallel" + kafka_create_topic(admin_client, topic_name, num_partitions=8) + + cancel = threading.Event() + + def produce(): + while not cancel.is_set(): + messages = [] + for _ in range(100): + messages.append(json.dumps({"key": 0, "value": 0})) + kafka_produce(kafka_cluster, "read_consumers_in_parallel", messages) + time.sleep(1) + + 
kafka_thread = threading.Thread(target=produce) + kafka_thread.start() + + # when we have more than 1 consumer in a single table, + # and kafka_thread_per_consumer=0 + # all the consumers should be read in parallel, not sequentially. + # then, reading 8 consumers in parallel with a 1 second kafka_poll_timeout_ms and a flush limit below 1 sec, + # we should have exactly 1 poll per consumer (i.e. 8 polls) every second (from different threads) + # in case parallel consuming is not working, we will have only 1 poll every second (from the same thread). + instance.query( + f""" + DROP TABLE IF EXISTS test.kafka; + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.consumer; + + CREATE TABLE test.kafka (key UInt64, value UInt64) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = '{topic_name}', + kafka_group_name = '{topic_name}', + kafka_format = 'JSONEachRow', + kafka_num_consumers = 8, + kafka_thread_per_consumer = 0, + kafka_poll_timeout_ms = 1000, + kafka_flush_interval_ms = 999; + CREATE TABLE test.view (key UInt64, value UInt64) ENGINE = Memory(); + CREATE MATERIALIZED VIEW test.consumer TO test.view AS SELECT * FROM test.kafka; + """ + ) + + instance.wait_for_log_line( + "kafka.*Polled batch of [0-9]+.*read_consumers_in_parallel", + repetitions=64, + look_behind_lines=100, + timeout=30, # we should get 64 polls in ~8 seconds, but when read sequentially it will take more than 64 sec + ) + + cancel.set() + kafka_thread.join() + + instance.query( + """ + DROP TABLE test.consumer; + DROP TABLE test.view; + DROP TABLE test.kafka; + """ + ) + kafka_delete_topic(admin_client, topic_name) + + +# if we come to the partition end, we will repeat polling until reaching kafka_max_block_size or flush_interval +# that behavior is a bit questionable - we could just take bigger pauses between polls instead - +# to do more work in a single pass, and give the thread more rest. +# But in case of peaky loads on the kafka topic the current contract sounds more predictable and +# easier to understand, so let's keep it as is for now. +# also, we can come to EOF because we drained the librdkafka internal queue too fast +@pytest.mark.parametrize( + "create_query_generator", + [generate_old_create_table_query, generate_new_create_table_query], +) +def test_premature_flush_on_eof( + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, +): + topic_name = "premature_flush_on_eof" + get_topic_postfix(create_query_generator) + create_query = create_query_generator( + "kafka", + "key UInt64, value UInt64", + topic_list=topic_name, + consumer_group=topic_name, + ) + instance.query( + f""" + {create_query}; + CREATE TABLE test.destination ( + key UInt64, + value UInt64, + _topic String, + _key String, + _offset UInt64, + _partition UInt64, + _timestamp Nullable(DateTime('UTC')), + _consumed_by LowCardinality(String) + ) + ENGINE = MergeTree() + ORDER BY key; + """ + ) + + # messages created here will be consumed immediately after MV creation + # reaching topic EOF. 
+ # But we should not flush immediately after reaching EOF, because + # the next poll can return more data, and we should respect kafka_flush_interval_ms + # and try to form a bigger block + messages = [json.dumps({"key": 1, "value": 1})] + kafka_produce(kafka_cluster, topic_name, messages) + + instance.query( + """ + CREATE MATERIALIZED VIEW test.kafka_consumer TO test.destination AS + SELECT + key, + value, + _topic, + _key, + _offset, + _partition, + _timestamp + FROM test.kafka; + """ + ) + + # all subscriptions/assignments are done during the select, so it starts sending data to test.destination + # immediately after creation of MV + + instance.wait_for_log_line("Polled batch of 1 messages") + instance.wait_for_log_line("Stalled") + + # produce more messages after delay + kafka_produce(kafka_cluster, topic_name, messages) + + # data was not flushed yet (it will be flushed 7.5 sec after creating MV) + assert int(instance.query("SELECT count() FROM test.destination")) == 0 + + instance.wait_for_log_line("Committed offset 2") + + # it should be a single part, i.e. a single insert + result = instance.query( + "SELECT _part, count() FROM test.destination group by _part" + ) + assert TSV(result) == TSV("all_1_1_0\t2") + + instance.query( + """ + DROP TABLE test.kafka_consumer; + DROP TABLE test.destination; + """ + ) + + +@pytest.mark.parametrize( + "create_query_generator", + [generate_old_create_table_query, generate_new_create_table_query], +) +def test_num_consumers_limit( + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, +): + instance.query("DROP TABLE IF EXISTS test.kafka") + + thread_per_consumer = must_use_thread_per_consumer(create_query_generator) + + create_query = create_query_generator( + "kafka", + "key UInt64, value UInt64", + settings={ + "kafka_num_consumers": 100, + "kafka_thread_per_consumer": thread_per_consumer, + }, + ) + error = instance.query_and_get_error(create_query) + + assert ( + "BAD_ARGUMENTS" in error + and "The number of consumers can not be bigger than" in error + ) + + instance.query( + f""" + SET kafka_disable_num_consumers_limit = 1; + {create_query}; + """ + ) + + instance.query("DROP TABLE test.kafka") + + +@pytest.mark.parametrize( + "create_query_generator, do_direct_read", + [(generate_old_create_table_query, True), (generate_new_create_table_query, False)], +) +def test_kafka_column_types( + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, + do_direct_read, +): + def assert_returned_exception(e): + assert e.value.returncode == 36 + assert ( + "KafkaEngine doesn't support DEFAULT/MATERIALIZED/EPHEMERAL expressions for columns." 
+ in str(e.value) + ) + + # check column with DEFAULT expression + with pytest.raises(QueryRuntimeException) as exception: + instance.query(create_query_generator("kafka", "a Int, b Int DEFAULT 0")) + assert_returned_exception(exception) + + # check EPHEMERAL + with pytest.raises(QueryRuntimeException) as exception: + instance.query(create_query_generator("kafka", "a Int, b Int EPHEMERAL")) + assert_returned_exception(exception) + + # check MATERIALIZED + with pytest.raises(QueryRuntimeException) as exception: + instance.query( + """ + CREATE TABLE test.kafka (a Int, b String MATERIALIZED toString(a)) + ENGINE = Kafka('{kafka_broker}:19092', '{kafka_topic_new}', '{kafka_group_name_new}', '{kafka_format_json_each_row}', '\\n') + """ + ) + assert_returned_exception(exception) + + if do_direct_read: + # check ALIAS + instance.query( + create_query_generator( + "kafka", + "a Int, b String Alias toString(a)", + settings={"kafka_commit_on_select": True}, + ) + ) + messages = [] + for i in range(5): + messages.append(json.dumps({"a": i})) + kafka_produce(kafka_cluster, KAFKA_TOPIC_NEW, messages) + result = "" + expected = TSV( + """ + 0\t0 + 1\t1 + 2\t2 + 3\t3 + 4\t4 + """ + ) + retries = 50 + while retries > 0: + result += instance.query("SELECT a, b FROM test.kafka", ignore_error=True) + if TSV(result) == expected: + break + retries -= 1 + time.sleep(0.5) + + assert TSV(result) == expected + + instance.query("DROP TABLE test.kafka SYNC") + + +def test_kafka_protobuf_no_delimiter( + kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance +): + instance.query( + """ + CREATE TABLE test.kafka (key UInt64, value String) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'pb_no_delimiter', + kafka_group_name = 'pb_no_delimiter', + kafka_format = 'ProtobufSingle', + kafka_commit_on_select = 1, + kafka_schema = 'kafka.proto:KeyValuePair'; + """ + ) + + kafka_produce_protobuf_messages_no_delimiters( + kafka_cluster, "pb_no_delimiter", 0, 20 + ) + kafka_produce_protobuf_messages_no_delimiters( + kafka_cluster, "pb_no_delimiter", 20, 1 + ) + kafka_produce_protobuf_messages_no_delimiters( + kafka_cluster, "pb_no_delimiter", 21, 29 + ) + + result = "" + while True: + result += instance.query("SELECT * FROM test.kafka", ignore_error=True) + if kafka_check_result(result): + break + + kafka_check_result(result, True) + + instance.query( + """ + CREATE TABLE test.kafka_writer (key UInt64, value String) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'pb_no_delimiter', + kafka_group_name = 'pb_no_delimiter', + kafka_format = 'ProtobufSingle', + kafka_commit_on_select = 1, + kafka_schema = 'kafka.proto:KeyValuePair'; + """ + ) + + instance.query( + "INSERT INTO test.kafka_writer VALUES (13,'Friday'),(42,'Answer to the Ultimate Question of Life, the Universe, and Everything'), (110, 'just a number')" + ) + + time.sleep(1) + + result = instance.query("SELECT * FROM test.kafka ORDER BY key", ignore_error=True) + + expected = """\ +13 Friday +42 Answer to the Ultimate Question of Life, the Universe, and Everything +110 just a number +""" + assert TSV(result) == TSV(expected) + + +def test_kafka_json_as_string( + kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance +): + kafka_produce( + kafka_cluster, + "kafka_json_as_string", + [ + '{"t": 123, "e": {"x": "woof"} }', + "", + '{"t": 124, "e": {"x": "test"} }', + '{"F1":"V1","F2":{"F21":"V21","F22":{},"F23":"V23","F24":"2019-12-24T16:28:04"},"F3":"V3"}', + ], + ) + + # 'tombstone' 
record (null value) = marker of deleted record + producer = KafkaProducer( + bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port), + value_serializer=producer_serializer, + key_serializer=producer_serializer, + ) + producer.send(topic="kafka_json_as_string", key="xxx") + producer.flush() + + instance.query( + """ + CREATE TABLE test.kafka (field String) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'kafka_json_as_string', + kafka_group_name = 'kafka_json_as_string', + kafka_commit_on_select = 1, + kafka_format = 'JSONAsString', + kafka_flush_interval_ms=1000; + """ + ) + + result = instance.query("SELECT * FROM test.kafka;") + expected = """\ +{"t": 123, "e": {"x": "woof"} } +{"t": 124, "e": {"x": "test"} } +{"F1":"V1","F2":{"F21":"V21","F22":{},"F23":"V23","F24":"2019-12-24T16:28:04"},"F3":"V3"} +""" + assert TSV(result) == TSV(expected) + assert instance.contains_in_log( + "Parsing of message (topic: kafka_json_as_string, partition: 0, offset: [0-9]*) return no rows" + ) + + +def test_kafka_settings_predefined_macros( + kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance +): + instance.query( + """ + CREATE TABLE test.kafka (key UInt64, value UInt64) + ENGINE = Kafka + SETTINGS kafka_broker_list = '{kafka_broker}:19092', + kafka_topic_list = '{database}_{table}_topic', + kafka_group_name = '{database}_{table}_group', + kafka_format = '{kafka_format_json_each_row}', + kafka_row_delimiter = '\\n', + kafka_commit_on_select = 1, + kafka_client_id = '{database}_{table} test 1234', + kafka_skip_broken_messages = 1; + """ + ) + + messages = [] + for i in range(50): + messages.append(json.dumps({"key": i, "value": i})) + kafka_produce(kafka_cluster, "test_kafka_topic", messages) + + result = instance.query("SELECT * FROM test.kafka", ignore_error=True) + kafka_check_result(result, True) + + members = describe_consumer_group(kafka_cluster, "test_kafka_group") + assert members[0]["client_id"] == "test_kafka test 1234" + + +def test_kafka_issue4116( + kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance +): + # Check that format_csv_delimiter parameter works now - as part of all available format settings. 
+    kafka_produce(
+        kafka_cluster,
+        "issue4116",
+        ["1|foo", "2|bar", "42|answer", "100|multi\n101|row\n103|message"],
+    )
+
+    instance.query(
+        """
+        CREATE TABLE test.kafka (a UInt64, b String)
+            ENGINE = Kafka
+            SETTINGS kafka_broker_list = 'kafka1:19092',
+                     kafka_topic_list = 'issue4116',
+                     kafka_group_name = 'issue4116',
+                     kafka_commit_on_select = 1,
+                     kafka_format = 'CSV',
+                     kafka_row_delimiter = '\\n',
+                     format_csv_delimiter = '|';
+        """
+    )
+
+    result = instance.query("SELECT * FROM test.kafka ORDER BY a;")
+
+    expected = """\
+1	foo
+2	bar
+42	answer
+100	multi
+101	row
+103	message
+"""
+    assert TSV(result) == TSV(expected)
+
+
+@pytest.mark.parametrize(
+    "create_query_generator",
+    [generate_old_create_table_query, generate_new_create_table_query],
+)
+def test_kafka_issue14202(
+    kafka_cluster: ClickHouseCluster,
+    instance: ClickHouseInstance,
+    create_query_generator,
+):
+    """
+    INSERT INTO Kafka Engine from an empty SELECT subquery was leading to failure
+    """
+
+    topic_name = "issue14202" + get_topic_postfix(create_query_generator)
+    create_query = create_query_generator(
+        "kafka_q",
+        "t UInt64, some_string String",
+        topic_list=topic_name,
+        consumer_group=topic_name,
+    )
+    instance.query(
+        f"""
+        CREATE TABLE test.empty_table (
+            dt Date,
+            some_string String
+        )
+        ENGINE = MergeTree()
+        PARTITION BY toYYYYMM(dt)
+        ORDER BY some_string;
+
+        {create_query};
+        """
+    )
+
+    instance.query(
+        "INSERT INTO test.kafka_q SELECT t, some_string FROM ( SELECT dt AS t, some_string FROM test.empty_table )"
+    )
+    # check instance is alive
+    assert TSV(instance.query("SELECT 1")) == TSV("1")
+    instance.query(
+        """
+        DROP TABLE test.empty_table;
+        DROP TABLE test.kafka_q;
+        """
+    )
+
+
 if __name__ == "__main__":
     init_cluster_and_instance()
     conftest_cluster.start()

From 83bec1f3a9cb17eee5f60e837c8eae878f8112e5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?=
Date: Wed, 11 Sep 2024 15:11:36 +0000
Subject: [PATCH 06/11] Add necessary import

---
 tests/integration/test_storage_kafka/kafka_tests_utils.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/integration/test_storage_kafka/kafka_tests_utils.py b/tests/integration/test_storage_kafka/kafka_tests_utils.py
index a13b2f37d75..ff1c1a9d4b5 100644
--- a/tests/integration/test_storage_kafka/kafka_tests_utils.py
+++ b/tests/integration/test_storage_kafka/kafka_tests_utils.py
@@ -5,6 +5,7 @@ import socket
 import time
 from contextlib import contextmanager
 
+import avro.datafile
 import avro.io
 import avro.schema
 from confluent_kafka.avro.serializer.message_serializer import MessageSerializer

From bc7c9db5f3553fc41e2c09c6a220af0863969be3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?=
Date: Tue, 17 Sep 2024 10:19:15 +0000
Subject: [PATCH 07/11] Remove unnecessary comments

---
 tests/integration/test_storage_kafka/kafka_tests_utils.py | 3 ---
 tests/integration/test_storage_kafka/test_1.py | 5 -----
 tests/integration/test_storage_kafka/test_2.py | 8 --------
 tests/integration/test_storage_kafka/test_3.py | 4 +---
 tests/integration/test_storage_kafka/test_4.py | 7 -------
 5 files changed, 1 insertion(+), 26 deletions(-)

diff --git a/tests/integration/test_storage_kafka/kafka_tests_utils.py b/tests/integration/test_storage_kafka/kafka_tests_utils.py
index ff1c1a9d4b5..a39f2ca4aa3 100644
--- a/tests/integration/test_storage_kafka/kafka_tests_utils.py
+++ b/tests/integration/test_storage_kafka/kafka_tests_utils.py
@@ -262,9 +262,6 @@ def avro_message(value):
         }
     )
     bytes_writer = io.BytesIO()
-    # writer = avro.io.DatumWriter(schema)
-    # encoder = avro.io.BinaryEncoder(bytes_writer)
-    # writer.write(value, encoder)
     # DataFileWriter seems to be mandatory to get schema encoded
     writer = avro.datafile.DataFileWriter(bytes_writer, avro.io.DatumWriter(), schema)
diff --git a/tests/integration/test_storage_kafka/test_1.py b/tests/integration/test_storage_kafka/test_1.py
index 59a587614bf..1edb901c442 100644
--- a/tests/integration/test_storage_kafka/test_1.py
+++ b/tests/integration/test_storage_kafka/test_1.py
@@ -399,7 +399,6 @@ def test_kafka_duplicates_when_commit_failed(
 
     kafka_cluster.unpause_container("kafka1")
 
-    # kafka_cluster.open_bash_shell('instance')
     instance.wait_for_log_line("Committed offset 22")
 
     result = instance.query("SELECT count(), uniqExact(key), max(key) FROM test.view")
@@ -516,9 +515,6 @@ def test_commits_of_unprocessed_messages_on_drop(
     cancel.set()
     instance.wait_for_log_line("kafka.*Stalled", repetitions=5)
 
-    # kafka_cluster.open_bash_shell('instance')
-    # SELECT key, _timestamp, _offset FROM test.destination where runningDifference(key) <> 1 ORDER BY key;
-
     result = instance.query(
         "SELECT count(), uniqExact(key), max(key) FROM test.destination"
     )
@@ -1295,7 +1291,6 @@ def test_exception_from_destructor(
         """
     )
 
-    # kafka_cluster.open_bash_shell('instance')
     assert TSV(instance.query("SELECT 1")) == TSV("1")
 
 
diff --git a/tests/integration/test_storage_kafka/test_2.py b/tests/integration/test_storage_kafka/test_2.py
index 8dc45126312..e1357440c4e 100644
--- a/tests/integration/test_storage_kafka/test_2.py
+++ b/tests/integration/test_storage_kafka/test_2.py
@@ -488,11 +488,6 @@ def test_kafka_virtual_columns2(
 
     instance.wait_for_log_line(log_line, repetitions=4, look_behind_lines=6000)
 
     describe_consumer_group(kafka_cluster, consumer_group)
-    # pprint.pprint(members)
-    # members[0]['client_id'] = 'ClickHouse-instance-test-kafka-0'
-    # members[1]['client_id'] = 'ClickHouse-instance-test-kafka-1'
-
     result = instance.query(
         "SELECT * FROM test.view ORDER BY value", ignore_error=True
     )
@@ -586,8 +581,6 @@ def test_kafka_produce_key_timestamp(
         )
     )
 
-    # instance.wait_for_log_line(log_line)
-
     expected = """\
 1	1	k1	1577836801	k1	insert3	0	0	1577836801
 2	2	k2	1577836802	k2	insert3	0	1	1577836802
@@ -1052,7 +1045,6 @@ kafka_topic_old old_t
 
     members = describe_consumer_group(kafka_cluster, KAFKA_CONSUMER_GROUP_OLD)
     assert members[0]["client_id"] == "ClickHouse-instance-test-kafka"
-    # text_desc = kafka_cluster.exec_in_container(kafka_cluster.get_container_id('kafka1'),"kafka-consumer-groups --bootstrap-server localhost:9092 --describe --members --group old --verbose"))
 
 
 def test_kafka_json_without_delimiter(
diff --git a/tests/integration/test_storage_kafka/test_3.py b/tests/integration/test_storage_kafka/test_3.py
index 8a43014a5de..8d25db4ad9d 100644
--- a/tests/integration/test_storage_kafka/test_3.py
+++ b/tests/integration/test_storage_kafka/test_3.py
@@ -523,8 +523,6 @@ def test_kafka_no_holes_when_write_suffix_failed(
     )
     logging.debug(result)
 
-    # kafka_cluster.open_bash_shell('instance')
-
     instance.query(
         """
         DROP TABLE test.consumer;
@@ -600,7 +598,7 @@ def test_kafka_flush_by_block_size(
     # more flushes can happen during test, we need to check only result of first flush (part named all_1_1_0).
result = instance.query("SELECT count() FROM test.view WHERE _part='all_1_1_0'") - # logging.debug(result) + logging.debug(result) instance.query( """ diff --git a/tests/integration/test_storage_kafka/test_4.py b/tests/integration/test_storage_kafka/test_4.py index c72d2e01e0a..0068566bb43 100644 --- a/tests/integration/test_storage_kafka/test_4.py +++ b/tests/integration/test_storage_kafka/test_4.py @@ -554,7 +554,6 @@ def test_kafka_rebalance( FROM test.{table_name}; """ ) - # kafka_cluster.open_bash_shell('instance') # Waiting for test.kafka_consumerX to start consume ... instance.wait_for_log_line(log_line.format(table_name)) @@ -567,9 +566,6 @@ def test_kafka_rebalance( "DROP TABLE IF EXISTS test.kafka_consumer{} SYNC".format(consumer_index) ) - # logging.debug(instance.query('SELECT count(), uniqExact(key), max(key) + 1 FROM test.destination')) - # kafka_cluster.open_bash_shell('instance') - while 1: messages_consumed = int( instance.query("SELECT uniqExact(key) FROM test.destination") @@ -1206,11 +1202,8 @@ def test_kafka_consumer_hang( instance.wait_for_log_line("heartbeat error") kafka_cluster.unpause_container("kafka1") - # logging.debug("Attempt to drop") instance.query("DROP TABLE test.kafka") - # kafka_cluster.open_bash_shell('instance') - instance.query( """ DROP TABLE test.consumer; From 8fcdfc5a994b7d4b4bc68d208fd2a1dff38e3ee7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Tue, 17 Sep 2024 10:19:29 +0000 Subject: [PATCH 08/11] Remove duplicated test --- .../integration/test_storage_kafka/test_1.py | 121 ------------------ 1 file changed, 121 deletions(-) diff --git a/tests/integration/test_storage_kafka/test_1.py b/tests/integration/test_storage_kafka/test_1.py index 1edb901c442..4cf966ed794 100644 --- a/tests/integration/test_storage_kafka/test_1.py +++ b/tests/integration/test_storage_kafka/test_1.py @@ -604,127 +604,6 @@ def test_kafka_produce_consume( ), "ClickHouse lost some messages: {}".format(result) -def test_system_kafka_consumers_rebalance( - kafka_cluster: ClickHouseCluster, instance: ClickHouseInstance -): - # based on test_kafka_consumer_hang2 - admin_client = KafkaAdminClient( - bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) - ) - - producer = KafkaProducer( - bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port), - value_serializer=producer_serializer, - key_serializer=producer_serializer, - ) - - topic = "system_kafka_cons2" - kafka_create_topic(admin_client, topic, num_partitions=2) - - instance.query( - f""" - DROP TABLE IF EXISTS test.kafka; - DROP TABLE IF EXISTS test.kafka2; - - CREATE TABLE test.kafka (key UInt64, value UInt64) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = '{topic}', - kafka_group_name = '{topic}', - kafka_commit_on_select = 1, - kafka_format = 'JSONEachRow'; - - CREATE TABLE test.kafka2 (key UInt64, value UInt64) - ENGINE = Kafka - SETTINGS kafka_broker_list = 'kafka1:19092', - kafka_topic_list = '{topic}', - kafka_commit_on_select = 1, - kafka_group_name = '{topic}', - kafka_format = 'JSONEachRow'; - """ - ) - - producer.send(topic=topic, value=json.dumps({"key": 1, "value": 1}), partition=0) - producer.send(topic=topic, value=json.dumps({"key": 11, "value": 11}), partition=1) - time.sleep(3) - - # first consumer subscribe the topic, try to poll some data, and go to rest - instance.query("SELECT * FROM test.kafka") - - # second consumer do the same leading to rebalance in the first - # consumer, try to poll some data - 
instance.query("SELECT * FROM test.kafka2")
-
-    producer.send(topic=topic, value=json.dumps({"key": 1, "value": 1}), partition=0)
-    producer.send(topic=topic, value=json.dumps({"key": 10, "value": 10}), partition=1)
-    time.sleep(3)
-
-    instance.query("SELECT * FROM test.kafka")
-    instance.query("SELECT * FROM test.kafka2")
-    instance.query("SELECT * FROM test.kafka")
-    instance.query("SELECT * FROM test.kafka2")
-
-    result_system_kafka_consumers = instance.query(
-        """
-        create or replace function stable_timestamp as
-          (d)->multiIf(d==toDateTime('1970-01-01 00:00:00'), 'never', abs(dateDiff('second', d, now())) < 30, 'now', toString(d));
-        SELECT database, table, length(consumer_id), assignments.topic, assignments.partition_id,
-          assignments.current_offset,
-          if(length(exceptions.time)>0, exceptions.time[1]::String, 'never') as last_exception_time_,
-          if(length(exceptions.text)>0, exceptions.text[1], 'no exception') as last_exception_,
-          stable_timestamp(last_poll_time) as last_poll_time_, stable_timestamp(last_commit_time) as last_commit_time_,
-          num_commits, stable_timestamp(last_rebalance_time) as last_rebalance_time_,
-          num_rebalance_revocations, num_rebalance_assignments, is_currently_used
-          FROM system.kafka_consumers WHERE database='test' and table IN ('kafka', 'kafka2') format Vertical;
-        """
-    )
-    logging.debug(f"result_system_kafka_consumers: {result_system_kafka_consumers}")
-    assert (
-        result_system_kafka_consumers
-        == """Row 1:
-──────
-database: test
-table: kafka
-length(consumer_id): 67
-assignments.topic: ['system_kafka_cons2']
-assignments.partition_id: [0]
-assignments.current_offset: [2]
-last_exception_time_: never
-last_exception_: no exception
-last_poll_time_: now
-last_commit_time_: now
-num_commits: 2
-last_rebalance_time_: now
-num_rebalance_revocations: 1
-num_rebalance_assignments: 2
-is_currently_used: 0
-
-Row 2:
-──────
-database: test
-table: kafka2
-length(consumer_id): 68
-assignments.topic: ['system_kafka_cons2']
-assignments.partition_id: [1]
-assignments.current_offset: [2]
-last_exception_time_: never
-last_exception_: no exception
-last_poll_time_: now
-last_commit_time_: now
-num_commits: 1
-last_rebalance_time_: never
-num_rebalance_revocations: 0
-num_rebalance_assignments: 1
-is_currently_used: 0
-"""
-    )
-
-    instance.query("DROP TABLE test.kafka")
-    instance.query("DROP TABLE test.kafka2")
-
-    kafka_delete_topic(admin_client, topic)
-
-
 @pytest.mark.parametrize(
     "create_query_generator",
     [generate_old_create_table_query, generate_new_create_table_query],
 )

From 346287802fcc67b2e8c2e43c91fb2d3267c61176 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?=
Date: Wed, 18 Sep 2024 14:51:12 +0000
Subject: [PATCH 09/11] Split tests even further

---
 .../integration/test_storage_kafka/test_4.py | 600 -----------------
 .../integration/test_storage_kafka/test_5.py | 630 ++++++++++++++++++
 2 files changed, 630 insertions(+), 600 deletions(-)
 create mode 100644 tests/integration/test_storage_kafka/test_5.py

diff --git a/tests/integration/test_storage_kafka/test_4.py b/tests/integration/test_storage_kafka/test_4.py
index 0068566bb43..382a22332c4 100644
--- a/tests/integration/test_storage_kafka/test_4.py
+++ b/tests/integration/test_storage_kafka/test_4.py
@@ -4,9 +4,6 @@ import threading
 import time
 
 import pytest
-from confluent_kafka.avro.cached_schema_registry_client import (
-    CachedSchemaRegistryClient,
-)
 from helpers.client import QueryRuntimeException
 from helpers.cluster import ClickHouseCluster, ClickHouseInstance, is_arm
 from 
helpers.test_tools import TSV @@ -17,8 +14,6 @@ from test_storage_kafka.conftest import ( init_cluster_and_instance, ) from test_storage_kafka.kafka_tests_utils import ( - avro_confluent_message, - avro_message, describe_consumer_group, existing_kafka_topic, generate_new_create_table_query, @@ -41,601 +36,6 @@ if is_arm(): pytestmark = pytest.mark.skip -@pytest.mark.parametrize( - "create_query_generator", - [generate_old_create_table_query, generate_new_create_table_query], -) -def test_kafka_formats( - kafka_cluster: ClickHouseCluster, - instance: ClickHouseInstance, - create_query_generator, -): - schema_registry_client = CachedSchemaRegistryClient( - {"url": f"http://localhost:{kafka_cluster.schema_registry_port}"} - ) - - # data was dumped from clickhouse itself in a following manner - # clickhouse-client --format=Native --query='SELECT toInt64(number) as id, toUInt16( intDiv( id, 65536 ) ) as blockNo, reinterpretAsString(19777) as val1, toFloat32(0.5) as val2, toUInt8(1) as val3 from numbers(100) ORDER BY id' | xxd -ps | tr -d '\n' | sed 's/\(..\)/\\x\1/g' - - all_formats = { - ## Text formats ## - # dumped with clickhouse-client ... | perl -pe 's/\n/\\n/; s/\t/\\t/g;' - "JSONEachRow": { - "data_sample": [ - '{"id":"0","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n', - '{"id":"1","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"2","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"3","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"4","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"5","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"6","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"7","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"8","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"9","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"10","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"11","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"12","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"13","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"14","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"15","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n', - '{"id":"0","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n', - ], - "supports_empty_value": True, - }, - # JSONAsString doesn't fit to that test, and tested separately - "JSONCompactEachRow": { - "data_sample": [ - '["0", 0, "AM", 0.5, 1]\n', - '["1", 0, "AM", 0.5, 1]\n["2", 0, "AM", 0.5, 1]\n["3", 0, "AM", 0.5, 1]\n["4", 0, "AM", 0.5, 1]\n["5", 0, "AM", 0.5, 1]\n["6", 0, "AM", 0.5, 1]\n["7", 0, "AM", 0.5, 1]\n["8", 0, "AM", 0.5, 1]\n["9", 0, "AM", 0.5, 1]\n["10", 0, "AM", 0.5, 1]\n["11", 0, "AM", 0.5, 1]\n["12", 0, "AM", 0.5, 1]\n["13", 0, "AM", 0.5, 1]\n["14", 0, "AM", 0.5, 1]\n["15", 0, "AM", 0.5, 1]\n', - '["0", 0, "AM", 0.5, 1]\n', - ], - "supports_empty_value": True, - }, - "JSONCompactEachRowWithNamesAndTypes": { - "data_sample": [ - '["id", "blockNo", "val1", "val2", "val3"]\n["Int64", "UInt16", "String", "Float32", "UInt8"]\n["0", 0, "AM", 0.5, 1]\n', - '["id", "blockNo", "val1", "val2", "val3"]\n["Int64", "UInt16", "String", "Float32", "UInt8"]\n["1", 0, "AM", 0.5, 1]\n["2", 0, "AM", 0.5, 1]\n["3", 0, "AM", 0.5, 1]\n["4", 0, "AM", 0.5, 1]\n["5", 0, "AM", 0.5, 1]\n["6", 0, "AM", 0.5, 1]\n["7", 0, "AM", 0.5, 1]\n["8", 0, "AM", 0.5, 1]\n["9", 0, "AM", 0.5, 1]\n["10", 0, "AM", 0.5, 1]\n["11", 0, "AM", 0.5, 1]\n["12", 0, "AM", 0.5, 1]\n["13", 0, "AM", 0.5, 1]\n["14", 0, "AM", 0.5, 1]\n["15", 0, "AM", 0.5, 1]\n', - '["id", "blockNo", "val1", "val2", "val3"]\n["Int64", "UInt16", 
"String", "Float32", "UInt8"]\n["0", 0, "AM", 0.5, 1]\n', - # '' - # On empty message exception: Cannot parse input: expected '[' at end of stream., Stack trace (when copying this message, always include the lines below): - # /src/IO/ReadHelpers.h:175: DB::assertChar(char, DB::ReadBuffer&) @ 0x15db231a in /usr/bin/clickhouse - # /src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp:0: DB::JSONCompactEachRowRowInputFormat::readPrefix() @ 0x1dee6bd6 in /usr/bin/clickhouse - # /src/Processors/Formats/IRowInputFormat.cpp:0: DB::IRowInputFormat::generate() @ 0x1de72710 in /usr/bin/clickhouse - ], - }, - "TSKV": { - "data_sample": [ - "id=0\tblockNo=0\tval1=AM\tval2=0.5\tval3=1\n", - "id=1\tblockNo=0\tval1=AM\tval2=0.5\tval3=1\nid=2\tblockNo=0\tval1=AM\tval2=0.5\tval3=1\nid=3\tblockNo=0\tval1=AM\tval2=0.5\tval3=1\nid=4\tblockNo=0\tval1=AM\tval2=0.5\tval3=1\nid=5\tblockNo=0\tval1=AM\tval2=0.5\tval3=1\nid=6\tblockNo=0\tval1=AM\tval2=0.5\tval3=1\nid=7\tblockNo=0\tval1=AM\tval2=0.5\tval3=1\nid=8\tblockNo=0\tval1=AM\tval2=0.5\tval3=1\nid=9\tblockNo=0\tval1=AM\tval2=0.5\tval3=1\nid=10\tblockNo=0\tval1=AM\tval2=0.5\tval3=1\nid=11\tblockNo=0\tval1=AM\tval2=0.5\tval3=1\nid=12\tblockNo=0\tval1=AM\tval2=0.5\tval3=1\nid=13\tblockNo=0\tval1=AM\tval2=0.5\tval3=1\nid=14\tblockNo=0\tval1=AM\tval2=0.5\tval3=1\nid=15\tblockNo=0\tval1=AM\tval2=0.5\tval3=1\n", - "id=0\tblockNo=0\tval1=AM\tval2=0.5\tval3=1\n", - # '' - # On empty message exception: Unexpected end of stream while reading key name from TSKV format - # /src/Processors/Formats/Impl/TSKVRowInputFormat.cpp:88: DB::readName(DB::ReadBuffer&, StringRef&, std::__1::basic_string, std::__1::allocator >&) @ 0x1df8c098 in /usr/bin/clickhouse - # /src/Processors/Formats/Impl/TSKVRowInputFormat.cpp:114: DB::TSKVRowInputFormat::readRow(std::__1::vector::mutable_ptr, std::__1::allocator::mutable_ptr > >&, DB::RowReadExtension&) @ 0x1df8ae3e in /usr/bin/clickhouse - # /src/Processors/Formats/IRowInputFormat.cpp:64: DB::IRowInputFormat::generate() @ 0x1de727cf in /usr/bin/clickhouse - ], - }, - "CSV": { - "data_sample": [ - '0,0,"AM",0.5,1\n', - '1,0,"AM",0.5,1\n2,0,"AM",0.5,1\n3,0,"AM",0.5,1\n4,0,"AM",0.5,1\n5,0,"AM",0.5,1\n6,0,"AM",0.5,1\n7,0,"AM",0.5,1\n8,0,"AM",0.5,1\n9,0,"AM",0.5,1\n10,0,"AM",0.5,1\n11,0,"AM",0.5,1\n12,0,"AM",0.5,1\n13,0,"AM",0.5,1\n14,0,"AM",0.5,1\n15,0,"AM",0.5,1\n', - '0,0,"AM",0.5,1\n', - ], - "supports_empty_value": True, - }, - "TSV": { - "data_sample": [ - "0\t0\tAM\t0.5\t1\n", - "1\t0\tAM\t0.5\t1\n2\t0\tAM\t0.5\t1\n3\t0\tAM\t0.5\t1\n4\t0\tAM\t0.5\t1\n5\t0\tAM\t0.5\t1\n6\t0\tAM\t0.5\t1\n7\t0\tAM\t0.5\t1\n8\t0\tAM\t0.5\t1\n9\t0\tAM\t0.5\t1\n10\t0\tAM\t0.5\t1\n11\t0\tAM\t0.5\t1\n12\t0\tAM\t0.5\t1\n13\t0\tAM\t0.5\t1\n14\t0\tAM\t0.5\t1\n15\t0\tAM\t0.5\t1\n", - "0\t0\tAM\t0.5\t1\n", - ], - "supports_empty_value": True, - }, - "CSVWithNames": { - "data_sample": [ - '"id","blockNo","val1","val2","val3"\n0,0,"AM",0.5,1\n', - '"id","blockNo","val1","val2","val3"\n1,0,"AM",0.5,1\n2,0,"AM",0.5,1\n3,0,"AM",0.5,1\n4,0,"AM",0.5,1\n5,0,"AM",0.5,1\n6,0,"AM",0.5,1\n7,0,"AM",0.5,1\n8,0,"AM",0.5,1\n9,0,"AM",0.5,1\n10,0,"AM",0.5,1\n11,0,"AM",0.5,1\n12,0,"AM",0.5,1\n13,0,"AM",0.5,1\n14,0,"AM",0.5,1\n15,0,"AM",0.5,1\n', - '"id","blockNo","val1","val2","val3"\n0,0,"AM",0.5,1\n', - # '', - # On empty message exception happens: Attempt to read after eof - # /src/IO/VarInt.h:122: DB::throwReadAfterEOF() @ 0x15c34487 in /usr/bin/clickhouse - # /src/IO/ReadHelpers.cpp:583: void DB::readCSVStringInto, std::__1::allocator > >(std::__1::basic_string, 
std::__1::allocator >&, DB::ReadBuffer&, DB::FormatSettings::CSV const&) @ 0x15c961e1 in /usr/bin/clickhouse - # /src/IO/ReadHelpers.cpp:678: DB::readCSVString(std::__1::basic_string, std::__1::allocator >&, DB::ReadBuffer&, DB::FormatSettings::CSV const&) @ 0x15c8dfae in /usr/bin/clickhouse - # /src/Processors/Formats/Impl/CSVRowInputFormat.cpp:170: DB::CSVRowInputFormat::readPrefix() @ 0x1dec46f7 in /usr/bin/clickhouse - # /src/Processors/Formats/IRowInputFormat.cpp:0: DB::IRowInputFormat::generate() @ 0x1de72710 in /usr/bin/clickhouse - # /src/Processors/ISource.cpp:48: DB::ISource::work() @ 0x1dd79737 in /usr/bin/clickhouse - ], - }, - "Values": { - "data_sample": [ - "(0,0,'AM',0.5,1)", - "(1,0,'AM',0.5,1),(2,0,'AM',0.5,1),(3,0,'AM',0.5,1),(4,0,'AM',0.5,1),(5,0,'AM',0.5,1),(6,0,'AM',0.5,1),(7,0,'AM',0.5,1),(8,0,'AM',0.5,1),(9,0,'AM',0.5,1),(10,0,'AM',0.5,1),(11,0,'AM',0.5,1),(12,0,'AM',0.5,1),(13,0,'AM',0.5,1),(14,0,'AM',0.5,1),(15,0,'AM',0.5,1)", - "(0,0,'AM',0.5,1)", - ], - "supports_empty_value": True, - }, - "TSVWithNames": { - "data_sample": [ - "id\tblockNo\tval1\tval2\tval3\n0\t0\tAM\t0.5\t1\n", - "id\tblockNo\tval1\tval2\tval3\n1\t0\tAM\t0.5\t1\n2\t0\tAM\t0.5\t1\n3\t0\tAM\t0.5\t1\n4\t0\tAM\t0.5\t1\n5\t0\tAM\t0.5\t1\n6\t0\tAM\t0.5\t1\n7\t0\tAM\t0.5\t1\n8\t0\tAM\t0.5\t1\n9\t0\tAM\t0.5\t1\n10\t0\tAM\t0.5\t1\n11\t0\tAM\t0.5\t1\n12\t0\tAM\t0.5\t1\n13\t0\tAM\t0.5\t1\n14\t0\tAM\t0.5\t1\n15\t0\tAM\t0.5\t1\n", - "id\tblockNo\tval1\tval2\tval3\n0\t0\tAM\t0.5\t1\n", - ], - "supports_empty_value": True, - }, - "TSVWithNamesAndTypes": { - "data_sample": [ - "id\tblockNo\tval1\tval2\tval3\nInt64\tUInt16\tString\tFloat32\tUInt8\n0\t0\tAM\t0.5\t1\n", - "id\tblockNo\tval1\tval2\tval3\nInt64\tUInt16\tString\tFloat32\tUInt8\n1\t0\tAM\t0.5\t1\n2\t0\tAM\t0.5\t1\n3\t0\tAM\t0.5\t1\n4\t0\tAM\t0.5\t1\n5\t0\tAM\t0.5\t1\n6\t0\tAM\t0.5\t1\n7\t0\tAM\t0.5\t1\n8\t0\tAM\t0.5\t1\n9\t0\tAM\t0.5\t1\n10\t0\tAM\t0.5\t1\n11\t0\tAM\t0.5\t1\n12\t0\tAM\t0.5\t1\n13\t0\tAM\t0.5\t1\n14\t0\tAM\t0.5\t1\n15\t0\tAM\t0.5\t1\n", - "id\tblockNo\tval1\tval2\tval3\nInt64\tUInt16\tString\tFloat32\tUInt8\n0\t0\tAM\t0.5\t1\n", - # '', - # On empty message exception happens: Cannot parse input: expected '\n' at end of stream. 
- # /src/IO/ReadHelpers.cpp:84: DB::throwAtAssertionFailed(char const*, DB::ReadBuffer&) @ 0x15c8d8ec in /usr/bin/clickhouse - # /src/IO/ReadHelpers.h:175: DB::assertChar(char, DB::ReadBuffer&) @ 0x15db231a in /usr/bin/clickhouse - # /src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp:24: DB::skipTSVRow(DB::ReadBuffer&, unsigned long) @ 0x1df92fac in /usr/bin/clickhouse - # /src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp:168: DB::TabSeparatedRowInputFormat::readPrefix() @ 0x1df92df0 in /usr/bin/clickhouse - # /src/Processors/Formats/IRowInputFormat.cpp:0: DB::IRowInputFormat::generate() @ 0x1de72710 in /usr/bin/clickhouse - ], - }, - "CustomSeparated": { - "data_sample": [ - "0\t0\tAM\t0.5\t1\n", - "1\t0\tAM\t0.5\t1\n2\t0\tAM\t0.5\t1\n3\t0\tAM\t0.5\t1\n4\t0\tAM\t0.5\t1\n5\t0\tAM\t0.5\t1\n6\t0\tAM\t0.5\t1\n7\t0\tAM\t0.5\t1\n8\t0\tAM\t0.5\t1\n9\t0\tAM\t0.5\t1\n10\t0\tAM\t0.5\t1\n11\t0\tAM\t0.5\t1\n12\t0\tAM\t0.5\t1\n13\t0\tAM\t0.5\t1\n14\t0\tAM\t0.5\t1\n15\t0\tAM\t0.5\t1\n", - "0\t0\tAM\t0.5\t1\n", - ], - }, - "Template": { - "data_sample": [ - '(id = 0, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)', - '(id = 1, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 2, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 3, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 4, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 5, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 6, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 7, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 8, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 9, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 10, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 11, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 12, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 13, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 14, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 15, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)', - '(id = 0, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)', - ], - "extra_settings": {"format_template_row": "template_row.format"}, - }, - "Regexp": { - "data_sample": [ - '(id = 0, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)', - '(id = 1, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 2, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 3, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 4, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 5, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 6, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 7, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 8, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 9, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 10, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 11, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 12, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 13, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 14, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 15, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)', - '(id = 0, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)', - # '' - # On empty message exception happens: Line "" doesn't match the regexp.: (at row 1) - # /src/Processors/Formats/Impl/RegexpRowInputFormat.cpp:140: DB::RegexpRowInputFormat::readRow(std::__1::vector::mutable_ptr, std::__1::allocator::mutable_ptr > >&, DB::RowReadExtension&) @ 0x1df82fcb in /usr/bin/clickhouse - ], - "extra_settings": { - 
"format_regexp": r"\(id = (.+?), blockNo = (.+?), val1 = \"(.+?)\", val2 = (.+?), val3 = (.+?)\)", - "format_regexp_escaping_rule": "Escaped", - }, - }, - ## BINARY FORMATS - # dumped with - # clickhouse-client ... | xxd -ps -c 200 | tr -d '\n' | sed 's/\(..\)/\\x\1/g' - "Native": { - "data_sample": [ - b"\x05\x01\x02\x69\x64\x05\x49\x6e\x74\x36\x34\x00\x00\x00\x00\x00\x00\x00\x00\x07\x62\x6c\x6f\x63\x6b\x4e\x6f\x06\x55\x49\x6e\x74\x31\x36\x00\x00\x04\x76\x61\x6c\x31\x06\x53\x74\x72\x69\x6e\x67\x02\x41\x4d\x04\x76\x61\x6c\x32\x07\x46\x6c\x6f\x61\x74\x33\x32\x00\x00\x00\x3f\x04\x76\x61\x6c\x33\x05\x55\x49\x6e\x74\x38\x01", - b"\x05\x0f\x02\x69\x64\x05\x49\x6e\x74\x36\x34\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x09\x00\x00\x00\x00\x00\x00\x00\x0a\x00\x00\x00\x00\x00\x00\x00\x0b\x00\x00\x00\x00\x00\x00\x00\x0c\x00\x00\x00\x00\x00\x00\x00\x0d\x00\x00\x00\x00\x00\x00\x00\x0e\x00\x00\x00\x00\x00\x00\x00\x0f\x00\x00\x00\x00\x00\x00\x00\x07\x62\x6c\x6f\x63\x6b\x4e\x6f\x06\x55\x49\x6e\x74\x31\x36\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x04\x76\x61\x6c\x31\x06\x53\x74\x72\x69\x6e\x67\x02\x41\x4d\x02\x41\x4d\x02\x41\x4d\x02\x41\x4d\x02\x41\x4d\x02\x41\x4d\x02\x41\x4d\x02\x41\x4d\x02\x41\x4d\x02\x41\x4d\x02\x41\x4d\x02\x41\x4d\x02\x41\x4d\x02\x41\x4d\x02\x41\x4d\x04\x76\x61\x6c\x32\x07\x46\x6c\x6f\x61\x74\x33\x32\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x04\x76\x61\x6c\x33\x05\x55\x49\x6e\x74\x38\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01", - b"\x05\x01\x02\x69\x64\x05\x49\x6e\x74\x36\x34\x00\x00\x00\x00\x00\x00\x00\x00\x07\x62\x6c\x6f\x63\x6b\x4e\x6f\x06\x55\x49\x6e\x74\x31\x36\x00\x00\x04\x76\x61\x6c\x31\x06\x53\x74\x72\x69\x6e\x67\x02\x41\x4d\x04\x76\x61\x6c\x32\x07\x46\x6c\x6f\x61\x74\x33\x32\x00\x00\x00\x3f\x04\x76\x61\x6c\x33\x05\x55\x49\x6e\x74\x38\x01", - # '' - # On empty message exception happens: DB::Exception: Attempt to read after eof - # 1. DB::throwReadAfterEOF() @ 0xb76449b in /usr/bin/clickhouse - # 2. ? @ 0xb79cb0b in /usr/bin/clickhouse - # 3. DB::NativeReader::read() @ 0x16e7a084 in /usr/bin/clickhouse - # 4. DB::NativeInputFormat::generate() @ 0x16f76922 in /usr/bin/clickhouse - # 5. DB::ISource::tryGenerate() @ 0x16e90bd5 in /usr/bin/clickhouse - # 6. 
DB::ISource::work() @ 0x16e9087a in /usr/bin/clickhouse - ], - }, - "MsgPack": { - "data_sample": [ - b"\x00\x00\xa2\x41\x4d\xca\x3f\x00\x00\x00\x01", - b"\x01\x00\xa2\x41\x4d\xca\x3f\x00\x00\x00\x01\x02\x00\xa2\x41\x4d\xca\x3f\x00\x00\x00\x01\x03\x00\xa2\x41\x4d\xca\x3f\x00\x00\x00\x01\x04\x00\xa2\x41\x4d\xca\x3f\x00\x00\x00\x01\x05\x00\xa2\x41\x4d\xca\x3f\x00\x00\x00\x01\x06\x00\xa2\x41\x4d\xca\x3f\x00\x00\x00\x01\x07\x00\xa2\x41\x4d\xca\x3f\x00\x00\x00\x01\x08\x00\xa2\x41\x4d\xca\x3f\x00\x00\x00\x01\x09\x00\xa2\x41\x4d\xca\x3f\x00\x00\x00\x01\x0a\x00\xa2\x41\x4d\xca\x3f\x00\x00\x00\x01\x0b\x00\xa2\x41\x4d\xca\x3f\x00\x00\x00\x01\x0c\x00\xa2\x41\x4d\xca\x3f\x00\x00\x00\x01\x0d\x00\xa2\x41\x4d\xca\x3f\x00\x00\x00\x01\x0e\x00\xa2\x41\x4d\xca\x3f\x00\x00\x00\x01\x0f\x00\xa2\x41\x4d\xca\x3f\x00\x00\x00\x01", - b"\x00\x00\xa2\x41\x4d\xca\x3f\x00\x00\x00\x01", - # '' - # On empty message exception happens: Unexpected end of file while parsing msgpack object.: (at row 1) - # coming from Processors/Formats/Impl/MsgPackRowInputFormat.cpp:170 - ], - }, - "RowBinary": { - "data_sample": [ - b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01", - b"\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01\x06\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01\x09\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01\x0a\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01\x0b\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01\x0c\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01\x0d\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01\x0e\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01\x0f\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01", - b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01", - # '' - # On empty message exception happens: DB::Exception: Cannot read all data. Bytes read: 0. Bytes expected: 8. 
- # /src/IO/ReadBuffer.h:157: DB::ReadBuffer::readStrict(char*, unsigned long) @ 0x15c6894d in /usr/bin/clickhouse - # /src/IO/ReadHelpers.h:108: void DB::readPODBinary(long&, DB::ReadBuffer&) @ 0x15c67715 in /usr/bin/clickhouse - # /src/IO/ReadHelpers.h:737: std::__1::enable_if, void>::type DB::readBinary(long&, DB::ReadBuffer&) @ 0x15e7afbd in /usr/bin/clickhouse - # /src/DataTypes/DataTypeNumberBase.cpp:180: DB::DataTypeNumberBase::deserializeBinary(DB::IColumn&, DB::ReadBuffer&) const @ 0x1cace581 in /usr/bin/clickhouse - # /src/Processors/Formats/Impl/BinaryRowInputFormat.cpp:22: DB::BinaryRowInputFormat::readRow(std::__1::vector::mutable_ptr, std::__1::allocator::mutable_ptr > >&, DB::RowReadExtension&) @ 0x1dea2c0b in /usr/bin/clickhouse - ], - }, - "RowBinaryWithNamesAndTypes": { - "data_sample": [ - b"\x05\x02\x69\x64\x07\x62\x6c\x6f\x63\x6b\x4e\x6f\x04\x76\x61\x6c\x31\x04\x76\x61\x6c\x32\x04\x76\x61\x6c\x33\x05\x49\x6e\x74\x36\x34\x06\x55\x49\x6e\x74\x31\x36\x06\x53\x74\x72\x69\x6e\x67\x07\x46\x6c\x6f\x61\x74\x33\x32\x05\x55\x49\x6e\x74\x38\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01", - b"\x05\x02\x69\x64\x07\x62\x6c\x6f\x63\x6b\x4e\x6f\x04\x76\x61\x6c\x31\x04\x76\x61\x6c\x32\x04\x76\x61\x6c\x33\x05\x49\x6e\x74\x36\x34\x06\x55\x49\x6e\x74\x31\x36\x06\x53\x74\x72\x69\x6e\x67\x07\x46\x6c\x6f\x61\x74\x33\x32\x05\x55\x49\x6e\x74\x38\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01\x06\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01\x09\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01\x0a\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01\x0b\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01\x0c\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01\x0d\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01\x0e\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01\x0f\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01", - b"\x05\x02\x69\x64\x07\x62\x6c\x6f\x63\x6b\x4e\x6f\x04\x76\x61\x6c\x31\x04\x76\x61\x6c\x32\x04\x76\x61\x6c\x33\x05\x49\x6e\x74\x36\x34\x06\x55\x49\x6e\x74\x31\x36\x06\x53\x74\x72\x69\x6e\x67\x07\x46\x6c\x6f\x61\x74\x33\x32\x05\x55\x49\x6e\x74\x38\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01", - # '' - # !!! 
On empty message segfault: Address not mapped to object - # /contrib/FastMemcpy/FastMemcpy.h:666: memcpy_fast @ 0x21742d65 in /usr/bin/clickhouse - # /contrib/FastMemcpy/memcpy_wrapper.c:5: memcpy @ 0x21738235 in /usr/bin/clickhouse - # /src/IO/ReadBuffer.h:145: DB::ReadBuffer::read(char*, unsigned long) @ 0x15c369d7 in /usr/bin/clickhouse - # /src/IO/ReadBuffer.h:155: DB::ReadBuffer::readStrict(char*, unsigned long) @ 0x15c68878 in /usr/bin/clickhouse - # /src/DataTypes/DataTypeString.cpp:84: DB::DataTypeString::deserializeBinary(DB::IColumn&, DB::ReadBuffer&) const @ 0x1cad12e7 in /usr/bin/clickhouse - # /src/Processors/Formats/Impl/BinaryRowInputFormat.cpp:22: DB::BinaryRowInputFormat::readRow(std::__1::vector::mutable_ptr, std::__1::allocator::mutable_ptr > >&, DB::RowReadExtension&) @ 0x1dea2c0b in /usr/bin/clickhouse - ], - }, - "Protobuf": { - "data_sample": [ - b"\x0b\x1a\x02\x41\x4d\x25\x00\x00\x00\x3f\x28\x01", - b"\x0d\x08\x01\x1a\x02\x41\x4d\x25\x00\x00\x00\x3f\x28\x01\x0d\x08\x02\x1a\x02\x41\x4d\x25\x00\x00\x00\x3f\x28\x01\x0d\x08\x03\x1a\x02\x41\x4d\x25\x00\x00\x00\x3f\x28\x01\x0d\x08\x04\x1a\x02\x41\x4d\x25\x00\x00\x00\x3f\x28\x01\x0d\x08\x05\x1a\x02\x41\x4d\x25\x00\x00\x00\x3f\x28\x01\x0d\x08\x06\x1a\x02\x41\x4d\x25\x00\x00\x00\x3f\x28\x01\x0d\x08\x07\x1a\x02\x41\x4d\x25\x00\x00\x00\x3f\x28\x01\x0d\x08\x08\x1a\x02\x41\x4d\x25\x00\x00\x00\x3f\x28\x01\x0d\x08\x09\x1a\x02\x41\x4d\x25\x00\x00\x00\x3f\x28\x01\x0d\x08\x0a\x1a\x02\x41\x4d\x25\x00\x00\x00\x3f\x28\x01\x0d\x08\x0b\x1a\x02\x41\x4d\x25\x00\x00\x00\x3f\x28\x01\x0d\x08\x0c\x1a\x02\x41\x4d\x25\x00\x00\x00\x3f\x28\x01\x0d\x08\x0d\x1a\x02\x41\x4d\x25\x00\x00\x00\x3f\x28\x01\x0d\x08\x0e\x1a\x02\x41\x4d\x25\x00\x00\x00\x3f\x28\x01\x0d\x08\x0f\x1a\x02\x41\x4d\x25\x00\x00\x00\x3f\x28\x01", - b"\x0b\x1a\x02\x41\x4d\x25\x00\x00\x00\x3f\x28\x01", - # '' - # On empty message exception: Attempt to read after eof - # /src/IO/ReadBuffer.h:184: DB::ReadBuffer::throwReadAfterEOF() @ 0x15c9699b in /usr/bin/clickhouse - # /src/Formats/ProtobufReader.h:115: DB::ProtobufReader::SimpleReader::startMessage() @ 0x1df4f828 in /usr/bin/clickhouse - # /src/Formats/ProtobufReader.cpp:1119: DB::ProtobufReader::startMessage() @ 0x1df5356c in /usr/bin/clickhouse - # /src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp:25: DB::ProtobufRowInputFormat::readRow(std::__1::vector::mutable_ptr, std::__1::allocator::mutable_ptr > >&, DB::RowReadExtension&) @ 0x1df4cc71 in /usr/bin/clickhouse - # /src/Processors/Formats/IRowInputFormat.cpp:64: DB::IRowInputFormat::generate() @ 0x1de727cf in /usr/bin/clickhouse - ], - "extra_settings": {"kafka_schema": "test:TestMessage"}, - }, - "ORC": { - "data_sample": [ - 
b"\x4f\x52\x43\x11\x00\x00\x0a\x06\x12\x04\x08\x01\x50\x00\x2b\x00\x00\x0a\x13\x0a\x03\x00\x00\x00\x12\x0c\x08\x01\x12\x06\x08\x00\x10\x00\x18\x00\x50\x00\x30\x00\x00\xe3\x12\xe7\x62\x65\x00\x01\x21\x3e\x0e\x46\x25\x0e\x2e\x46\x03\x21\x46\x03\x09\xa6\x00\x06\x00\x32\x00\x00\xe3\x92\xe4\x62\x65\x00\x01\x21\x01\x0e\x46\x25\x2e\x2e\x26\x47\x5f\x21\x20\x96\x60\x09\x60\x00\x00\x36\x00\x00\xe3\x92\xe1\x62\x65\x00\x01\x21\x61\x0e\x46\x23\x5e\x2e\x46\x03\x21\x66\x03\x3d\x53\x29\x10\x11\xc0\x00\x00\x2b\x00\x00\x0a\x13\x0a\x03\x00\x00\x00\x12\x0c\x08\x01\x12\x06\x08\x02\x10\x02\x18\x02\x50\x00\x05\x00\x00\xff\x00\x03\x00\x00\x30\x07\x00\x00\x40\x00\x80\x05\x00\x00\x41\x4d\x07\x00\x00\x42\x00\x80\x03\x00\x00\x0a\x07\x00\x00\x42\x00\x80\x05\x00\x00\xff\x01\x88\x00\x00\x4d\xca\xc1\x0a\x80\x30\x0c\x03\xd0\x2e\x6b\xcb\x98\x17\xf1\x14\x50\xfc\xff\xcf\xb4\x66\x1e\x3c\x84\x47\x9a\xce\x1c\xb9\x1b\xb7\xf9\xda\x48\x09\x9e\xb2\xf3\x92\xce\x5b\x86\xf6\x56\x7f\x21\x41\x2f\x51\xa6\x7a\xd7\x1d\xe5\xea\xae\x3d\xca\xd5\x83\x71\x60\xd8\x17\xfc\x62\x0f\xa8\x00\x00\xe3\x4a\xe6\x62\xe1\x60\x0c\x60\xe0\xe2\xe3\x60\x14\x62\xe3\x60\x10\x60\x90\x60\x08\x60\x88\x60\xe5\x12\xe0\x60\x54\xe2\xe0\x62\x34\x10\x62\x34\x90\x60\x02\x8a\x70\x71\x09\x01\x45\xb8\xb8\x98\x1c\x7d\x85\x80\x58\x82\x05\x28\xc6\xcd\x25\xca\xc1\x68\xc4\x0b\x52\xc5\x6c\xa0\x67\x2a\x05\x22\xc0\x4a\x21\x86\x31\x09\x30\x81\xb5\xb2\x02\x00\x36\x01\x00\x25\x8c\xbd\x0a\xc2\x30\x14\x85\x73\x6f\x92\xf6\x92\x6a\x09\x01\x21\x64\x92\x4e\x75\x91\x58\x71\xc9\x64\x27\x5d\x2c\x1d\x5d\xfd\x59\xc4\x42\x37\x5f\xc0\x17\xe8\x23\x9b\xc6\xe1\x3b\x70\x0f\xdf\xb9\xc4\xf5\x17\x5d\x41\x5c\x4f\x60\x37\xeb\x53\x0d\x55\x4d\x0b\x23\x01\xb9\x90\x2e\xbf\x0f\xe3\xe3\xdd\x8d\x0e\x5f\x4f\x27\x3e\xb7\x61\x97\xb2\x49\xb9\xaf\x90\x20\x92\x27\x32\x2a\x6b\xf4\xf3\x0d\x1e\x82\x20\xe8\x59\x28\x09\x4c\x46\x4c\x33\xcb\x7a\x76\x95\x41\x47\x9f\x14\x78\x03\xde\x62\x6c\x54\x30\xb1\x51\x0a\xdb\x8b\x89\x58\x11\xbb\x22\xac\x08\x9a\xe5\x6c\x71\xbf\x3d\xb8\x39\x92\xfa\x7f\x86\x1a\xd3\x54\x1e\xa7\xee\xcc\x7e\x08\x9e\x01\x10\x01\x18\x80\x80\x10\x22\x02\x00\x0c\x28\x57\x30\x06\x82\xf4\x03\x03\x4f\x52\x43\x18", - 
b"\x4f\x52\x43\x11\x00\x00\x0a\x06\x12\x04\x08\x0f\x50\x00\x2b\x00\x00\x0a\x13\x0a\x03\x00\x00\x00\x12\x0c\x08\x0f\x12\x06\x08\x00\x10\x00\x18\x00\x50\x00\x30\x00\x00\xe3\x12\xe7\x62\x65\x00\x01\x21\x3e\x0e\x7e\x25\x0e\x2e\x46\x43\x21\x46\x4b\x09\xad\x00\x06\x00\x33\x00\x00\x0a\x17\x0a\x03\x00\x00\x00\x12\x10\x08\x0f\x22\x0a\x0a\x02\x41\x4d\x12\x02\x41\x4d\x18\x3c\x50\x00\x3a\x00\x00\xe3\x92\xe1\x62\x65\x00\x01\x21\x61\x0e\x7e\x23\x5e\x2e\x46\x03\x21\x66\x03\x3d\x53\x29\x66\x73\x3d\xd3\x00\x06\x00\x2b\x00\x00\x0a\x13\x0a\x03\x00\x00\x00\x12\x0c\x08\x0f\x12\x06\x08\x02\x10\x02\x18\x1e\x50\x00\x05\x00\x00\x0c\x00\x2b\x00\x00\x31\x32\x33\x34\x35\x36\x37\x38\x39\x31\x30\x31\x31\x31\x32\x31\x33\x31\x34\x31\x35\x09\x00\x00\x06\x01\x03\x02\x09\x00\x00\xc0\x0e\x00\x00\x07\x00\x00\x42\x00\x80\x05\x00\x00\x41\x4d\x0a\x00\x00\xe3\xe2\x42\x01\x00\x09\x00\x00\xc0\x0e\x02\x00\x05\x00\x00\x0c\x01\x94\x00\x00\x2d\xca\xc1\x0e\x80\x30\x08\x03\xd0\xc1\x60\x2e\xf3\x62\x76\x6a\xe2\x0e\xfe\xff\x57\x5a\x3b\x0f\xe4\x51\xe8\x68\xbd\x5d\x05\xe7\xf8\x34\x40\x3a\x6e\x59\xb1\x64\xe0\x91\xa9\xbf\xb1\x97\xd2\x95\x9d\x1e\xca\x55\x3a\x6d\xb4\xd2\xdd\x0b\x74\x9a\x74\xf7\x12\x39\xbd\x97\x7f\x7c\x06\xbb\xa6\x8d\x97\x17\xb4\x00\x00\xe3\x4a\xe6\x62\xe1\xe0\x0f\x60\xe0\xe2\xe3\xe0\x17\x62\xe3\x60\x10\x60\x90\x60\x08\x60\x88\x60\xe5\x12\xe0\xe0\x57\xe2\xe0\x62\x34\x14\x62\xb4\x94\xd0\x02\x8a\xc8\x73\x09\x01\x45\xb8\xb8\x98\x1c\x7d\x85\x80\x58\xc2\x06\x28\x26\xc4\x25\xca\xc1\x6f\xc4\xcb\xc5\x68\x20\xc4\x6c\xa0\x67\x2a\xc5\x6c\xae\x67\x0a\x14\xe6\x87\x1a\xc6\x24\xc0\x24\x21\x07\x32\x0c\x00\x4a\x01\x00\xe3\x60\x16\x58\xc3\x24\xc5\xcd\xc1\x2c\x30\x89\x51\xc2\x4b\xc1\x57\x83\x5f\x49\x83\x83\x47\x88\x95\x91\x89\x99\x85\x55\x8a\x3d\x29\x27\x3f\x39\xdb\x2f\x5f\x8a\x29\x33\x45\x8a\xa5\x2c\x31\xc7\x10\x4c\x1a\x81\x49\x63\x25\x26\x0e\x46\x20\x66\x07\x63\x36\x0e\x3e\x0d\x26\x03\x10\x9f\xd1\x80\xdf\x8a\x85\x83\x3f\x80\xc1\x8a\x8f\x83\x5f\x88\x8d\x83\x41\x80\x41\x82\x21\x80\x21\x82\xd5\x4a\x80\x83\x5f\x89\x83\x8b\xd1\x50\x88\xd1\x52\x42\x0b\x28\x22\x6f\x25\x04\x14\xe1\xe2\x62\x72\xf4\x15\x02\x62\x09\x1b\xa0\x98\x90\x95\x28\x07\xbf\x11\x2f\x17\xa3\x81\x10\xb3\x81\x9e\xa9\x14\xb3\xb9\x9e\x29\x50\x98\x1f\x6a\x18\x93\x00\x93\x84\x1c\xc8\x30\x87\x09\x7e\x1e\x0c\x00\x08\xa8\x01\x10\x01\x18\x80\x80\x10\x22\x02\x00\x0c\x28\x5d\x30\x06\x82\xf4\x03\x03\x4f\x52\x43\x18", - 
b"\x4f\x52\x43\x11\x00\x00\x0a\x06\x12\x04\x08\x01\x50\x00\x2b\x00\x00\x0a\x13\x0a\x03\x00\x00\x00\x12\x0c\x08\x01\x12\x06\x08\x00\x10\x00\x18\x00\x50\x00\x30\x00\x00\xe3\x12\xe7\x62\x65\x00\x01\x21\x3e\x0e\x46\x25\x0e\x2e\x46\x03\x21\x46\x03\x09\xa6\x00\x06\x00\x32\x00\x00\xe3\x92\xe4\x62\x65\x00\x01\x21\x01\x0e\x46\x25\x2e\x2e\x26\x47\x5f\x21\x20\x96\x60\x09\x60\x00\x00\x36\x00\x00\xe3\x92\xe1\x62\x65\x00\x01\x21\x61\x0e\x46\x23\x5e\x2e\x46\x03\x21\x66\x03\x3d\x53\x29\x10\x11\xc0\x00\x00\x2b\x00\x00\x0a\x13\x0a\x03\x00\x00\x00\x12\x0c\x08\x01\x12\x06\x08\x02\x10\x02\x18\x02\x50\x00\x05\x00\x00\xff\x00\x03\x00\x00\x30\x07\x00\x00\x40\x00\x80\x05\x00\x00\x41\x4d\x07\x00\x00\x42\x00\x80\x03\x00\x00\x0a\x07\x00\x00\x42\x00\x80\x05\x00\x00\xff\x01\x88\x00\x00\x4d\xca\xc1\x0a\x80\x30\x0c\x03\xd0\x2e\x6b\xcb\x98\x17\xf1\x14\x50\xfc\xff\xcf\xb4\x66\x1e\x3c\x84\x47\x9a\xce\x1c\xb9\x1b\xb7\xf9\xda\x48\x09\x9e\xb2\xf3\x92\xce\x5b\x86\xf6\x56\x7f\x21\x41\x2f\x51\xa6\x7a\xd7\x1d\xe5\xea\xae\x3d\xca\xd5\x83\x71\x60\xd8\x17\xfc\x62\x0f\xa8\x00\x00\xe3\x4a\xe6\x62\xe1\x60\x0c\x60\xe0\xe2\xe3\x60\x14\x62\xe3\x60\x10\x60\x90\x60\x08\x60\x88\x60\xe5\x12\xe0\x60\x54\xe2\xe0\x62\x34\x10\x62\x34\x90\x60\x02\x8a\x70\x71\x09\x01\x45\xb8\xb8\x98\x1c\x7d\x85\x80\x58\x82\x05\x28\xc6\xcd\x25\xca\xc1\x68\xc4\x0b\x52\xc5\x6c\xa0\x67\x2a\x05\x22\xc0\x4a\x21\x86\x31\x09\x30\x81\xb5\xb2\x02\x00\x36\x01\x00\x25\x8c\xbd\x0a\xc2\x30\x14\x85\x73\x6f\x92\xf6\x92\x6a\x09\x01\x21\x64\x92\x4e\x75\x91\x58\x71\xc9\x64\x27\x5d\x2c\x1d\x5d\xfd\x59\xc4\x42\x37\x5f\xc0\x17\xe8\x23\x9b\xc6\xe1\x3b\x70\x0f\xdf\xb9\xc4\xf5\x17\x5d\x41\x5c\x4f\x60\x37\xeb\x53\x0d\x55\x4d\x0b\x23\x01\xb9\x90\x2e\xbf\x0f\xe3\xe3\xdd\x8d\x0e\x5f\x4f\x27\x3e\xb7\x61\x97\xb2\x49\xb9\xaf\x90\x20\x92\x27\x32\x2a\x6b\xf4\xf3\x0d\x1e\x82\x20\xe8\x59\x28\x09\x4c\x46\x4c\x33\xcb\x7a\x76\x95\x41\x47\x9f\x14\x78\x03\xde\x62\x6c\x54\x30\xb1\x51\x0a\xdb\x8b\x89\x58\x11\xbb\x22\xac\x08\x9a\xe5\x6c\x71\xbf\x3d\xb8\x39\x92\xfa\x7f\x86\x1a\xd3\x54\x1e\xa7\xee\xcc\x7e\x08\x9e\x01\x10\x01\x18\x80\x80\x10\x22\x02\x00\x0c\x28\x57\x30\x06\x82\xf4\x03\x03\x4f\x52\x43\x18", - # '' - # On empty message exception: IOError: File size too small, Stack trace (when copying this message, always include the lines below): - # /src/Processors/Formats/Impl/ORCBlockInputFormat.cpp:36: DB::ORCBlockInputFormat::generate() @ 0x1df282a6 in /usr/bin/clickhouse - # /src/Processors/ISource.cpp:48: DB::ISource::work() @ 0x1dd79737 in /usr/bin/clickhouse - ], - }, - "CapnProto": { - "data_sample": [ - b"\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x3f\x01\x00\x00\x00\x1a\x00\x00\x00\x41\x4d\x00\x00\x00\x00\x00\x00", - 
b"\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x02\x00\x01\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x3f\x01\x00\x00\x00\x1a\x00\x00\x00\x41\x4d\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x02\x00\x01\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x3f\x01\x00\x00\x00\x1a\x00\x00\x00\x41\x4d\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x02\x00\x01\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x3f\x01\x00\x00\x00\x1a\x00\x00\x00\x41\x4d\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x02\x00\x01\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x3f\x01\x00\x00\x00\x1a\x00\x00\x00\x41\x4d\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x02\x00\x01\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x3f\x01\x00\x00\x00\x1a\x00\x00\x00\x41\x4d\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x02\x00\x01\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x3f\x01\x00\x00\x00\x1a\x00\x00\x00\x41\x4d\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x02\x00\x01\x00\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x3f\x01\x00\x00\x00\x1a\x00\x00\x00\x41\x4d\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x02\x00\x01\x00\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x3f\x01\x00\x00\x00\x1a\x00\x00\x00\x41\x4d\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x02\x00\x01\x00\x09\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x3f\x01\x00\x00\x00\x1a\x00\x00\x00\x41\x4d\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x02\x00\x01\x00\x0a\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x3f\x01\x00\x00\x00\x1a\x00\x00\x00\x41\x4d\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x02\x00\x01\x00\x0b\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x3f\x01\x00\x00\x00\x1a\x00\x00\x00\x41\x4d\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x02\x00\x01\x00\x0c\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x3f\x01\x00\x00\x00\x1a\x00\x00\x00\x41\x4d\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x02\x00\x01\x00\x0d\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x3f\x01\x00\x00\x00\x1a\x00\x00\x00\x41\x4d\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x02\x00\x01\x00\x0e\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x3f\x01\x00\x00\x00\x1a\x00\x00\x00\x41\x4d\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x02\x00\x01\x00\x0f\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x3f\x01\x00\x00\x00\x1a\x00\x00\x00\x41\x4d\x00\x00\x00\x00\x00\x00", - b"\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x3f\x01\x00\x00\x00\x1a\x00\x00\x00\x41\x4d\x00\x00\x00\x00\x00\x00", - # '' - # On empty message exception: Cannot read all data. Bytes read: 0. Bytes expected: 4. 
- # /src/IO/ReadBuffer.h:157: DB::ReadBuffer::readStrict(char*, unsigned long) @ 0x15c6894d in /usr/bin/clickhouse - # /src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp:212: DB::CapnProtoRowInputFormat::readMessage() @ 0x1ded1cab in /usr/bin/clickhouse - # /src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp:241: DB::CapnProtoRowInputFormat::readRow(std::__1::vector::mutable_ptr, std::__1::allocator::mutable_ptr > >&, DB::RowReadExtension&) @ 0x1ded205d in /usr/bin/clickhouse - ], - "extra_settings": {"kafka_schema": "test:TestRecordStruct"}, - }, - "Parquet": { - "data_sample": [ - b"\x50\x41\x52\x31\x15\x04\x15\x10\x15\x14\x4c\x15\x02\x15\x04\x12\x00\x00\x08\x1c\x00\x00\x00\x00\x00\x00\x00\x00\x15\x00\x15\x06\x15\x0a\x2c\x15\x02\x15\x04\x15\x06\x15\x06\x1c\x18\x08\x00\x00\x00\x00\x00\x00\x00\x00\x18\x08\x00\x00\x00\x00\x00\x00\x00\x00\x16\x00\x28\x08\x00\x00\x00\x00\x00\x00\x00\x00\x18\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x01\x02\x00\x26\xbc\x01\x1c\x15\x04\x19\x35\x04\x00\x06\x19\x18\x02\x69\x64\x15\x02\x16\x02\x16\xac\x01\x16\xb4\x01\x26\x38\x26\x08\x1c\x18\x08\x00\x00\x00\x00\x00\x00\x00\x00\x18\x08\x00\x00\x00\x00\x00\x00\x00\x00\x16\x00\x28\x08\x00\x00\x00\x00\x00\x00\x00\x00\x18\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x15\x04\x15\x08\x15\x0c\x4c\x15\x02\x15\x04\x12\x00\x00\x04\x0c\x00\x00\x00\x00\x15\x00\x15\x06\x15\x0a\x2c\x15\x02\x15\x04\x15\x06\x15\x06\x1c\x36\x00\x28\x04\x00\x00\x00\x00\x18\x04\x00\x00\x00\x00\x00\x00\x00\x03\x08\x01\x02\x00\x26\xc8\x03\x1c\x15\x02\x19\x35\x04\x00\x06\x19\x18\x07\x62\x6c\x6f\x63\x6b\x4e\x6f\x15\x02\x16\x02\x16\x6c\x16\x74\x26\xfc\x02\x26\xd4\x02\x1c\x36\x00\x28\x04\x00\x00\x00\x00\x18\x04\x00\x00\x00\x00\x00\x00\x00\x15\x04\x15\x0c\x15\x10\x4c\x15\x02\x15\x04\x12\x00\x00\x06\x14\x02\x00\x00\x00\x41\x4d\x15\x00\x15\x06\x15\x0a\x2c\x15\x02\x15\x04\x15\x06\x15\x06\x1c\x36\x00\x28\x02\x41\x4d\x18\x02\x41\x4d\x00\x00\x00\x03\x08\x01\x02\x00\x26\xa2\x05\x1c\x15\x0c\x19\x35\x04\x00\x06\x19\x18\x04\x76\x61\x6c\x31\x15\x02\x16\x02\x16\x68\x16\x70\x26\xde\x04\x26\xb2\x04\x1c\x36\x00\x28\x02\x41\x4d\x18\x02\x41\x4d\x00\x00\x00\x15\x04\x15\x08\x15\x0c\x4c\x15\x02\x15\x04\x12\x00\x00\x04\x0c\x00\x00\x00\x3f\x15\x00\x15\x06\x15\x0a\x2c\x15\x02\x15\x04\x15\x06\x15\x06\x1c\x18\x04\x00\x00\x00\x3f\x18\x04\x00\x00\x00\x3f\x16\x00\x28\x04\x00\x00\x00\x3f\x18\x04\x00\x00\x00\x3f\x00\x00\x00\x03\x08\x01\x02\x00\x26\x8a\x07\x1c\x15\x08\x19\x35\x04\x00\x06\x19\x18\x04\x76\x61\x6c\x32\x15\x02\x16\x02\x16\x84\x01\x16\x8c\x01\x26\xa6\x06\x26\xfe\x05\x1c\x18\x04\x00\x00\x00\x3f\x18\x04\x00\x00\x00\x3f\x16\x00\x28\x04\x00\x00\x00\x3f\x18\x04\x00\x00\x00\x3f\x00\x00\x00\x15\x04\x15\x08\x15\x0c\x4c\x15\x02\x15\x04\x12\x00\x00\x04\x0c\x01\x00\x00\x00\x15\x00\x15\x06\x15\x0a\x2c\x15\x02\x15\x04\x15\x06\x15\x06\x1c\x36\x00\x28\x04\x01\x00\x00\x00\x18\x04\x01\x00\x00\x00\x00\x00\x00\x03\x08\x01\x02\x00\x26\xfe\x08\x1c\x15\x02\x19\x35\x04\x00\x06\x19\x18\x04\x76\x61\x6c\x33\x15\x02\x16\x02\x16\x6c\x16\x74\x26\xb2\x08\x26\x8a\x08\x1c\x36\x00\x28\x04\x01\x00\x00\x00\x18\x04\x01\x00\x00\x00\x00\x00\x00\x15\x02\x19\x6c\x35\x00\x18\x06\x73\x63\x68\x65\x6d\x61\x15\x0a\x00\x15\x04\x25\x00\x18\x02\x69\x64\x00\x15\x02\x25\x00\x18\x07\x62\x6c\x6f\x63\x6b\x4e\x6f\x25\x18\x4c\xac\x13\x10\x12\x00\x00\x00\x15\x0c\x25\x00\x18\x04\x76\x61\x6c\x31\x25\x00\x4c\x1c\x00\x00\x00\x15\x08\x25\x00\x18\x04\x76\x61\x6c\x32\x00\x15\x02\x25\x00\x18\x04\x76\x61\x6c\x33\x25\x16\x4c\xac\x13\x08\x12\x00\x00\x00\x16\x02\x19\x1c\x19\x5c\x26\xbc\x01\x1c\x15\x04\x19\x35\x04\x00
\x06\x19\x18\x02\x69\x64\x15\x02\x16\x02\x16\xac\x01\x16\xb4\x01\x26\x38\x26\x08\x1c\x18\x08\x00\x00\x00\x00\x00\x00\x00\x00\x18\x08\x00\x00\x00\x00\x00\x00\x00\x00\x16\x00\x28\x08\x00\x00\x00\x00\x00\x00\x00\x00\x18\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x26\xc8\x03\x1c\x15\x02\x19\x35\x04\x00\x06\x19\x18\x07\x62\x6c\x6f\x63\x6b\x4e\x6f\x15\x02\x16\x02\x16\x6c\x16\x74\x26\xfc\x02\x26\xd4\x02\x1c\x36\x00\x28\x04\x00\x00\x00\x00\x18\x04\x00\x00\x00\x00\x00\x00\x00\x26\xa2\x05\x1c\x15\x0c\x19\x35\x04\x00\x06\x19\x18\x04\x76\x61\x6c\x31\x15\x02\x16\x02\x16\x68\x16\x70\x26\xde\x04\x26\xb2\x04\x1c\x36\x00\x28\x02\x41\x4d\x18\x02\x41\x4d\x00\x00\x00\x26\x8a\x07\x1c\x15\x08\x19\x35\x04\x00\x06\x19\x18\x04\x76\x61\x6c\x32\x15\x02\x16\x02\x16\x84\x01\x16\x8c\x01\x26\xa6\x06\x26\xfe\x05\x1c\x18\x04\x00\x00\x00\x3f\x18\x04\x00\x00\x00\x3f\x16\x00\x28\x04\x00\x00\x00\x3f\x18\x04\x00\x00\x00\x3f\x00\x00\x00\x26\xfe\x08\x1c\x15\x02\x19\x35\x04\x00\x06\x19\x18\x04\x76\x61\x6c\x33\x15\x02\x16\x02\x16\x6c\x16\x74\x26\xb2\x08\x26\x8a\x08\x1c\x36\x00\x28\x04\x01\x00\x00\x00\x18\x04\x01\x00\x00\x00\x00\x00\x00\x16\x98\x05\x16\x02\x00\x28\x22\x70\x61\x72\x71\x75\x65\x74\x2d\x63\x70\x70\x20\x76\x65\x72\x73\x69\x6f\x6e\x20\x31\x2e\x35\x2e\x31\x2d\x53\x4e\x41\x50\x53\x48\x4f\x54\x19\x5c\x1c\x00\x00\x1c\x00\x00\x1c\x00\x00\x1c\x00\x00\x1c\x00\x00\x00\xc4\x01\x00\x00\x50\x41\x52\x31", - b"\x50\x41\x52\x31\x15\x04\x15\xf0\x01\x15\x90\x01\x4c\x15\x1e\x15\x04\x12\x00\x00\x78\x04\x01\x00\x09\x01\x00\x02\x09\x07\x04\x00\x03\x0d\x08\x00\x04\x0d\x08\x00\x05\x0d\x08\x00\x06\x0d\x08\x00\x07\x0d\x08\x00\x08\x0d\x08\x00\x09\x0d\x08\x00\x0a\x0d\x08\x00\x0b\x0d\x08\x00\x0c\x0d\x08\x00\x0d\x0d\x08\x3c\x0e\x00\x00\x00\x00\x00\x00\x00\x0f\x00\x00\x00\x00\x00\x00\x00\x15\x00\x15\x14\x15\x18\x2c\x15\x1e\x15\x04\x15\x06\x15\x06\x1c\x18\x08\x0f\x00\x00\x00\x00\x00\x00\x00\x18\x08\x01\x00\x00\x00\x00\x00\x00\x00\x16\x00\x28\x08\x0f\x00\x00\x00\x00\x00\x00\x00\x18\x08\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0a\x24\x04\x05\x10\x32\x54\x76\x98\xba\xdc\x0e\x26\xca\x02\x1c\x15\x04\x19\x35\x04\x00\x06\x19\x18\x02\x69\x64\x15\x02\x16\x1e\x16\x9e\x03\x16\xc2\x02\x26\xb8\x01\x26\x08\x1c\x18\x08\x0f\x00\x00\x00\x00\x00\x00\x00\x18\x08\x01\x00\x00\x00\x00\x00\x00\x00\x16\x00\x28\x08\x0f\x00\x00\x00\x00\x00\x00\x00\x18\x08\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x15\x04\x15\x08\x15\x0c\x4c\x15\x02\x15\x04\x12\x00\x00\x04\x0c\x00\x00\x00\x00\x15\x00\x15\x06\x15\x0a\x2c\x15\x1e\x15\x04\x15\x06\x15\x06\x1c\x36\x00\x28\x04\x00\x00\x00\x00\x18\x04\x00\x00\x00\x00\x00\x00\x00\x03\x08\x01\x1e\x00\x26\xd8\x04\x1c\x15\x02\x19\x35\x04\x00\x06\x19\x18\x07\x62\x6c\x6f\x63\x6b\x4e\x6f\x15\x02\x16\x1e\x16\x6c\x16\x74\x26\x8c\x04\x26\xe4\x03\x1c\x36\x00\x28\x04\x00\x00\x00\x00\x18\x04\x00\x00\x00\x00\x00\x00\x00\x15\x04\x15\x0c\x15\x10\x4c\x15\x02\x15\x04\x12\x00\x00\x06\x14\x02\x00\x00\x00\x41\x4d\x15\x00\x15\x06\x15\x0a\x2c\x15\x1e\x15\x04\x15\x06\x15\x06\x1c\x36\x00\x28\x02\x41\x4d\x18\x02\x41\x4d\x00\x00\x00\x03\x08\x01\x1e\x00\x26\xb2\x06\x1c\x15\x0c\x19\x35\x04\x00\x06\x19\x18\x04\x76\x61\x6c\x31\x15\x02\x16\x1e\x16\x68\x16\x70\x26\xee\x05\x26\xc2\x05\x1c\x36\x00\x28\x02\x41\x4d\x18\x02\x41\x4d\x00\x00\x00\x15\x04\x15\x08\x15\x0c\x4c\x15\x02\x15\x04\x12\x00\x00\x04\x0c\x00\x00\x00\x3f\x15\x00\x15\x06\x15\x0a\x2c\x15\x1e\x15\x04\x15\x06\x15\x06\x1c\x18\x04\x00\x00\x00\x3f\x18\x04\x00\x00\x00\x3f\x16\x00\x28\x04\x00\x00\x00\x3f\x18\x04\x00\x00\x00\x3f\x00\x00\x00\x03\x08\x01\x1e\x00\x26\x9a\x08\x1c\x15\x08\x19\x35\x04\x00\x06\x19\x18\x04\x76
\x61\x6c\x32\x15\x02\x16\x1e\x16\x84\x01\x16\x8c\x01\x26\xb6\x07\x26\x8e\x07\x1c\x18\x04\x00\x00\x00\x3f\x18\x04\x00\x00\x00\x3f\x16\x00\x28\x04\x00\x00\x00\x3f\x18\x04\x00\x00\x00\x3f\x00\x00\x00\x15\x04\x15\x08\x15\x0c\x4c\x15\x02\x15\x04\x12\x00\x00\x04\x0c\x01\x00\x00\x00\x15\x00\x15\x06\x15\x0a\x2c\x15\x1e\x15\x04\x15\x06\x15\x06\x1c\x36\x00\x28\x04\x01\x00\x00\x00\x18\x04\x01\x00\x00\x00\x00\x00\x00\x03\x08\x01\x1e\x00\x26\x8e\x0a\x1c\x15\x02\x19\x35\x04\x00\x06\x19\x18\x04\x76\x61\x6c\x33\x15\x02\x16\x1e\x16\x6c\x16\x74\x26\xc2\x09\x26\x9a\x09\x1c\x36\x00\x28\x04\x01\x00\x00\x00\x18\x04\x01\x00\x00\x00\x00\x00\x00\x15\x02\x19\x6c\x35\x00\x18\x06\x73\x63\x68\x65\x6d\x61\x15\x0a\x00\x15\x04\x25\x00\x18\x02\x69\x64\x00\x15\x02\x25\x00\x18\x07\x62\x6c\x6f\x63\x6b\x4e\x6f\x25\x18\x4c\xac\x13\x10\x12\x00\x00\x00\x15\x0c\x25\x00\x18\x04\x76\x61\x6c\x31\x25\x00\x4c\x1c\x00\x00\x00\x15\x08\x25\x00\x18\x04\x76\x61\x6c\x32\x00\x15\x02\x25\x00\x18\x04\x76\x61\x6c\x33\x25\x16\x4c\xac\x13\x08\x12\x00\x00\x00\x16\x1e\x19\x1c\x19\x5c\x26\xca\x02\x1c\x15\x04\x19\x35\x04\x00\x06\x19\x18\x02\x69\x64\x15\x02\x16\x1e\x16\x9e\x03\x16\xc2\x02\x26\xb8\x01\x26\x08\x1c\x18\x08\x0f\x00\x00\x00\x00\x00\x00\x00\x18\x08\x01\x00\x00\x00\x00\x00\x00\x00\x16\x00\x28\x08\x0f\x00\x00\x00\x00\x00\x00\x00\x18\x08\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x26\xd8\x04\x1c\x15\x02\x19\x35\x04\x00\x06\x19\x18\x07\x62\x6c\x6f\x63\x6b\x4e\x6f\x15\x02\x16\x1e\x16\x6c\x16\x74\x26\x8c\x04\x26\xe4\x03\x1c\x36\x00\x28\x04\x00\x00\x00\x00\x18\x04\x00\x00\x00\x00\x00\x00\x00\x26\xb2\x06\x1c\x15\x0c\x19\x35\x04\x00\x06\x19\x18\x04\x76\x61\x6c\x31\x15\x02\x16\x1e\x16\x68\x16\x70\x26\xee\x05\x26\xc2\x05\x1c\x36\x00\x28\x02\x41\x4d\x18\x02\x41\x4d\x00\x00\x00\x26\x9a\x08\x1c\x15\x08\x19\x35\x04\x00\x06\x19\x18\x04\x76\x61\x6c\x32\x15\x02\x16\x1e\x16\x84\x01\x16\x8c\x01\x26\xb6\x07\x26\x8e\x07\x1c\x18\x04\x00\x00\x00\x3f\x18\x04\x00\x00\x00\x3f\x16\x00\x28\x04\x00\x00\x00\x3f\x18\x04\x00\x00\x00\x3f\x00\x00\x00\x26\x8e\x0a\x1c\x15\x02\x19\x35\x04\x00\x06\x19\x18\x04\x76\x61\x6c\x33\x15\x02\x16\x1e\x16\x6c\x16\x74\x26\xc2\x09\x26\x9a\x09\x1c\x36\x00\x28\x04\x01\x00\x00\x00\x18\x04\x01\x00\x00\x00\x00\x00\x00\x16\xa6\x06\x16\x1e\x00\x28\x22\x70\x61\x72\x71\x75\x65\x74\x2d\x63\x70\x70\x20\x76\x65\x72\x73\x69\x6f\x6e\x20\x31\x2e\x35\x2e\x31\x2d\x53\x4e\x41\x50\x53\x48\x4f\x54\x19\x5c\x1c\x00\x00\x1c\x00\x00\x1c\x00\x00\x1c\x00\x00\x1c\x00\x00\x00\xc5\x01\x00\x00\x50\x41\x52\x31", - 
b"\x50\x41\x52\x31\x15\x04\x15\x10\x15\x14\x4c\x15\x02\x15\x04\x12\x00\x00\x08\x1c\x00\x00\x00\x00\x00\x00\x00\x00\x15\x00\x15\x06\x15\x0a\x2c\x15\x02\x15\x04\x15\x06\x15\x06\x1c\x18\x08\x00\x00\x00\x00\x00\x00\x00\x00\x18\x08\x00\x00\x00\x00\x00\x00\x00\x00\x16\x00\x28\x08\x00\x00\x00\x00\x00\x00\x00\x00\x18\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x01\x02\x00\x26\xbc\x01\x1c\x15\x04\x19\x35\x04\x00\x06\x19\x18\x02\x69\x64\x15\x02\x16\x02\x16\xac\x01\x16\xb4\x01\x26\x38\x26\x08\x1c\x18\x08\x00\x00\x00\x00\x00\x00\x00\x00\x18\x08\x00\x00\x00\x00\x00\x00\x00\x00\x16\x00\x28\x08\x00\x00\x00\x00\x00\x00\x00\x00\x18\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x15\x04\x15\x08\x15\x0c\x4c\x15\x02\x15\x04\x12\x00\x00\x04\x0c\x00\x00\x00\x00\x15\x00\x15\x06\x15\x0a\x2c\x15\x02\x15\x04\x15\x06\x15\x06\x1c\x36\x00\x28\x04\x00\x00\x00\x00\x18\x04\x00\x00\x00\x00\x00\x00\x00\x03\x08\x01\x02\x00\x26\xc8\x03\x1c\x15\x02\x19\x35\x04\x00\x06\x19\x18\x07\x62\x6c\x6f\x63\x6b\x4e\x6f\x15\x02\x16\x02\x16\x6c\x16\x74\x26\xfc\x02\x26\xd4\x02\x1c\x36\x00\x28\x04\x00\x00\x00\x00\x18\x04\x00\x00\x00\x00\x00\x00\x00\x15\x04\x15\x0c\x15\x10\x4c\x15\x02\x15\x04\x12\x00\x00\x06\x14\x02\x00\x00\x00\x41\x4d\x15\x00\x15\x06\x15\x0a\x2c\x15\x02\x15\x04\x15\x06\x15\x06\x1c\x36\x00\x28\x02\x41\x4d\x18\x02\x41\x4d\x00\x00\x00\x03\x08\x01\x02\x00\x26\xa2\x05\x1c\x15\x0c\x19\x35\x04\x00\x06\x19\x18\x04\x76\x61\x6c\x31\x15\x02\x16\x02\x16\x68\x16\x70\x26\xde\x04\x26\xb2\x04\x1c\x36\x00\x28\x02\x41\x4d\x18\x02\x41\x4d\x00\x00\x00\x15\x04\x15\x08\x15\x0c\x4c\x15\x02\x15\x04\x12\x00\x00\x04\x0c\x00\x00\x00\x3f\x15\x00\x15\x06\x15\x0a\x2c\x15\x02\x15\x04\x15\x06\x15\x06\x1c\x18\x04\x00\x00\x00\x3f\x18\x04\x00\x00\x00\x3f\x16\x00\x28\x04\x00\x00\x00\x3f\x18\x04\x00\x00\x00\x3f\x00\x00\x00\x03\x08\x01\x02\x00\x26\x8a\x07\x1c\x15\x08\x19\x35\x04\x00\x06\x19\x18\x04\x76\x61\x6c\x32\x15\x02\x16\x02\x16\x84\x01\x16\x8c\x01\x26\xa6\x06\x26\xfe\x05\x1c\x18\x04\x00\x00\x00\x3f\x18\x04\x00\x00\x00\x3f\x16\x00\x28\x04\x00\x00\x00\x3f\x18\x04\x00\x00\x00\x3f\x00\x00\x00\x15\x04\x15\x08\x15\x0c\x4c\x15\x02\x15\x04\x12\x00\x00\x04\x0c\x01\x00\x00\x00\x15\x00\x15\x06\x15\x0a\x2c\x15\x02\x15\x04\x15\x06\x15\x06\x1c\x36\x00\x28\x04\x01\x00\x00\x00\x18\x04\x01\x00\x00\x00\x00\x00\x00\x03\x08\x01\x02\x00\x26\xfe\x08\x1c\x15\x02\x19\x35\x04\x00\x06\x19\x18\x04\x76\x61\x6c\x33\x15\x02\x16\x02\x16\x6c\x16\x74\x26\xb2\x08\x26\x8a\x08\x1c\x36\x00\x28\x04\x01\x00\x00\x00\x18\x04\x01\x00\x00\x00\x00\x00\x00\x15\x02\x19\x6c\x35\x00\x18\x06\x73\x63\x68\x65\x6d\x61\x15\x0a\x00\x15\x04\x25\x00\x18\x02\x69\x64\x00\x15\x02\x25\x00\x18\x07\x62\x6c\x6f\x63\x6b\x4e\x6f\x25\x18\x4c\xac\x13\x10\x12\x00\x00\x00\x15\x0c\x25\x00\x18\x04\x76\x61\x6c\x31\x25\x00\x4c\x1c\x00\x00\x00\x15\x08\x25\x00\x18\x04\x76\x61\x6c\x32\x00\x15\x02\x25\x00\x18\x04\x76\x61\x6c\x33\x25\x16\x4c\xac\x13\x08\x12\x00\x00\x00\x16\x02\x19\x1c\x19\x5c\x26\xbc\x01\x1c\x15\x04\x19\x35\x04\x00\x06\x19\x18\x02\x69\x64\x15\x02\x16\x02\x16\xac\x01\x16\xb4\x01\x26\x38\x26\x08\x1c\x18\x08\x00\x00\x00\x00\x00\x00\x00\x00\x18\x08\x00\x00\x00\x00\x00\x00\x00\x00\x16\x00\x28\x08\x00\x00\x00\x00\x00\x00\x00\x00\x18\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x26\xc8\x03\x1c\x15\x02\x19\x35\x04\x00\x06\x19\x18\x07\x62\x6c\x6f\x63\x6b\x4e\x6f\x15\x02\x16\x02\x16\x6c\x16\x74\x26\xfc\x02\x26\xd4\x02\x1c\x36\x00\x28\x04\x00\x00\x00\x00\x18\x04\x00\x00\x00\x00\x00\x00\x00\x26\xa2\x05\x1c\x15\x0c\x19\x35\x04\x00\x06\x19\x18\x04\x76\x61\x6c\x31\x15\x02\x16\x02\x16\x68\x16\x70\x26\xde\x04\x26\xb2\
x04\x1c\x36\x00\x28\x02\x41\x4d\x18\x02\x41\x4d\x00\x00\x00\x26\x8a\x07\x1c\x15\x08\x19\x35\x04\x00\x06\x19\x18\x04\x76\x61\x6c\x32\x15\x02\x16\x02\x16\x84\x01\x16\x8c\x01\x26\xa6\x06\x26\xfe\x05\x1c\x18\x04\x00\x00\x00\x3f\x18\x04\x00\x00\x00\x3f\x16\x00\x28\x04\x00\x00\x00\x3f\x18\x04\x00\x00\x00\x3f\x00\x00\x00\x26\xfe\x08\x1c\x15\x02\x19\x35\x04\x00\x06\x19\x18\x04\x76\x61\x6c\x33\x15\x02\x16\x02\x16\x6c\x16\x74\x26\xb2\x08\x26\x8a\x08\x1c\x36\x00\x28\x04\x01\x00\x00\x00\x18\x04\x01\x00\x00\x00\x00\x00\x00\x16\x98\x05\x16\x02\x00\x28\x22\x70\x61\x72\x71\x75\x65\x74\x2d\x63\x70\x70\x20\x76\x65\x72\x73\x69\x6f\x6e\x20\x31\x2e\x35\x2e\x31\x2d\x53\x4e\x41\x50\x53\x48\x4f\x54\x19\x5c\x1c\x00\x00\x1c\x00\x00\x1c\x00\x00\x1c\x00\x00\x1c\x00\x00\x00\xc4\x01\x00\x00\x50\x41\x52\x31", - ], - }, - "AvroConfluent": { - "data_sample": [ - avro_confluent_message( - schema_registry_client, - {"id": 0, "blockNo": 0, "val1": str("AM"), "val2": 0.5, "val3": 1}, - ), - b"".join( - [ - avro_confluent_message( - schema_registry_client, - { - "id": id, - "blockNo": 0, - "val1": str("AM"), - "val2": 0.5, - "val3": 1, - }, - ) - for id in range(1, 16) - ] - ), - avro_confluent_message( - schema_registry_client, - {"id": 0, "blockNo": 0, "val1": str("AM"), "val2": 0.5, "val3": 1}, - ), - ], - "extra_settings": { - "format_avro_schema_registry_url": "http://{}:{}".format( - kafka_cluster.schema_registry_host, - kafka_cluster.schema_registry_port, - ) - }, - "supports_empty_value": True, - }, - "Avro": { - # It seems impossible to send more than one avro file per a message - # because of nature of Avro: blocks go one after another - "data_sample": [ - avro_message( - {"id": 0, "blockNo": 0, "val1": str("AM"), "val2": 0.5, "val3": 1} - ), - avro_message( - [ - { - "id": id, - "blockNo": 0, - "val1": str("AM"), - "val2": 0.5, - "val3": 1, - } - for id in range(1, 16) - ] - ), - avro_message( - {"id": 0, "blockNo": 0, "val1": str("AM"), "val2": 0.5, "val3": 1} - ), - ], - "supports_empty_value": False, - }, - "Arrow": { - "data_sample": [ - 
b"\x41\x52\x52\x4f\x57\x31\x00\x00\xff\xff\xff\xff\x48\x01\x00\x00\x10\x00\x00\x00\x00\x00\x0a\x00\x0c\x00\x06\x00\x05\x00\x08\x00\x0a\x00\x00\x00\x00\x01\x03\x00\x0c\x00\x00\x00\x08\x00\x08\x00\x00\x00\x04\x00\x08\x00\x00\x00\x04\x00\x00\x00\x05\x00\x00\x00\xe4\x00\x00\x00\x9c\x00\x00\x00\x6c\x00\x00\x00\x34\x00\x00\x00\x04\x00\x00\x00\x40\xff\xff\xff\x00\x00\x00\x02\x18\x00\x00\x00\x0c\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x72\xff\xff\xff\x08\x00\x00\x00\x04\x00\x00\x00\x76\x61\x6c\x33\x00\x00\x00\x00\x6c\xff\xff\xff\x00\x00\x00\x03\x20\x00\x00\x00\x14\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x06\x00\x08\x00\x06\x00\x06\x00\x00\x00\x00\x00\x01\x00\x04\x00\x00\x00\x76\x61\x6c\x32\x00\x00\x00\x00\xa0\xff\xff\xff\x00\x00\x00\x05\x18\x00\x00\x00\x10\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x04\x00\x04\x00\x04\x00\x00\x00\x04\x00\x00\x00\x76\x61\x6c\x31\x00\x00\x00\x00\xcc\xff\xff\xff\x00\x00\x00\x02\x20\x00\x00\x00\x14\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x06\x00\x08\x00\x04\x00\x06\x00\x00\x00\x10\x00\x00\x00\x07\x00\x00\x00\x62\x6c\x6f\x63\x6b\x4e\x6f\x00\x10\x00\x14\x00\x08\x00\x00\x00\x07\x00\x0c\x00\x00\x00\x10\x00\x10\x00\x00\x00\x00\x00\x00\x02\x24\x00\x00\x00\x14\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x08\x00\x0c\x00\x08\x00\x07\x00\x08\x00\x00\x00\x00\x00\x00\x01\x40\x00\x00\x00\x02\x00\x00\x00\x69\x64\x00\x00\xff\xff\xff\xff\x58\x01\x00\x00\x14\x00\x00\x00\x00\x00\x00\x00\x0c\x00\x16\x00\x06\x00\x05\x00\x08\x00\x0c\x00\x0c\x00\x00\x00\x00\x03\x03\x00\x18\x00\x00\x00\x30\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0a\x00\x18\x00\x0c\x00\x04\x00\x08\x00\x0a\x00\x00\x00\xcc\x00\x00\x00\x10\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0b\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x18\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x28\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x28\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x41\x4d\x00\x00\x00\x00\x00\x00\x00\x00\x00\x3f\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\xff\xff\xff\xff\x00\x00\x00\x00\x10\x00\x00\x00\x0c\x00\x14\x00\x06\x00\x08\x00\x0c\x00\x10\x00\x0c\x00\x00\x00\x00\x00\x03\x00\x3c\x00\x00\x00\x28\x00\x00\x00\x04\x00\x00\x00\x01\x00\x00\x00\x58\x01\x00\x00\x00\x00\x00\x00\x60\x01\x00\x00\x00\x00\x00\x00\x30\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x08\x00\x08\x00\x00\x00\x04\x00\x08\x00\x00\x00\x04\x00\x00\x00\x05\x00\x00\x00\xe4\x00\x00\x00\x9c\x00\x00\x00\x6c\x00\x00\x00\x34\x00\x00\x00\x04\x00\x00\x00\x40\xff\xff\xff\x00\x00\x00\x02\x18\x00\x00\x00\x0c\x00\x00\x00\x04\x00\x00\x00\
x00\x00\x00\x00\x72\xff\xff\xff\x08\x00\x00\x00\x04\x00\x00\x00\x76\x61\x6c\x33\x00\x00\x00\x00\x6c\xff\xff\xff\x00\x00\x00\x03\x20\x00\x00\x00\x14\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x06\x00\x08\x00\x06\x00\x06\x00\x00\x00\x00\x00\x01\x00\x04\x00\x00\x00\x76\x61\x6c\x32\x00\x00\x00\x00\xa0\xff\xff\xff\x00\x00\x00\x05\x18\x00\x00\x00\x10\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x04\x00\x04\x00\x04\x00\x00\x00\x04\x00\x00\x00\x76\x61\x6c\x31\x00\x00\x00\x00\xcc\xff\xff\xff\x00\x00\x00\x02\x20\x00\x00\x00\x14\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x06\x00\x08\x00\x04\x00\x06\x00\x00\x00\x10\x00\x00\x00\x07\x00\x00\x00\x62\x6c\x6f\x63\x6b\x4e\x6f\x00\x10\x00\x14\x00\x08\x00\x00\x00\x07\x00\x0c\x00\x00\x00\x10\x00\x10\x00\x00\x00\x00\x00\x00\x02\x24\x00\x00\x00\x14\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x08\x00\x0c\x00\x08\x00\x07\x00\x08\x00\x00\x00\x00\x00\x00\x01\x40\x00\x00\x00\x02\x00\x00\x00\x69\x64\x00\x00\x78\x01\x00\x00\x41\x52\x52\x4f\x57\x31", - b"\x41\x52\x52\x4f\x57\x31\x00\x00\xff\xff\xff\xff\x48\x01\x00\x00\x10\x00\x00\x00\x00\x00\x0a\x00\x0c\x00\x06\x00\x05\x00\x08\x00\x0a\x00\x00\x00\x00\x01\x03\x00\x0c\x00\x00\x00\x08\x00\x08\x00\x00\x00\x04\x00\x08\x00\x00\x00\x04\x00\x00\x00\x05\x00\x00\x00\xe4\x00\x00\x00\x9c\x00\x00\x00\x6c\x00\x00\x00\x34\x00\x00\x00\x04\x00\x00\x00\x40\xff\xff\xff\x00\x00\x00\x02\x18\x00\x00\x00\x0c\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x72\xff\xff\xff\x08\x00\x00\x00\x04\x00\x00\x00\x76\x61\x6c\x33\x00\x00\x00\x00\x6c\xff\xff\xff\x00\x00\x00\x03\x20\x00\x00\x00\x14\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x06\x00\x08\x00\x06\x00\x06\x00\x00\x00\x00\x00\x01\x00\x04\x00\x00\x00\x76\x61\x6c\x32\x00\x00\x00\x00\xa0\xff\xff\xff\x00\x00\x00\x05\x18\x00\x00\x00\x10\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x04\x00\x04\x00\x04\x00\x00\x00\x04\x00\x00\x00\x76\x61\x6c\x31\x00\x00\x00\x00\xcc\xff\xff\xff\x00\x00\x00\x02\x20\x00\x00\x00\x14\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x06\x00\x08\x00\x04\x00\x06\x00\x00\x00\x10\x00\x00\x00\x07\x00\x00\x00\x62\x6c\x6f\x63\x6b\x4e\x6f\x00\x10\x00\x14\x00\x08\x00\x00\x00\x07\x00\x0c\x00\x00\x00\x10\x00\x10\x00\x00\x00\x00\x00\x00\x02\x24\x00\x00\x00\x14\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x08\x00\x0c\x00\x08\x00\x07\x00\x08\x00\x00\x00\x00\x00\x00\x01\x40\x00\x00\x00\x02\x00\x00\x00\x69\x64\x00\x00\xff\xff\xff\xff\x58\x01\x00\x00\x14\x00\x00\x00\x00\x00\x00\x00\x0c\x00\x16\x00\x06\x00\x05\x00\x08\x00\x0c\x00\x0c\x00\x00\x00\x00\x03\x03\x00\x18\x00\x00\x00\x48\x01\x00\x00\x00\x00\x00\x00\x00\x00\x0a\x00\x18\x00\x0c\x00\x04\x00\x08\x00\x0a\x00\x00\x00\xcc\x00\x00\x00\x10\x00\x00\x00\x0f\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0b\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x78\x00\x00\x00\x00\x00\x00\x00\x78\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x78\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00\x00\x00\x00\x00\x98\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x98\x00\x00\x00\x00\x00\x00\x00\x40\x00\x00\x00\x00\x00\x00\x00\xd8\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00\x00\x00\x00\x00\xf8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf8\x00\x00\x00\x00\x00\x00\x00\x40\x00\x00\x00\x00\x00\x00\x00\x38\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x38\x01\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x0f\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0f\x00\x00\x00\x00\
x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0f\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0f\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0f\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x09\x00\x00\x00\x00\x00\x00\x00\x0a\x00\x00\x00\x00\x00\x00\x00\x0b\x00\x00\x00\x00\x00\x00\x00\x0c\x00\x00\x00\x00\x00\x00\x00\x0d\x00\x00\x00\x00\x00\x00\x00\x0e\x00\x00\x00\x00\x00\x00\x00\x0f\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x04\x00\x00\x00\x06\x00\x00\x00\x08\x00\x00\x00\x0a\x00\x00\x00\x0c\x00\x00\x00\x0e\x00\x00\x00\x10\x00\x00\x00\x12\x00\x00\x00\x14\x00\x00\x00\x16\x00\x00\x00\x18\x00\x00\x00\x1a\x00\x00\x00\x1c\x00\x00\x00\x1e\x00\x00\x00\x41\x4d\x41\x4d\x41\x4d\x41\x4d\x41\x4d\x41\x4d\x41\x4d\x41\x4d\x41\x4d\x41\x4d\x41\x4d\x41\x4d\x41\x4d\x41\x4d\x41\x4d\x00\x00\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x00\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x00\xff\xff\xff\xff\x00\x00\x00\x00\x10\x00\x00\x00\x0c\x00\x14\x00\x06\x00\x08\x00\x0c\x00\x10\x00\x0c\x00\x00\x00\x00\x00\x03\x00\x3c\x00\x00\x00\x28\x00\x00\x00\x04\x00\x00\x00\x01\x00\x00\x00\x58\x01\x00\x00\x00\x00\x00\x00\x60\x01\x00\x00\x00\x00\x00\x00\x48\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x08\x00\x08\x00\x00\x00\x04\x00\x08\x00\x00\x00\x04\x00\x00\x00\x05\x00\x00\x00\xe4\x00\x00\x00\x9c\x00\x00\x00\x6c\x00\x00\x00\x34\x00\x00\x00\x04\x00\x00\x00\x40\xff\xff\xff\x00\x00\x00\x02\x18\x00\x00\x00\x0c\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x72\xff\xff\xff\x08\x00\x00\x00\x04\x00\x00\x00\x76\x61\x6c\x33\x00\x00\x00\x00\x6c\xff\xff\xff\x00\x00\x00\x03\x20\x00\x00\x00\x14\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x06\x00\x08\x00\x06\x00\x06\x00\x00\x00\x00\x00\x01\x00\x04\x00\x00\x00\x76\x61\x6c\x32\x00\x00\x00\x00\xa0\xff\xff\xff\x00\x00\x00\x05\x18\x00\x00\x00\x10\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x04\x00\x04\x00\x04\x00\x00\x00\x04\x00\x00\x00\x76\x61\x6c\x31\x00\x00\x00\x00\xcc\xff\xff\xff\x00\x00\x00\x02\x20\x00\x00\x00\x14\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x06\x00\x08\x00\x04\x00\x06\x00\x00\x00\x10\x00\x00\x00\x07\x00\x00\x00\x62\x6c\x6f\x63\x6b\x4e\x6f\x00\x10\x00\x14\x00\x08\x00\x00\x00\x07\x00\x0c\x00\x00\x00\x10\x00\x10\x00\x00\x00\x00\x00\x00\x02\x24\x00\x00\x00\x14\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x08\x00\x0c\x00\x08\x00\x07\x00\x08\x00\x00\x00\x00\x00\x00\x01\x40\x00\x00\x00\x02\x00\x00\x00\x69\x64\x00\x00\x78\x01\x00\x00\x41\x52\x52\x4f\x57\x31", - 
b"\x41\x52\x52\x4f\x57\x31\x00\x00\xff\xff\xff\xff\x48\x01\x00\x00\x10\x00\x00\x00\x00\x00\x0a\x00\x0c\x00\x06\x00\x05\x00\x08\x00\x0a\x00\x00\x00\x00\x01\x03\x00\x0c\x00\x00\x00\x08\x00\x08\x00\x00\x00\x04\x00\x08\x00\x00\x00\x04\x00\x00\x00\x05\x00\x00\x00\xe4\x00\x00\x00\x9c\x00\x00\x00\x6c\x00\x00\x00\x34\x00\x00\x00\x04\x00\x00\x00\x40\xff\xff\xff\x00\x00\x00\x02\x18\x00\x00\x00\x0c\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x72\xff\xff\xff\x08\x00\x00\x00\x04\x00\x00\x00\x76\x61\x6c\x33\x00\x00\x00\x00\x6c\xff\xff\xff\x00\x00\x00\x03\x20\x00\x00\x00\x14\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x06\x00\x08\x00\x06\x00\x06\x00\x00\x00\x00\x00\x01\x00\x04\x00\x00\x00\x76\x61\x6c\x32\x00\x00\x00\x00\xa0\xff\xff\xff\x00\x00\x00\x05\x18\x00\x00\x00\x10\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x04\x00\x04\x00\x04\x00\x00\x00\x04\x00\x00\x00\x76\x61\x6c\x31\x00\x00\x00\x00\xcc\xff\xff\xff\x00\x00\x00\x02\x20\x00\x00\x00\x14\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x06\x00\x08\x00\x04\x00\x06\x00\x00\x00\x10\x00\x00\x00\x07\x00\x00\x00\x62\x6c\x6f\x63\x6b\x4e\x6f\x00\x10\x00\x14\x00\x08\x00\x00\x00\x07\x00\x0c\x00\x00\x00\x10\x00\x10\x00\x00\x00\x00\x00\x00\x02\x24\x00\x00\x00\x14\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x08\x00\x0c\x00\x08\x00\x07\x00\x08\x00\x00\x00\x00\x00\x00\x01\x40\x00\x00\x00\x02\x00\x00\x00\x69\x64\x00\x00\xff\xff\xff\xff\x58\x01\x00\x00\x14\x00\x00\x00\x00\x00\x00\x00\x0c\x00\x16\x00\x06\x00\x05\x00\x08\x00\x0c\x00\x0c\x00\x00\x00\x00\x03\x03\x00\x18\x00\x00\x00\x30\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0a\x00\x18\x00\x0c\x00\x04\x00\x08\x00\x0a\x00\x00\x00\xcc\x00\x00\x00\x10\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0b\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x18\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x28\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x28\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x41\x4d\x00\x00\x00\x00\x00\x00\x00\x00\x00\x3f\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\xff\xff\xff\xff\x00\x00\x00\x00\x10\x00\x00\x00\x0c\x00\x14\x00\x06\x00\x08\x00\x0c\x00\x10\x00\x0c\x00\x00\x00\x00\x00\x03\x00\x3c\x00\x00\x00\x28\x00\x00\x00\x04\x00\x00\x00\x01\x00\x00\x00\x58\x01\x00\x00\x00\x00\x00\x00\x60\x01\x00\x00\x00\x00\x00\x00\x30\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x08\x00\x08\x00\x00\x00\x04\x00\x08\x00\x00\x00\x04\x00\x00\x00\x05\x00\x00\x00\xe4\x00\x00\x00\x9c\x00\x00\x00\x6c\x00\x00\x00\x34\x00\x00\x00\x04\x00\x00\x00\x40\xff\xff\xff\x00\x00\x00\x02\x18\x00\x00\x00\x0c\x00\x00\x00\x04\x00\x00\x00\
x00\x00\x00\x00\x72\xff\xff\xff\x08\x00\x00\x00\x04\x00\x00\x00\x76\x61\x6c\x33\x00\x00\x00\x00\x6c\xff\xff\xff\x00\x00\x00\x03\x20\x00\x00\x00\x14\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x06\x00\x08\x00\x06\x00\x06\x00\x00\x00\x00\x00\x01\x00\x04\x00\x00\x00\x76\x61\x6c\x32\x00\x00\x00\x00\xa0\xff\xff\xff\x00\x00\x00\x05\x18\x00\x00\x00\x10\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x04\x00\x04\x00\x04\x00\x00\x00\x04\x00\x00\x00\x76\x61\x6c\x31\x00\x00\x00\x00\xcc\xff\xff\xff\x00\x00\x00\x02\x20\x00\x00\x00\x14\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x06\x00\x08\x00\x04\x00\x06\x00\x00\x00\x10\x00\x00\x00\x07\x00\x00\x00\x62\x6c\x6f\x63\x6b\x4e\x6f\x00\x10\x00\x14\x00\x08\x00\x00\x00\x07\x00\x0c\x00\x00\x00\x10\x00\x10\x00\x00\x00\x00\x00\x00\x02\x24\x00\x00\x00\x14\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x08\x00\x0c\x00\x08\x00\x07\x00\x08\x00\x00\x00\x00\x00\x00\x01\x40\x00\x00\x00\x02\x00\x00\x00\x69\x64\x00\x00\x78\x01\x00\x00\x41\x52\x52\x4f\x57\x31", - ], - }, - "ArrowStream": { - "data_sample": [ - b"\xff\xff\xff\xff\x48\x01\x00\x00\x10\x00\x00\x00\x00\x00\x0a\x00\x0c\x00\x06\x00\x05\x00\x08\x00\x0a\x00\x00\x00\x00\x01\x03\x00\x0c\x00\x00\x00\x08\x00\x08\x00\x00\x00\x04\x00\x08\x00\x00\x00\x04\x00\x00\x00\x05\x00\x00\x00\xe4\x00\x00\x00\x9c\x00\x00\x00\x6c\x00\x00\x00\x34\x00\x00\x00\x04\x00\x00\x00\x40\xff\xff\xff\x00\x00\x00\x02\x18\x00\x00\x00\x0c\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x72\xff\xff\xff\x08\x00\x00\x00\x04\x00\x00\x00\x76\x61\x6c\x33\x00\x00\x00\x00\x6c\xff\xff\xff\x00\x00\x00\x03\x20\x00\x00\x00\x14\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x06\x00\x08\x00\x06\x00\x06\x00\x00\x00\x00\x00\x01\x00\x04\x00\x00\x00\x76\x61\x6c\x32\x00\x00\x00\x00\xa0\xff\xff\xff\x00\x00\x00\x05\x18\x00\x00\x00\x10\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x04\x00\x04\x00\x04\x00\x00\x00\x04\x00\x00\x00\x76\x61\x6c\x31\x00\x00\x00\x00\xcc\xff\xff\xff\x00\x00\x00\x02\x20\x00\x00\x00\x14\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x06\x00\x08\x00\x04\x00\x06\x00\x00\x00\x10\x00\x00\x00\x07\x00\x00\x00\x62\x6c\x6f\x63\x6b\x4e\x6f\x00\x10\x00\x14\x00\x08\x00\x00\x00\x07\x00\x0c\x00\x00\x00\x10\x00\x10\x00\x00\x00\x00\x00\x00\x02\x24\x00\x00\x00\x14\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x08\x00\x0c\x00\x08\x00\x07\x00\x08\x00\x00\x00\x00\x00\x00\x01\x40\x00\x00\x00\x02\x00\x00\x00\x69\x64\x00\x00\xff\xff\xff\xff\x58\x01\x00\x00\x14\x00\x00\x00\x00\x00\x00\x00\x0c\x00\x16\x00\x06\x00\x05\x00\x08\x00\x0c\x00\x0c\x00\x00\x00\x00\x03\x03\x00\x18\x00\x00\x00\x30\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0a\x00\x18\x00\x0c\x00\x04\x00\x08\x00\x0a\x00\x00\x00\xcc\x00\x00\x00\x10\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0b\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x18\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x28\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x28\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\
x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x41\x4d\x00\x00\x00\x00\x00\x00\x00\x00\x00\x3f\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\xff\xff\xff\xff\x00\x00\x00\x00", - b"\xff\xff\xff\xff\x48\x01\x00\x00\x10\x00\x00\x00\x00\x00\x0a\x00\x0c\x00\x06\x00\x05\x00\x08\x00\x0a\x00\x00\x00\x00\x01\x03\x00\x0c\x00\x00\x00\x08\x00\x08\x00\x00\x00\x04\x00\x08\x00\x00\x00\x04\x00\x00\x00\x05\x00\x00\x00\xe4\x00\x00\x00\x9c\x00\x00\x00\x6c\x00\x00\x00\x34\x00\x00\x00\x04\x00\x00\x00\x40\xff\xff\xff\x00\x00\x00\x02\x18\x00\x00\x00\x0c\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x72\xff\xff\xff\x08\x00\x00\x00\x04\x00\x00\x00\x76\x61\x6c\x33\x00\x00\x00\x00\x6c\xff\xff\xff\x00\x00\x00\x03\x20\x00\x00\x00\x14\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x06\x00\x08\x00\x06\x00\x06\x00\x00\x00\x00\x00\x01\x00\x04\x00\x00\x00\x76\x61\x6c\x32\x00\x00\x00\x00\xa0\xff\xff\xff\x00\x00\x00\x05\x18\x00\x00\x00\x10\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x04\x00\x04\x00\x04\x00\x00\x00\x04\x00\x00\x00\x76\x61\x6c\x31\x00\x00\x00\x00\xcc\xff\xff\xff\x00\x00\x00\x02\x20\x00\x00\x00\x14\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x06\x00\x08\x00\x04\x00\x06\x00\x00\x00\x10\x00\x00\x00\x07\x00\x00\x00\x62\x6c\x6f\x63\x6b\x4e\x6f\x00\x10\x00\x14\x00\x08\x00\x00\x00\x07\x00\x0c\x00\x00\x00\x10\x00\x10\x00\x00\x00\x00\x00\x00\x02\x24\x00\x00\x00\x14\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x08\x00\x0c\x00\x08\x00\x07\x00\x08\x00\x00\x00\x00\x00\x00\x01\x40\x00\x00\x00\x02\x00\x00\x00\x69\x64\x00\x00\xff\xff\xff\xff\x58\x01\x00\x00\x14\x00\x00\x00\x00\x00\x00\x00\x0c\x00\x16\x00\x06\x00\x05\x00\x08\x00\x0c\x00\x0c\x00\x00\x00\x00\x03\x03\x00\x18\x00\x00\x00\x48\x01\x00\x00\x00\x00\x00\x00\x00\x00\x0a\x00\x18\x00\x0c\x00\x04\x00\x08\x00\x0a\x00\x00\x00\xcc\x00\x00\x00\x10\x00\x00\x00\x0f\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0b\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x78\x00\x00\x00\x00\x00\x00\x00\x78\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x78\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00\x00\x00\x00\x00\x98\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x98\x00\x00\x00\x00\x00\x00\x00\x40\x00\x00\x00\x00\x00\x00\x00\xd8\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00\x00\x00\x00\x00\xf8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf8\x00\x00\x00\x00\x00\x00\x00\x40\x00\x00\x00\x00\x00\x00\x00\x38\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x38\x01\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x0f\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0f\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0f\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0f\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0f\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x09\x00\x00\x00\x00\x00\x00\x00\x0a\x00\x00\x00\x00\x00\x00\x00\
x0b\x00\x00\x00\x00\x00\x00\x00\x0c\x00\x00\x00\x00\x00\x00\x00\x0d\x00\x00\x00\x00\x00\x00\x00\x0e\x00\x00\x00\x00\x00\x00\x00\x0f\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x04\x00\x00\x00\x06\x00\x00\x00\x08\x00\x00\x00\x0a\x00\x00\x00\x0c\x00\x00\x00\x0e\x00\x00\x00\x10\x00\x00\x00\x12\x00\x00\x00\x14\x00\x00\x00\x16\x00\x00\x00\x18\x00\x00\x00\x1a\x00\x00\x00\x1c\x00\x00\x00\x1e\x00\x00\x00\x41\x4d\x41\x4d\x41\x4d\x41\x4d\x41\x4d\x41\x4d\x41\x4d\x41\x4d\x41\x4d\x41\x4d\x41\x4d\x41\x4d\x41\x4d\x41\x4d\x41\x4d\x00\x00\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x00\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x00\xff\xff\xff\xff\x00\x00\x00\x00", - b"\xff\xff\xff\xff\x48\x01\x00\x00\x10\x00\x00\x00\x00\x00\x0a\x00\x0c\x00\x06\x00\x05\x00\x08\x00\x0a\x00\x00\x00\x00\x01\x03\x00\x0c\x00\x00\x00\x08\x00\x08\x00\x00\x00\x04\x00\x08\x00\x00\x00\x04\x00\x00\x00\x05\x00\x00\x00\xe4\x00\x00\x00\x9c\x00\x00\x00\x6c\x00\x00\x00\x34\x00\x00\x00\x04\x00\x00\x00\x40\xff\xff\xff\x00\x00\x00\x02\x18\x00\x00\x00\x0c\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x72\xff\xff\xff\x08\x00\x00\x00\x04\x00\x00\x00\x76\x61\x6c\x33\x00\x00\x00\x00\x6c\xff\xff\xff\x00\x00\x00\x03\x20\x00\x00\x00\x14\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x06\x00\x08\x00\x06\x00\x06\x00\x00\x00\x00\x00\x01\x00\x04\x00\x00\x00\x76\x61\x6c\x32\x00\x00\x00\x00\xa0\xff\xff\xff\x00\x00\x00\x05\x18\x00\x00\x00\x10\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x04\x00\x04\x00\x04\x00\x00\x00\x04\x00\x00\x00\x76\x61\x6c\x31\x00\x00\x00\x00\xcc\xff\xff\xff\x00\x00\x00\x02\x20\x00\x00\x00\x14\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x06\x00\x08\x00\x04\x00\x06\x00\x00\x00\x10\x00\x00\x00\x07\x00\x00\x00\x62\x6c\x6f\x63\x6b\x4e\x6f\x00\x10\x00\x14\x00\x08\x00\x00\x00\x07\x00\x0c\x00\x00\x00\x10\x00\x10\x00\x00\x00\x00\x00\x00\x02\x24\x00\x00\x00\x14\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x08\x00\x0c\x00\x08\x00\x07\x00\x08\x00\x00\x00\x00\x00\x00\x01\x40\x00\x00\x00\x02\x00\x00\x00\x69\x64\x00\x00\xff\xff\xff\xff\x58\x01\x00\x00\x14\x00\x00\x00\x00\x00\x00\x00\x0c\x00\x16\x00\x06\x00\x05\x00\x08\x00\x0c\x00\x0c\x00\x00\x00\x00\x03\x03\x00\x18\x00\x00\x00\x30\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0a\x00\x18\x00\x0c\x00\x04\x00\x08\x00\x0a\x00\x00\x00\xcc\x00\x00\x00\x10\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0b\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x18\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x28\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x28\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\
x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x41\x4d\x00\x00\x00\x00\x00\x00\x00\x00\x00\x3f\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\xff\xff\xff\xff\x00\x00\x00\x00", - ], - }, - } - - topic_postfix = str(hash(create_query_generator)) - for format_name, format_opts in list(all_formats.items()): - logging.debug(f"Set up {format_name}") - topic_name = f"format_tests_{format_name}-{topic_postfix}" - data_sample = format_opts["data_sample"] - data_prefix = [] - # prepend empty value when supported - if format_opts.get("supports_empty_value", False): - data_prefix = data_prefix + [""] - kafka_produce(kafka_cluster, topic_name, data_prefix + data_sample) - - extra_settings = format_opts.get("extra_settings") or {} - extra_settings["kafka_flush_interval_ms"] = 1000 - - instance.query( - """ - DROP TABLE IF EXISTS test.kafka_{format_name}; - - {create_query}; - - DROP TABLE IF EXISTS test.kafka_{format_name}_mv; - - CREATE MATERIALIZED VIEW test.kafka_{format_name}_mv ENGINE=MergeTree ORDER BY tuple() AS - SELECT *, _topic, _partition, _offset FROM test.kafka_{format_name}; - """.format( - topic_name=topic_name, - format_name=format_name, - create_query=create_query_generator( - f"kafka_{format_name}", - "id Int64, blockNo UInt16, val1 String, val2 Float32, val3 UInt8", - topic_list=f"{topic_name}", - consumer_group=f"{topic_name}_group", - format=format_name, - settings=extra_settings, - ), - ) - ) - raw_expected = """\ -0 0 AM 0.5 1 {topic_name} 0 {offset_0} -1 0 AM 0.5 1 {topic_name} 0 {offset_1} -2 0 AM 0.5 1 {topic_name} 0 {offset_1} -3 0 AM 0.5 1 {topic_name} 0 {offset_1} -4 0 AM 0.5 1 {topic_name} 0 {offset_1} -5 0 AM 0.5 1 {topic_name} 0 {offset_1} -6 0 AM 0.5 1 {topic_name} 0 {offset_1} -7 0 AM 0.5 1 {topic_name} 0 {offset_1} -8 0 AM 0.5 1 {topic_name} 0 {offset_1} -9 0 AM 0.5 1 {topic_name} 0 {offset_1} -10 0 AM 0.5 1 {topic_name} 0 {offset_1} -11 0 AM 0.5 1 {topic_name} 0 {offset_1} -12 0 AM 0.5 1 {topic_name} 0 {offset_1} -13 0 AM 0.5 1 {topic_name} 0 {offset_1} -14 0 AM 0.5 1 {topic_name} 0 {offset_1} -15 0 AM 0.5 1 {topic_name} 0 {offset_1} -0 0 AM 0.5 1 {topic_name} 0 {offset_2} -""" - - expected_rows_count = raw_expected.count("\n") - result_checker = lambda res: res.count("\n") == expected_rows_count - res = instance.query_with_retry( - f"SELECT * FROM test.kafka_{list(all_formats.keys())[-1]}_mv;", - retry_count=30, - sleep_time=1, - check_callback=result_checker, - ) - assert result_checker(res) - - for format_name, format_opts in list(all_formats.items()): - logging.debug(("Checking {}".format(format_name))) - topic_name = f"format_tests_{format_name}-{topic_postfix}" - # shift offsets by 1 if format supports empty value - offsets = ( - [1, 2, 3] if format_opts.get("supports_empty_value", False) else [0, 1, 2] - ) - result = instance.query_with_retry( - "SELECT * FROM test.kafka_{format_name}_mv;".format( - format_name=format_name - ), - check_callback=lambda x: x.count("\n") == raw_expected.count("\n"), - ) - expected = raw_expected.format( - topic_name=topic_name, - offset_0=offsets[0], - offset_1=offsets[1], - offset_2=offsets[2], - ) - assert TSV(result) == TSV(expected), "Proper result for format: {}".format( - format_name - ) - kafka_delete_topic(get_admin_client(kafka_cluster), topic_name) 
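For reference, the offset arithmetic checked above follows directly from the message layout each format's topic receives: an optional empty message (only for formats marked supports_empty_value), then a one-row sample, a fifteen-row batch, and a final one-row sample. A minimal sketch of that arithmetic, assuming only the layout used in this test (expected_offsets is an illustrative name, not a helper from this suite):

    # Messages per topic: [""] (if supported) + [sample_0, batch_rows_1_to_15, sample_16].
    def expected_offsets(supports_empty_value):
        # The prepended empty message occupies Kafka offset 0 and shifts every sample by one.
        return [1, 2, 3] if supports_empty_value else [0, 1, 2]

    assert expected_offsets(False) == [0, 1, 2]
    assert expected_offsets(True) == [1, 2, 3]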
- - -@pytest.mark.parametrize( - "create_query_generator, log_line", - [ - (generate_old_create_table_query, "{}.*Polled offset [0-9]+"), - (generate_new_create_table_query, "{}.*Saved offset"), - ], -) -def test_kafka_rebalance( - kafka_cluster: ClickHouseCluster, - instance: ClickHouseInstance, - create_query_generator, - log_line, -): - NUMBER_OF_CONSURRENT_CONSUMERS = 11 - - instance.query( - """ - DROP TABLE IF EXISTS test.destination; - CREATE TABLE test.destination ( - key UInt64, - value UInt64, - _topic String, - _key String, - _offset UInt64, - _partition UInt64, - _timestamp Nullable(DateTime('UTC')), - _consumed_by LowCardinality(String) - ) - ENGINE = MergeTree() - ORDER BY key; - """ - ) - - admin_client = KafkaAdminClient( - bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) - ) - topic_name = "topic_with_multiple_partitions" + get_topic_postfix( - create_query_generator - ) - table_name_prefix = "kafka_consumer" - keeper_path = f"/clickhouse/{{database}}/{table_name_prefix}" - with kafka_topic(admin_client, topic_name, num_partitions=11): - cancel = threading.Event() - - msg_index = [0] - - def produce(): - while not cancel.is_set(): - messages = [] - for _ in range(59): - messages.append( - json.dumps({"key": msg_index[0], "value": msg_index[0]}) - ) - msg_index[0] += 1 - kafka_produce(kafka_cluster, topic_name, messages) - - kafka_thread = threading.Thread(target=produce) - kafka_thread.start() - - for consumer_index in range(NUMBER_OF_CONSURRENT_CONSUMERS): - table_name = f"{table_name_prefix}{consumer_index}" - replica_name = f"r{consumer_index}" - logging.debug(f"Setting up {consumer_index}") - - create_query = create_query_generator( - table_name, - "key UInt64, value UInt64", - topic_list=topic_name, - keeper_path=keeper_path, - replica_name=replica_name, - settings={ - "kafka_max_block_size": 33, - "kafka_flush_interval_ms": 500, - }, - ) - instance.query( - f""" - DROP TABLE IF EXISTS test.{table_name}; - DROP TABLE IF EXISTS test.{table_name}_mv; - {create_query}; - CREATE MATERIALIZED VIEW test.{table_name}_mv TO test.destination AS - SELECT - key, - value, - _topic, - _key, - _offset, - _partition, - _timestamp, - '{table_name}' as _consumed_by - FROM test.{table_name}; - """ - ) - # Waiting for test.kafka_consumerX to start consume ... - instance.wait_for_log_line(log_line.format(table_name)) - - cancel.set() - - # I leave last one working by intent (to finish consuming after all rebalances) - for consumer_index in range(NUMBER_OF_CONSURRENT_CONSUMERS - 1): - logging.debug(("Dropping test.kafka_consumer{}".format(consumer_index))) - instance.query( - "DROP TABLE IF EXISTS test.kafka_consumer{} SYNC".format(consumer_index) - ) - - while 1: - messages_consumed = int( - instance.query("SELECT uniqExact(key) FROM test.destination") - ) - if messages_consumed >= msg_index[0]: - break - time.sleep(1) - logging.debug( - ( - "Waiting for finishing consuming (have {}, should be {})".format( - messages_consumed, msg_index[0] - ) - ) - ) - - logging.debug( - ( - instance.query( - "SELECT count(), uniqExact(key), max(key) + 1 FROM test.destination" - ) - ) - ) - - # Some queries to debug... 
- # SELECT * FROM test.destination where key in (SELECT key FROM test.destination group by key having count() <> 1) - # select number + 1 as key from numbers(4141) x left join test.destination using (key) where test.destination.key = 0; - # SELECT * FROM test.destination WHERE key between 2360 and 2370 order by key; - # select _partition from test.destination group by _partition having count() <> max(_offset) + 1; - # select toUInt64(0) as _partition, number + 1 as _offset from numbers(400) x left join test.destination using (_partition,_offset) where test.destination.key = 0 order by _offset; - # SELECT * FROM test.destination WHERE _partition = 0 and _offset between 220 and 240 order by _offset; - - # CREATE TABLE test.reference (key UInt64, value UInt64) ENGINE = Kafka SETTINGS kafka_broker_list = 'kafka1:19092', - # kafka_topic_list = 'topic_with_multiple_partitions', - # kafka_group_name = 'rebalance_test_group_reference', - # kafka_format = 'JSONEachRow', - # kafka_max_block_size = 100000; - # - # CREATE MATERIALIZED VIEW test.reference_mv Engine=Log AS - # SELECT key, value, _topic,_key,_offset, _partition, _timestamp, 'reference' as _consumed_by - # FROM test.reference; - # - # select * from test.reference_mv left join test.destination using (key,_topic,_offset,_partition) where test.destination._consumed_by = ''; - - result = int( - instance.query("SELECT count() == uniqExact(key) FROM test.destination") - ) - - for consumer_index in range(NUMBER_OF_CONSURRENT_CONSUMERS): - logging.debug(("kafka_consumer{}".format(consumer_index))) - table_name = "kafka_consumer{}".format(consumer_index) - instance.query( - """ - DROP TABLE IF EXISTS test.{0}; - DROP TABLE IF EXISTS test.{0}_mv; - """.format( - table_name - ) - ) - - instance.query( - """ - DROP TABLE IF EXISTS test.destination; - """ - ) - - kafka_thread.join() - - assert result == 1, "Messages from kafka get duplicated!" 
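The closing assertion above is the invariant of the rebalance test: even after ten of the eleven consumers are dropped mid-stream, count() must equal uniqExact(key) in test.destination, i.e. no key may be consumed twice. A minimal sketch of the same check in plain Python, under the assumption that keys holds the consumed keys (no_duplicates is an illustrative name only):

    def no_duplicates(keys):
        # count() == uniqExact(key) holds exactly when no key occurs more than once.
        return len(keys) == len(set(keys))

    assert no_duplicates([0, 1, 2, 3])
    assert not no_duplicates([0, 1, 1, 2])  # a rebalance-induced duplicate fails the check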
- - @pytest.mark.parametrize( - "create_query_generator", - [generate_old_create_table_query, generate_new_create_table_query], - ) diff --git a/tests/integration/test_storage_kafka/test_5.py b/tests/integration/test_storage_kafka/test_5.py new file mode 100644 index 00000000000..118b86480fa --- /dev/null +++ b/tests/integration/test_storage_kafka/test_5.py @@ -0,0 +1,630 @@ +import json +import logging +import threading +import time + +import pytest +from confluent_kafka.avro.cached_schema_registry_client import ( + CachedSchemaRegistryClient, +) +from helpers.cluster import ClickHouseCluster, ClickHouseInstance, is_arm +from helpers.test_tools import TSV +from kafka import KafkaAdminClient +from test_storage_kafka.conftest import conftest_cluster, init_cluster_and_instance +from test_storage_kafka.kafka_tests_utils import ( + avro_confluent_message, + avro_message, + generate_new_create_table_query, + generate_old_create_table_query, + get_admin_client, + get_topic_postfix, + kafka_delete_topic, + kafka_produce, + kafka_topic, +) + +# Skip all tests on ARM +if is_arm(): + pytestmark = pytest.mark.skip + + +@pytest.mark.parametrize( + "create_query_generator", + [generate_old_create_table_query, generate_new_create_table_query], +) +def test_kafka_formats( + kafka_cluster: ClickHouseCluster, + instance: ClickHouseInstance, + create_query_generator, +): + schema_registry_client = CachedSchemaRegistryClient( + {"url": f"http://localhost:{kafka_cluster.schema_registry_port}"} + ) + + # The data was dumped from ClickHouse itself in the following manner: + # clickhouse-client --format=Native --query='SELECT toInt64(number) as id, toUInt16( intDiv( id, 65536 ) ) as blockNo, reinterpretAsString(19777) as val1, toFloat32(0.5) as val2, toUInt8(1) as val3 from numbers(100) ORDER BY id' | xxd -ps | tr -d '\n' | sed 's/\(..\)/\\x\1/g' + + all_formats = { + ## Text formats ## + # dumped with clickhouse-client ... 
| perl -pe 's/\n/\\n/; s/\t/\\t/g;' + "JSONEachRow": { + "data_sample": [ + '{"id":"0","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n', + '{"id":"1","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"2","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"3","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"4","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"5","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"6","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"7","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"8","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"9","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"10","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"11","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"12","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"13","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"14","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n{"id":"15","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n', + '{"id":"0","blockNo":0,"val1":"AM","val2":0.5,"val3":1}\n', + ], + "supports_empty_value": True, + }, + # JSONAsString doesn't fit this test and is tested separately + "JSONCompactEachRow": { + "data_sample": [ + '["0", 0, "AM", 0.5, 1]\n', + '["1", 0, "AM", 0.5, 1]\n["2", 0, "AM", 0.5, 1]\n["3", 0, "AM", 0.5, 1]\n["4", 0, "AM", 0.5, 1]\n["5", 0, "AM", 0.5, 1]\n["6", 0, "AM", 0.5, 1]\n["7", 0, "AM", 0.5, 1]\n["8", 0, "AM", 0.5, 1]\n["9", 0, "AM", 0.5, 1]\n["10", 0, "AM", 0.5, 1]\n["11", 0, "AM", 0.5, 1]\n["12", 0, "AM", 0.5, 1]\n["13", 0, "AM", 0.5, 1]\n["14", 0, "AM", 0.5, 1]\n["15", 0, "AM", 0.5, 1]\n', + '["0", 0, "AM", 0.5, 1]\n', + ], + "supports_empty_value": True, + }, + "JSONCompactEachRowWithNamesAndTypes": { + "data_sample": [ + '["id", "blockNo", "val1", "val2", "val3"]\n["Int64", "UInt16", "String", "Float32", "UInt8"]\n["0", 0, "AM", 0.5, 1]\n', + '["id", "blockNo", "val1", "val2", "val3"]\n["Int64", "UInt16", "String", "Float32", "UInt8"]\n["1", 0, "AM", 0.5, 1]\n["2", 0, "AM", 0.5, 1]\n["3", 0, "AM", 0.5, 1]\n["4", 0, "AM", 0.5, 1]\n["5", 0, "AM", 0.5, 1]\n["6", 0, "AM", 0.5, 1]\n["7", 0, "AM", 0.5, 1]\n["8", 0, "AM", 0.5, 1]\n["9", 0, "AM", 0.5, 1]\n["10", 0, "AM", 0.5, 1]\n["11", 0, "AM", 0.5, 1]\n["12", 0, "AM", 0.5, 1]\n["13", 0, "AM", 0.5, 1]\n["14", 0, "AM", 0.5, 1]\n["15", 0, "AM", 0.5, 1]\n', + '["id", "blockNo", "val1", "val2", "val3"]\n["Int64", "UInt16", "String", "Float32", "UInt8"]\n["0", 0, "AM", 0.5, 1]\n', + # '' + # On empty message exception: Cannot parse input: expected '[' at end of stream., Stack trace (when copying this message, always include the lines below): + # /src/IO/ReadHelpers.h:175: DB::assertChar(char, DB::ReadBuffer&) @ 0x15db231a in /usr/bin/clickhouse + # /src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp:0: DB::JSONCompactEachRowRowInputFormat::readPrefix() @ 0x1dee6bd6 in /usr/bin/clickhouse + # /src/Processors/Formats/IRowInputFormat.cpp:0: DB::IRowInputFormat::generate() @ 0x1de72710 in /usr/bin/clickhouse + ], + }, + "TSKV": { + "data_sample": [ + "id=0\tblockNo=0\tval1=AM\tval2=0.5\tval3=1\n", 
"id=1\tblockNo=0\tval1=AM\tval2=0.5\tval3=1\nid=2\tblockNo=0\tval1=AM\tval2=0.5\tval3=1\nid=3\tblockNo=0\tval1=AM\tval2=0.5\tval3=1\nid=4\tblockNo=0\tval1=AM\tval2=0.5\tval3=1\nid=5\tblockNo=0\tval1=AM\tval2=0.5\tval3=1\nid=6\tblockNo=0\tval1=AM\tval2=0.5\tval3=1\nid=7\tblockNo=0\tval1=AM\tval2=0.5\tval3=1\nid=8\tblockNo=0\tval1=AM\tval2=0.5\tval3=1\nid=9\tblockNo=0\tval1=AM\tval2=0.5\tval3=1\nid=10\tblockNo=0\tval1=AM\tval2=0.5\tval3=1\nid=11\tblockNo=0\tval1=AM\tval2=0.5\tval3=1\nid=12\tblockNo=0\tval1=AM\tval2=0.5\tval3=1\nid=13\tblockNo=0\tval1=AM\tval2=0.5\tval3=1\nid=14\tblockNo=0\tval1=AM\tval2=0.5\tval3=1\nid=15\tblockNo=0\tval1=AM\tval2=0.5\tval3=1\n", + "id=0\tblockNo=0\tval1=AM\tval2=0.5\tval3=1\n", + # '' + # On empty message exception: Unexpected end of stream while reading key name from TSKV format + # /src/Processors/Formats/Impl/TSKVRowInputFormat.cpp:88: DB::readName(DB::ReadBuffer&, StringRef&, std::__1::basic_string, std::__1::allocator >&) @ 0x1df8c098 in /usr/bin/clickhouse + # /src/Processors/Formats/Impl/TSKVRowInputFormat.cpp:114: DB::TSKVRowInputFormat::readRow(std::__1::vector::mutable_ptr, std::__1::allocator::mutable_ptr > >&, DB::RowReadExtension&) @ 0x1df8ae3e in /usr/bin/clickhouse + # /src/Processors/Formats/IRowInputFormat.cpp:64: DB::IRowInputFormat::generate() @ 0x1de727cf in /usr/bin/clickhouse + ], + }, + "CSV": { + "data_sample": [ + '0,0,"AM",0.5,1\n', + '1,0,"AM",0.5,1\n2,0,"AM",0.5,1\n3,0,"AM",0.5,1\n4,0,"AM",0.5,1\n5,0,"AM",0.5,1\n6,0,"AM",0.5,1\n7,0,"AM",0.5,1\n8,0,"AM",0.5,1\n9,0,"AM",0.5,1\n10,0,"AM",0.5,1\n11,0,"AM",0.5,1\n12,0,"AM",0.5,1\n13,0,"AM",0.5,1\n14,0,"AM",0.5,1\n15,0,"AM",0.5,1\n', + '0,0,"AM",0.5,1\n', + ], + "supports_empty_value": True, + }, + "TSV": { + "data_sample": [ + "0\t0\tAM\t0.5\t1\n", + "1\t0\tAM\t0.5\t1\n2\t0\tAM\t0.5\t1\n3\t0\tAM\t0.5\t1\n4\t0\tAM\t0.5\t1\n5\t0\tAM\t0.5\t1\n6\t0\tAM\t0.5\t1\n7\t0\tAM\t0.5\t1\n8\t0\tAM\t0.5\t1\n9\t0\tAM\t0.5\t1\n10\t0\tAM\t0.5\t1\n11\t0\tAM\t0.5\t1\n12\t0\tAM\t0.5\t1\n13\t0\tAM\t0.5\t1\n14\t0\tAM\t0.5\t1\n15\t0\tAM\t0.5\t1\n", + "0\t0\tAM\t0.5\t1\n", + ], + "supports_empty_value": True, + }, + "CSVWithNames": { + "data_sample": [ + '"id","blockNo","val1","val2","val3"\n0,0,"AM",0.5,1\n', + '"id","blockNo","val1","val2","val3"\n1,0,"AM",0.5,1\n2,0,"AM",0.5,1\n3,0,"AM",0.5,1\n4,0,"AM",0.5,1\n5,0,"AM",0.5,1\n6,0,"AM",0.5,1\n7,0,"AM",0.5,1\n8,0,"AM",0.5,1\n9,0,"AM",0.5,1\n10,0,"AM",0.5,1\n11,0,"AM",0.5,1\n12,0,"AM",0.5,1\n13,0,"AM",0.5,1\n14,0,"AM",0.5,1\n15,0,"AM",0.5,1\n', + '"id","blockNo","val1","val2","val3"\n0,0,"AM",0.5,1\n', + # '', + # On empty message exception happens: Attempt to read after eof + # /src/IO/VarInt.h:122: DB::throwReadAfterEOF() @ 0x15c34487 in /usr/bin/clickhouse + # /src/IO/ReadHelpers.cpp:583: void DB::readCSVStringInto, std::__1::allocator > >(std::__1::basic_string, std::__1::allocator >&, DB::ReadBuffer&, DB::FormatSettings::CSV const&) @ 0x15c961e1 in /usr/bin/clickhouse + # /src/IO/ReadHelpers.cpp:678: DB::readCSVString(std::__1::basic_string, std::__1::allocator >&, DB::ReadBuffer&, DB::FormatSettings::CSV const&) @ 0x15c8dfae in /usr/bin/clickhouse + # /src/Processors/Formats/Impl/CSVRowInputFormat.cpp:170: DB::CSVRowInputFormat::readPrefix() @ 0x1dec46f7 in /usr/bin/clickhouse + # /src/Processors/Formats/IRowInputFormat.cpp:0: DB::IRowInputFormat::generate() @ 0x1de72710 in /usr/bin/clickhouse + # /src/Processors/ISource.cpp:48: DB::ISource::work() @ 0x1dd79737 in /usr/bin/clickhouse + ], + }, + "Values": { + "data_sample": [ + "(0,0,'AM',0.5,1)", + 
"(1,0,'AM',0.5,1),(2,0,'AM',0.5,1),(3,0,'AM',0.5,1),(4,0,'AM',0.5,1),(5,0,'AM',0.5,1),(6,0,'AM',0.5,1),(7,0,'AM',0.5,1),(8,0,'AM',0.5,1),(9,0,'AM',0.5,1),(10,0,'AM',0.5,1),(11,0,'AM',0.5,1),(12,0,'AM',0.5,1),(13,0,'AM',0.5,1),(14,0,'AM',0.5,1),(15,0,'AM',0.5,1)", + "(0,0,'AM',0.5,1)", + ], + "supports_empty_value": True, + }, + "TSVWithNames": { + "data_sample": [ + "id\tblockNo\tval1\tval2\tval3\n0\t0\tAM\t0.5\t1\n", + "id\tblockNo\tval1\tval2\tval3\n1\t0\tAM\t0.5\t1\n2\t0\tAM\t0.5\t1\n3\t0\tAM\t0.5\t1\n4\t0\tAM\t0.5\t1\n5\t0\tAM\t0.5\t1\n6\t0\tAM\t0.5\t1\n7\t0\tAM\t0.5\t1\n8\t0\tAM\t0.5\t1\n9\t0\tAM\t0.5\t1\n10\t0\tAM\t0.5\t1\n11\t0\tAM\t0.5\t1\n12\t0\tAM\t0.5\t1\n13\t0\tAM\t0.5\t1\n14\t0\tAM\t0.5\t1\n15\t0\tAM\t0.5\t1\n", + "id\tblockNo\tval1\tval2\tval3\n0\t0\tAM\t0.5\t1\n", + ], + "supports_empty_value": True, + }, + "TSVWithNamesAndTypes": { + "data_sample": [ + "id\tblockNo\tval1\tval2\tval3\nInt64\tUInt16\tString\tFloat32\tUInt8\n0\t0\tAM\t0.5\t1\n", + "id\tblockNo\tval1\tval2\tval3\nInt64\tUInt16\tString\tFloat32\tUInt8\n1\t0\tAM\t0.5\t1\n2\t0\tAM\t0.5\t1\n3\t0\tAM\t0.5\t1\n4\t0\tAM\t0.5\t1\n5\t0\tAM\t0.5\t1\n6\t0\tAM\t0.5\t1\n7\t0\tAM\t0.5\t1\n8\t0\tAM\t0.5\t1\n9\t0\tAM\t0.5\t1\n10\t0\tAM\t0.5\t1\n11\t0\tAM\t0.5\t1\n12\t0\tAM\t0.5\t1\n13\t0\tAM\t0.5\t1\n14\t0\tAM\t0.5\t1\n15\t0\tAM\t0.5\t1\n", + "id\tblockNo\tval1\tval2\tval3\nInt64\tUInt16\tString\tFloat32\tUInt8\n0\t0\tAM\t0.5\t1\n", + # '', + # On empty message exception happens: Cannot parse input: expected '\n' at end of stream. + # /src/IO/ReadHelpers.cpp:84: DB::throwAtAssertionFailed(char const*, DB::ReadBuffer&) @ 0x15c8d8ec in /usr/bin/clickhouse + # /src/IO/ReadHelpers.h:175: DB::assertChar(char, DB::ReadBuffer&) @ 0x15db231a in /usr/bin/clickhouse + # /src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp:24: DB::skipTSVRow(DB::ReadBuffer&, unsigned long) @ 0x1df92fac in /usr/bin/clickhouse + # /src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp:168: DB::TabSeparatedRowInputFormat::readPrefix() @ 0x1df92df0 in /usr/bin/clickhouse + # /src/Processors/Formats/IRowInputFormat.cpp:0: DB::IRowInputFormat::generate() @ 0x1de72710 in /usr/bin/clickhouse + ], + }, + "CustomSeparated": { + "data_sample": [ + "0\t0\tAM\t0.5\t1\n", + "1\t0\tAM\t0.5\t1\n2\t0\tAM\t0.5\t1\n3\t0\tAM\t0.5\t1\n4\t0\tAM\t0.5\t1\n5\t0\tAM\t0.5\t1\n6\t0\tAM\t0.5\t1\n7\t0\tAM\t0.5\t1\n8\t0\tAM\t0.5\t1\n9\t0\tAM\t0.5\t1\n10\t0\tAM\t0.5\t1\n11\t0\tAM\t0.5\t1\n12\t0\tAM\t0.5\t1\n13\t0\tAM\t0.5\t1\n14\t0\tAM\t0.5\t1\n15\t0\tAM\t0.5\t1\n", + "0\t0\tAM\t0.5\t1\n", + ], + }, + "Template": { + "data_sample": [ + '(id = 0, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)', + '(id = 1, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 2, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 3, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 4, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 5, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 6, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 7, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 8, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 9, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 10, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 11, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 12, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 13, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 14, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 15, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)', + '(id = 0, 
blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)', + ], + "extra_settings": {"format_template_row": "template_row.format"}, + }, + "Regexp": { + "data_sample": [ + '(id = 0, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)', + '(id = 1, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 2, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 3, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 4, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 5, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 6, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 7, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 8, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 9, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 10, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 11, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 12, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 13, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 14, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)\n(id = 15, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)', + '(id = 0, blockNo = 0, val1 = "AM", val2 = 0.5, val3 = 1)', + # '' + # On empty message exception happens: Line "" doesn't match the regexp.: (at row 1) + # /src/Processors/Formats/Impl/RegexpRowInputFormat.cpp:140: DB::RegexpRowInputFormat::readRow(std::__1::vector::mutable_ptr, std::__1::allocator::mutable_ptr > >&, DB::RowReadExtension&) @ 0x1df82fcb in /usr/bin/clickhouse + ], + "extra_settings": { + "format_regexp": r"\(id = (.+?), blockNo = (.+?), val1 = \"(.+?)\", val2 = (.+?), val3 = (.+?)\)", + "format_regexp_escaping_rule": "Escaped", + }, + }, + ## BINARY FORMATS + # dumped with + # clickhouse-client ... | xxd -ps -c 200 | tr -d '\n' | sed 's/\(..\)/\\x\1/g' + "Native": { + "data_sample": [ + b"\x05\x01\x02\x69\x64\x05\x49\x6e\x74\x36\x34\x00\x00\x00\x00\x00\x00\x00\x00\x07\x62\x6c\x6f\x63\x6b\x4e\x6f\x06\x55\x49\x6e\x74\x31\x36\x00\x00\x04\x76\x61\x6c\x31\x06\x53\x74\x72\x69\x6e\x67\x02\x41\x4d\x04\x76\x61\x6c\x32\x07\x46\x6c\x6f\x61\x74\x33\x32\x00\x00\x00\x3f\x04\x76\x61\x6c\x33\x05\x55\x49\x6e\x74\x38\x01", + b"\x05\x0f\x02\x69\x64\x05\x49\x6e\x74\x36\x34\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x09\x00\x00\x00\x00\x00\x00\x00\x0a\x00\x00\x00\x00\x00\x00\x00\x0b\x00\x00\x00\x00\x00\x00\x00\x0c\x00\x00\x00\x00\x00\x00\x00\x0d\x00\x00\x00\x00\x00\x00\x00\x0e\x00\x00\x00\x00\x00\x00\x00\x0f\x00\x00\x00\x00\x00\x00\x00\x07\x62\x6c\x6f\x63\x6b\x4e\x6f\x06\x55\x49\x6e\x74\x31\x36\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x04\x76\x61\x6c\x31\x06\x53\x74\x72\x69\x6e\x67\x02\x41\x4d\x02\x41\x4d\x02\x41\x4d\x02\x41\x4d\x02\x41\x4d\x02\x41\x4d\x02\x41\x4d\x02\x41\x4d\x02\x41\x4d\x02\x41\x4d\x02\x41\x4d\x02\x41\x4d\x02\x41\x4d\x02\x41\x4d\x02\x41\x4d\x04\x76\x61\x6c\x32\x07\x46\x6c\x6f\x61\x74\x33\x32\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x04\x76\x61\x6c\x33\x05\x55\x49\x6e\x74\x38\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01", + 
b"\x05\x01\x02\x69\x64\x05\x49\x6e\x74\x36\x34\x00\x00\x00\x00\x00\x00\x00\x00\x07\x62\x6c\x6f\x63\x6b\x4e\x6f\x06\x55\x49\x6e\x74\x31\x36\x00\x00\x04\x76\x61\x6c\x31\x06\x53\x74\x72\x69\x6e\x67\x02\x41\x4d\x04\x76\x61\x6c\x32\x07\x46\x6c\x6f\x61\x74\x33\x32\x00\x00\x00\x3f\x04\x76\x61\x6c\x33\x05\x55\x49\x6e\x74\x38\x01", + # '' + # On empty message exception happens: DB::Exception: Attempt to read after eof + # 1. DB::throwReadAfterEOF() @ 0xb76449b in /usr/bin/clickhouse + # 2. ? @ 0xb79cb0b in /usr/bin/clickhouse + # 3. DB::NativeReader::read() @ 0x16e7a084 in /usr/bin/clickhouse + # 4. DB::NativeInputFormat::generate() @ 0x16f76922 in /usr/bin/clickhouse + # 5. DB::ISource::tryGenerate() @ 0x16e90bd5 in /usr/bin/clickhouse + # 6. DB::ISource::work() @ 0x16e9087a in /usr/bin/clickhouse + ], + }, + "MsgPack": { + "data_sample": [ + b"\x00\x00\xa2\x41\x4d\xca\x3f\x00\x00\x00\x01", + b"\x01\x00\xa2\x41\x4d\xca\x3f\x00\x00\x00\x01\x02\x00\xa2\x41\x4d\xca\x3f\x00\x00\x00\x01\x03\x00\xa2\x41\x4d\xca\x3f\x00\x00\x00\x01\x04\x00\xa2\x41\x4d\xca\x3f\x00\x00\x00\x01\x05\x00\xa2\x41\x4d\xca\x3f\x00\x00\x00\x01\x06\x00\xa2\x41\x4d\xca\x3f\x00\x00\x00\x01\x07\x00\xa2\x41\x4d\xca\x3f\x00\x00\x00\x01\x08\x00\xa2\x41\x4d\xca\x3f\x00\x00\x00\x01\x09\x00\xa2\x41\x4d\xca\x3f\x00\x00\x00\x01\x0a\x00\xa2\x41\x4d\xca\x3f\x00\x00\x00\x01\x0b\x00\xa2\x41\x4d\xca\x3f\x00\x00\x00\x01\x0c\x00\xa2\x41\x4d\xca\x3f\x00\x00\x00\x01\x0d\x00\xa2\x41\x4d\xca\x3f\x00\x00\x00\x01\x0e\x00\xa2\x41\x4d\xca\x3f\x00\x00\x00\x01\x0f\x00\xa2\x41\x4d\xca\x3f\x00\x00\x00\x01", + b"\x00\x00\xa2\x41\x4d\xca\x3f\x00\x00\x00\x01", + # '' + # On empty message exception happens: Unexpected end of file while parsing msgpack object.: (at row 1) + # coming from Processors/Formats/Impl/MsgPackRowInputFormat.cpp:170 + ], + }, + "RowBinary": { + "data_sample": [ + b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01", + b"\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01\x06\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01\x09\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01\x0a\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01\x0b\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01\x0c\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01\x0d\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01\x0e\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01\x0f\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01", + b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01", + # '' + # On empty message exception happens: DB::Exception: Cannot read all data. Bytes read: 0. Bytes expected: 8. 
+ # /src/IO/ReadBuffer.h:157: DB::ReadBuffer::readStrict(char*, unsigned long) @ 0x15c6894d in /usr/bin/clickhouse + # /src/IO/ReadHelpers.h:108: void DB::readPODBinary(long&, DB::ReadBuffer&) @ 0x15c67715 in /usr/bin/clickhouse + # /src/IO/ReadHelpers.h:737: std::__1::enable_if, void>::type DB::readBinary(long&, DB::ReadBuffer&) @ 0x15e7afbd in /usr/bin/clickhouse + # /src/DataTypes/DataTypeNumberBase.cpp:180: DB::DataTypeNumberBase::deserializeBinary(DB::IColumn&, DB::ReadBuffer&) const @ 0x1cace581 in /usr/bin/clickhouse + # /src/Processors/Formats/Impl/BinaryRowInputFormat.cpp:22: DB::BinaryRowInputFormat::readRow(std::__1::vector::mutable_ptr, std::__1::allocator::mutable_ptr > >&, DB::RowReadExtension&) @ 0x1dea2c0b in /usr/bin/clickhouse + ], + }, + "RowBinaryWithNamesAndTypes": { + "data_sample": [ + b"\x05\x02\x69\x64\x07\x62\x6c\x6f\x63\x6b\x4e\x6f\x04\x76\x61\x6c\x31\x04\x76\x61\x6c\x32\x04\x76\x61\x6c\x33\x05\x49\x6e\x74\x36\x34\x06\x55\x49\x6e\x74\x31\x36\x06\x53\x74\x72\x69\x6e\x67\x07\x46\x6c\x6f\x61\x74\x33\x32\x05\x55\x49\x6e\x74\x38\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01", + b"\x05\x02\x69\x64\x07\x62\x6c\x6f\x63\x6b\x4e\x6f\x04\x76\x61\x6c\x31\x04\x76\x61\x6c\x32\x04\x76\x61\x6c\x33\x05\x49\x6e\x74\x36\x34\x06\x55\x49\x6e\x74\x31\x36\x06\x53\x74\x72\x69\x6e\x67\x07\x46\x6c\x6f\x61\x74\x33\x32\x05\x55\x49\x6e\x74\x38\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01\x06\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01\x09\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01\x0a\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01\x0b\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01\x0c\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01\x0d\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01\x0e\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01\x0f\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01", + b"\x05\x02\x69\x64\x07\x62\x6c\x6f\x63\x6b\x4e\x6f\x04\x76\x61\x6c\x31\x04\x76\x61\x6c\x32\x04\x76\x61\x6c\x33\x05\x49\x6e\x74\x36\x34\x06\x55\x49\x6e\x74\x31\x36\x06\x53\x74\x72\x69\x6e\x67\x07\x46\x6c\x6f\x61\x74\x33\x32\x05\x55\x49\x6e\x74\x38\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x41\x4d\x00\x00\x00\x3f\x01", + # '' + # !!! 
On empty message segfault: Address not mapped to object + # /contrib/FastMemcpy/FastMemcpy.h:666: memcpy_fast @ 0x21742d65 in /usr/bin/clickhouse + # /contrib/FastMemcpy/memcpy_wrapper.c:5: memcpy @ 0x21738235 in /usr/bin/clickhouse + # /src/IO/ReadBuffer.h:145: DB::ReadBuffer::read(char*, unsigned long) @ 0x15c369d7 in /usr/bin/clickhouse + # /src/IO/ReadBuffer.h:155: DB::ReadBuffer::readStrict(char*, unsigned long) @ 0x15c68878 in /usr/bin/clickhouse + # /src/DataTypes/DataTypeString.cpp:84: DB::DataTypeString::deserializeBinary(DB::IColumn&, DB::ReadBuffer&) const @ 0x1cad12e7 in /usr/bin/clickhouse + # /src/Processors/Formats/Impl/BinaryRowInputFormat.cpp:22: DB::BinaryRowInputFormat::readRow(std::__1::vector::mutable_ptr, std::__1::allocator::mutable_ptr > >&, DB::RowReadExtension&) @ 0x1dea2c0b in /usr/bin/clickhouse + ], + }, + "Protobuf": { + "data_sample": [ + b"\x0b\x1a\x02\x41\x4d\x25\x00\x00\x00\x3f\x28\x01", + b"\x0d\x08\x01\x1a\x02\x41\x4d\x25\x00\x00\x00\x3f\x28\x01\x0d\x08\x02\x1a\x02\x41\x4d\x25\x00\x00\x00\x3f\x28\x01\x0d\x08\x03\x1a\x02\x41\x4d\x25\x00\x00\x00\x3f\x28\x01\x0d\x08\x04\x1a\x02\x41\x4d\x25\x00\x00\x00\x3f\x28\x01\x0d\x08\x05\x1a\x02\x41\x4d\x25\x00\x00\x00\x3f\x28\x01\x0d\x08\x06\x1a\x02\x41\x4d\x25\x00\x00\x00\x3f\x28\x01\x0d\x08\x07\x1a\x02\x41\x4d\x25\x00\x00\x00\x3f\x28\x01\x0d\x08\x08\x1a\x02\x41\x4d\x25\x00\x00\x00\x3f\x28\x01\x0d\x08\x09\x1a\x02\x41\x4d\x25\x00\x00\x00\x3f\x28\x01\x0d\x08\x0a\x1a\x02\x41\x4d\x25\x00\x00\x00\x3f\x28\x01\x0d\x08\x0b\x1a\x02\x41\x4d\x25\x00\x00\x00\x3f\x28\x01\x0d\x08\x0c\x1a\x02\x41\x4d\x25\x00\x00\x00\x3f\x28\x01\x0d\x08\x0d\x1a\x02\x41\x4d\x25\x00\x00\x00\x3f\x28\x01\x0d\x08\x0e\x1a\x02\x41\x4d\x25\x00\x00\x00\x3f\x28\x01\x0d\x08\x0f\x1a\x02\x41\x4d\x25\x00\x00\x00\x3f\x28\x01", + b"\x0b\x1a\x02\x41\x4d\x25\x00\x00\x00\x3f\x28\x01", + # '' + # On empty message exception: Attempt to read after eof + # /src/IO/ReadBuffer.h:184: DB::ReadBuffer::throwReadAfterEOF() @ 0x15c9699b in /usr/bin/clickhouse + # /src/Formats/ProtobufReader.h:115: DB::ProtobufReader::SimpleReader::startMessage() @ 0x1df4f828 in /usr/bin/clickhouse + # /src/Formats/ProtobufReader.cpp:1119: DB::ProtobufReader::startMessage() @ 0x1df5356c in /usr/bin/clickhouse + # /src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp:25: DB::ProtobufRowInputFormat::readRow(std::__1::vector::mutable_ptr, std::__1::allocator::mutable_ptr > >&, DB::RowReadExtension&) @ 0x1df4cc71 in /usr/bin/clickhouse + # /src/Processors/Formats/IRowInputFormat.cpp:64: DB::IRowInputFormat::generate() @ 0x1de727cf in /usr/bin/clickhouse + ], + "extra_settings": {"kafka_schema": "test:TestMessage"}, + }, + "ORC": { + "data_sample": [ + 
b"\x4f\x52\x43\x11\x00\x00\x0a\x06\x12\x04\x08\x01\x50\x00\x2b\x00\x00\x0a\x13\x0a\x03\x00\x00\x00\x12\x0c\x08\x01\x12\x06\x08\x00\x10\x00\x18\x00\x50\x00\x30\x00\x00\xe3\x12\xe7\x62\x65\x00\x01\x21\x3e\x0e\x46\x25\x0e\x2e\x46\x03\x21\x46\x03\x09\xa6\x00\x06\x00\x32\x00\x00\xe3\x92\xe4\x62\x65\x00\x01\x21\x01\x0e\x46\x25\x2e\x2e\x26\x47\x5f\x21\x20\x96\x60\x09\x60\x00\x00\x36\x00\x00\xe3\x92\xe1\x62\x65\x00\x01\x21\x61\x0e\x46\x23\x5e\x2e\x46\x03\x21\x66\x03\x3d\x53\x29\x10\x11\xc0\x00\x00\x2b\x00\x00\x0a\x13\x0a\x03\x00\x00\x00\x12\x0c\x08\x01\x12\x06\x08\x02\x10\x02\x18\x02\x50\x00\x05\x00\x00\xff\x00\x03\x00\x00\x30\x07\x00\x00\x40\x00\x80\x05\x00\x00\x41\x4d\x07\x00\x00\x42\x00\x80\x03\x00\x00\x0a\x07\x00\x00\x42\x00\x80\x05\x00\x00\xff\x01\x88\x00\x00\x4d\xca\xc1\x0a\x80\x30\x0c\x03\xd0\x2e\x6b\xcb\x98\x17\xf1\x14\x50\xfc\xff\xcf\xb4\x66\x1e\x3c\x84\x47\x9a\xce\x1c\xb9\x1b\xb7\xf9\xda\x48\x09\x9e\xb2\xf3\x92\xce\x5b\x86\xf6\x56\x7f\x21\x41\x2f\x51\xa6\x7a\xd7\x1d\xe5\xea\xae\x3d\xca\xd5\x83\x71\x60\xd8\x17\xfc\x62\x0f\xa8\x00\x00\xe3\x4a\xe6\x62\xe1\x60\x0c\x60\xe0\xe2\xe3\x60\x14\x62\xe3\x60\x10\x60\x90\x60\x08\x60\x88\x60\xe5\x12\xe0\x60\x54\xe2\xe0\x62\x34\x10\x62\x34\x90\x60\x02\x8a\x70\x71\x09\x01\x45\xb8\xb8\x98\x1c\x7d\x85\x80\x58\x82\x05\x28\xc6\xcd\x25\xca\xc1\x68\xc4\x0b\x52\xc5\x6c\xa0\x67\x2a\x05\x22\xc0\x4a\x21\x86\x31\x09\x30\x81\xb5\xb2\x02\x00\x36\x01\x00\x25\x8c\xbd\x0a\xc2\x30\x14\x85\x73\x6f\x92\xf6\x92\x6a\x09\x01\x21\x64\x92\x4e\x75\x91\x58\x71\xc9\x64\x27\x5d\x2c\x1d\x5d\xfd\x59\xc4\x42\x37\x5f\xc0\x17\xe8\x23\x9b\xc6\xe1\x3b\x70\x0f\xdf\xb9\xc4\xf5\x17\x5d\x41\x5c\x4f\x60\x37\xeb\x53\x0d\x55\x4d\x0b\x23\x01\xb9\x90\x2e\xbf\x0f\xe3\xe3\xdd\x8d\x0e\x5f\x4f\x27\x3e\xb7\x61\x97\xb2\x49\xb9\xaf\x90\x20\x92\x27\x32\x2a\x6b\xf4\xf3\x0d\x1e\x82\x20\xe8\x59\x28\x09\x4c\x46\x4c\x33\xcb\x7a\x76\x95\x41\x47\x9f\x14\x78\x03\xde\x62\x6c\x54\x30\xb1\x51\x0a\xdb\x8b\x89\x58\x11\xbb\x22\xac\x08\x9a\xe5\x6c\x71\xbf\x3d\xb8\x39\x92\xfa\x7f\x86\x1a\xd3\x54\x1e\xa7\xee\xcc\x7e\x08\x9e\x01\x10\x01\x18\x80\x80\x10\x22\x02\x00\x0c\x28\x57\x30\x06\x82\xf4\x03\x03\x4f\x52\x43\x18", + 
b"\x4f\x52\x43\x11\x00\x00\x0a\x06\x12\x04\x08\x0f\x50\x00\x2b\x00\x00\x0a\x13\x0a\x03\x00\x00\x00\x12\x0c\x08\x0f\x12\x06\x08\x00\x10\x00\x18\x00\x50\x00\x30\x00\x00\xe3\x12\xe7\x62\x65\x00\x01\x21\x3e\x0e\x7e\x25\x0e\x2e\x46\x43\x21\x46\x4b\x09\xad\x00\x06\x00\x33\x00\x00\x0a\x17\x0a\x03\x00\x00\x00\x12\x10\x08\x0f\x22\x0a\x0a\x02\x41\x4d\x12\x02\x41\x4d\x18\x3c\x50\x00\x3a\x00\x00\xe3\x92\xe1\x62\x65\x00\x01\x21\x61\x0e\x7e\x23\x5e\x2e\x46\x03\x21\x66\x03\x3d\x53\x29\x66\x73\x3d\xd3\x00\x06\x00\x2b\x00\x00\x0a\x13\x0a\x03\x00\x00\x00\x12\x0c\x08\x0f\x12\x06\x08\x02\x10\x02\x18\x1e\x50\x00\x05\x00\x00\x0c\x00\x2b\x00\x00\x31\x32\x33\x34\x35\x36\x37\x38\x39\x31\x30\x31\x31\x31\x32\x31\x33\x31\x34\x31\x35\x09\x00\x00\x06\x01\x03\x02\x09\x00\x00\xc0\x0e\x00\x00\x07\x00\x00\x42\x00\x80\x05\x00\x00\x41\x4d\x0a\x00\x00\xe3\xe2\x42\x01\x00\x09\x00\x00\xc0\x0e\x02\x00\x05\x00\x00\x0c\x01\x94\x00\x00\x2d\xca\xc1\x0e\x80\x30\x08\x03\xd0\xc1\x60\x2e\xf3\x62\x76\x6a\xe2\x0e\xfe\xff\x57\x5a\x3b\x0f\xe4\x51\xe8\x68\xbd\x5d\x05\xe7\xf8\x34\x40\x3a\x6e\x59\xb1\x64\xe0\x91\xa9\xbf\xb1\x97\xd2\x95\x9d\x1e\xca\x55\x3a\x6d\xb4\xd2\xdd\x0b\x74\x9a\x74\xf7\x12\x39\xbd\x97\x7f\x7c\x06\xbb\xa6\x8d\x97\x17\xb4\x00\x00\xe3\x4a\xe6\x62\xe1\xe0\x0f\x60\xe0\xe2\xe3\xe0\x17\x62\xe3\x60\x10\x60\x90\x60\x08\x60\x88\x60\xe5\x12\xe0\xe0\x57\xe2\xe0\x62\x34\x14\x62\xb4\x94\xd0\x02\x8a\xc8\x73\x09\x01\x45\xb8\xb8\x98\x1c\x7d\x85\x80\x58\xc2\x06\x28\x26\xc4\x25\xca\xc1\x6f\xc4\xcb\xc5\x68\x20\xc4\x6c\xa0\x67\x2a\xc5\x6c\xae\x67\x0a\x14\xe6\x87\x1a\xc6\x24\xc0\x24\x21\x07\x32\x0c\x00\x4a\x01\x00\xe3\x60\x16\x58\xc3\x24\xc5\xcd\xc1\x2c\x30\x89\x51\xc2\x4b\xc1\x57\x83\x5f\x49\x83\x83\x47\x88\x95\x91\x89\x99\x85\x55\x8a\x3d\x29\x27\x3f\x39\xdb\x2f\x5f\x8a\x29\x33\x45\x8a\xa5\x2c\x31\xc7\x10\x4c\x1a\x81\x49\x63\x25\x26\x0e\x46\x20\x66\x07\x63\x36\x0e\x3e\x0d\x26\x03\x10\x9f\xd1\x80\xdf\x8a\x85\x83\x3f\x80\xc1\x8a\x8f\x83\x5f\x88\x8d\x83\x41\x80\x41\x82\x21\x80\x21\x82\xd5\x4a\x80\x83\x5f\x89\x83\x8b\xd1\x50\x88\xd1\x52\x42\x0b\x28\x22\x6f\x25\x04\x14\xe1\xe2\x62\x72\xf4\x15\x02\x62\x09\x1b\xa0\x98\x90\x95\x28\x07\xbf\x11\x2f\x17\xa3\x81\x10\xb3\x81\x9e\xa9\x14\xb3\xb9\x9e\x29\x50\x98\x1f\x6a\x18\x93\x00\x93\x84\x1c\xc8\x30\x87\x09\x7e\x1e\x0c\x00\x08\xa8\x01\x10\x01\x18\x80\x80\x10\x22\x02\x00\x0c\x28\x5d\x30\x06\x82\xf4\x03\x03\x4f\x52\x43\x18", + 
b"\x4f\x52\x43\x11\x00\x00\x0a\x06\x12\x04\x08\x01\x50\x00\x2b\x00\x00\x0a\x13\x0a\x03\x00\x00\x00\x12\x0c\x08\x01\x12\x06\x08\x00\x10\x00\x18\x00\x50\x00\x30\x00\x00\xe3\x12\xe7\x62\x65\x00\x01\x21\x3e\x0e\x46\x25\x0e\x2e\x46\x03\x21\x46\x03\x09\xa6\x00\x06\x00\x32\x00\x00\xe3\x92\xe4\x62\x65\x00\x01\x21\x01\x0e\x46\x25\x2e\x2e\x26\x47\x5f\x21\x20\x96\x60\x09\x60\x00\x00\x36\x00\x00\xe3\x92\xe1\x62\x65\x00\x01\x21\x61\x0e\x46\x23\x5e\x2e\x46\x03\x21\x66\x03\x3d\x53\x29\x10\x11\xc0\x00\x00\x2b\x00\x00\x0a\x13\x0a\x03\x00\x00\x00\x12\x0c\x08\x01\x12\x06\x08\x02\x10\x02\x18\x02\x50\x00\x05\x00\x00\xff\x00\x03\x00\x00\x30\x07\x00\x00\x40\x00\x80\x05\x00\x00\x41\x4d\x07\x00\x00\x42\x00\x80\x03\x00\x00\x0a\x07\x00\x00\x42\x00\x80\x05\x00\x00\xff\x01\x88\x00\x00\x4d\xca\xc1\x0a\x80\x30\x0c\x03\xd0\x2e\x6b\xcb\x98\x17\xf1\x14\x50\xfc\xff\xcf\xb4\x66\x1e\x3c\x84\x47\x9a\xce\x1c\xb9\x1b\xb7\xf9\xda\x48\x09\x9e\xb2\xf3\x92\xce\x5b\x86\xf6\x56\x7f\x21\x41\x2f\x51\xa6\x7a\xd7\x1d\xe5\xea\xae\x3d\xca\xd5\x83\x71\x60\xd8\x17\xfc\x62\x0f\xa8\x00\x00\xe3\x4a\xe6\x62\xe1\x60\x0c\x60\xe0\xe2\xe3\x60\x14\x62\xe3\x60\x10\x60\x90\x60\x08\x60\x88\x60\xe5\x12\xe0\x60\x54\xe2\xe0\x62\x34\x10\x62\x34\x90\x60\x02\x8a\x70\x71\x09\x01\x45\xb8\xb8\x98\x1c\x7d\x85\x80\x58\x82\x05\x28\xc6\xcd\x25\xca\xc1\x68\xc4\x0b\x52\xc5\x6c\xa0\x67\x2a\x05\x22\xc0\x4a\x21\x86\x31\x09\x30\x81\xb5\xb2\x02\x00\x36\x01\x00\x25\x8c\xbd\x0a\xc2\x30\x14\x85\x73\x6f\x92\xf6\x92\x6a\x09\x01\x21\x64\x92\x4e\x75\x91\x58\x71\xc9\x64\x27\x5d\x2c\x1d\x5d\xfd\x59\xc4\x42\x37\x5f\xc0\x17\xe8\x23\x9b\xc6\xe1\x3b\x70\x0f\xdf\xb9\xc4\xf5\x17\x5d\x41\x5c\x4f\x60\x37\xeb\x53\x0d\x55\x4d\x0b\x23\x01\xb9\x90\x2e\xbf\x0f\xe3\xe3\xdd\x8d\x0e\x5f\x4f\x27\x3e\xb7\x61\x97\xb2\x49\xb9\xaf\x90\x20\x92\x27\x32\x2a\x6b\xf4\xf3\x0d\x1e\x82\x20\xe8\x59\x28\x09\x4c\x46\x4c\x33\xcb\x7a\x76\x95\x41\x47\x9f\x14\x78\x03\xde\x62\x6c\x54\x30\xb1\x51\x0a\xdb\x8b\x89\x58\x11\xbb\x22\xac\x08\x9a\xe5\x6c\x71\xbf\x3d\xb8\x39\x92\xfa\x7f\x86\x1a\xd3\x54\x1e\xa7\xee\xcc\x7e\x08\x9e\x01\x10\x01\x18\x80\x80\x10\x22\x02\x00\x0c\x28\x57\x30\x06\x82\xf4\x03\x03\x4f\x52\x43\x18", + # '' + # On empty message exception: IOError: File size too small, Stack trace (when copying this message, always include the lines below): + # /src/Processors/Formats/Impl/ORCBlockInputFormat.cpp:36: DB::ORCBlockInputFormat::generate() @ 0x1df282a6 in /usr/bin/clickhouse + # /src/Processors/ISource.cpp:48: DB::ISource::work() @ 0x1dd79737 in /usr/bin/clickhouse + ], + }, + "CapnProto": { + "data_sample": [ + b"\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x3f\x01\x00\x00\x00\x1a\x00\x00\x00\x41\x4d\x00\x00\x00\x00\x00\x00", + 
b"\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x02\x00\x01\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x3f\x01\x00\x00\x00\x1a\x00\x00\x00\x41\x4d\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x02\x00\x01\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x3f\x01\x00\x00\x00\x1a\x00\x00\x00\x41\x4d\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x02\x00\x01\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x3f\x01\x00\x00\x00\x1a\x00\x00\x00\x41\x4d\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x02\x00\x01\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x3f\x01\x00\x00\x00\x1a\x00\x00\x00\x41\x4d\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x02\x00\x01\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x3f\x01\x00\x00\x00\x1a\x00\x00\x00\x41\x4d\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x02\x00\x01\x00\x06\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x3f\x01\x00\x00\x00\x1a\x00\x00\x00\x41\x4d\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x02\x00\x01\x00\x07\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x3f\x01\x00\x00\x00\x1a\x00\x00\x00\x41\x4d\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x02\x00\x01\x00\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x3f\x01\x00\x00\x00\x1a\x00\x00\x00\x41\x4d\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x02\x00\x01\x00\x09\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x3f\x01\x00\x00\x00\x1a\x00\x00\x00\x41\x4d\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x02\x00\x01\x00\x0a\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x3f\x01\x00\x00\x00\x1a\x00\x00\x00\x41\x4d\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x02\x00\x01\x00\x0b\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x3f\x01\x00\x00\x00\x1a\x00\x00\x00\x41\x4d\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x02\x00\x01\x00\x0c\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x3f\x01\x00\x00\x00\x1a\x00\x00\x00\x41\x4d\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x02\x00\x01\x00\x0d\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x3f\x01\x00\x00\x00\x1a\x00\x00\x00\x41\x4d\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x02\x00\x01\x00\x0e\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x3f\x01\x00\x00\x00\x1a\x00\x00\x00\x41\x4d\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x02\x00\x01\x00\x0f\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x3f\x01\x00\x00\x00\x1a\x00\x00\x00\x41\x4d\x00\x00\x00\x00\x00\x00", + b"\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x02\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x3f\x01\x00\x00\x00\x1a\x00\x00\x00\x41\x4d\x00\x00\x00\x00\x00\x00", + # '' + # On empty message exception: Cannot read all data. Bytes read: 0. Bytes expected: 4. 
+ # /src/IO/ReadBuffer.h:157: DB::ReadBuffer::readStrict(char*, unsigned long) @ 0x15c6894d in /usr/bin/clickhouse + # /src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp:212: DB::CapnProtoRowInputFormat::readMessage() @ 0x1ded1cab in /usr/bin/clickhouse + # /src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp:241: DB::CapnProtoRowInputFormat::readRow(std::__1::vector::mutable_ptr, std::__1::allocator::mutable_ptr > >&, DB::RowReadExtension&) @ 0x1ded205d in /usr/bin/clickhouse + ], + "extra_settings": {"kafka_schema": "test:TestRecordStruct"}, + }, + "Parquet": { + "data_sample": [ + b"\x50\x41\x52\x31\x15\x04\x15\x10\x15\x14\x4c\x15\x02\x15\x04\x12\x00\x00\x08\x1c\x00\x00\x00\x00\x00\x00\x00\x00\x15\x00\x15\x06\x15\x0a\x2c\x15\x02\x15\x04\x15\x06\x15\x06\x1c\x18\x08\x00\x00\x00\x00\x00\x00\x00\x00\x18\x08\x00\x00\x00\x00\x00\x00\x00\x00\x16\x00\x28\x08\x00\x00\x00\x00\x00\x00\x00\x00\x18\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x01\x02\x00\x26\xbc\x01\x1c\x15\x04\x19\x35\x04\x00\x06\x19\x18\x02\x69\x64\x15\x02\x16\x02\x16\xac\x01\x16\xb4\x01\x26\x38\x26\x08\x1c\x18\x08\x00\x00\x00\x00\x00\x00\x00\x00\x18\x08\x00\x00\x00\x00\x00\x00\x00\x00\x16\x00\x28\x08\x00\x00\x00\x00\x00\x00\x00\x00\x18\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x15\x04\x15\x08\x15\x0c\x4c\x15\x02\x15\x04\x12\x00\x00\x04\x0c\x00\x00\x00\x00\x15\x00\x15\x06\x15\x0a\x2c\x15\x02\x15\x04\x15\x06\x15\x06\x1c\x36\x00\x28\x04\x00\x00\x00\x00\x18\x04\x00\x00\x00\x00\x00\x00\x00\x03\x08\x01\x02\x00\x26\xc8\x03\x1c\x15\x02\x19\x35\x04\x00\x06\x19\x18\x07\x62\x6c\x6f\x63\x6b\x4e\x6f\x15\x02\x16\x02\x16\x6c\x16\x74\x26\xfc\x02\x26\xd4\x02\x1c\x36\x00\x28\x04\x00\x00\x00\x00\x18\x04\x00\x00\x00\x00\x00\x00\x00\x15\x04\x15\x0c\x15\x10\x4c\x15\x02\x15\x04\x12\x00\x00\x06\x14\x02\x00\x00\x00\x41\x4d\x15\x00\x15\x06\x15\x0a\x2c\x15\x02\x15\x04\x15\x06\x15\x06\x1c\x36\x00\x28\x02\x41\x4d\x18\x02\x41\x4d\x00\x00\x00\x03\x08\x01\x02\x00\x26\xa2\x05\x1c\x15\x0c\x19\x35\x04\x00\x06\x19\x18\x04\x76\x61\x6c\x31\x15\x02\x16\x02\x16\x68\x16\x70\x26\xde\x04\x26\xb2\x04\x1c\x36\x00\x28\x02\x41\x4d\x18\x02\x41\x4d\x00\x00\x00\x15\x04\x15\x08\x15\x0c\x4c\x15\x02\x15\x04\x12\x00\x00\x04\x0c\x00\x00\x00\x3f\x15\x00\x15\x06\x15\x0a\x2c\x15\x02\x15\x04\x15\x06\x15\x06\x1c\x18\x04\x00\x00\x00\x3f\x18\x04\x00\x00\x00\x3f\x16\x00\x28\x04\x00\x00\x00\x3f\x18\x04\x00\x00\x00\x3f\x00\x00\x00\x03\x08\x01\x02\x00\x26\x8a\x07\x1c\x15\x08\x19\x35\x04\x00\x06\x19\x18\x04\x76\x61\x6c\x32\x15\x02\x16\x02\x16\x84\x01\x16\x8c\x01\x26\xa6\x06\x26\xfe\x05\x1c\x18\x04\x00\x00\x00\x3f\x18\x04\x00\x00\x00\x3f\x16\x00\x28\x04\x00\x00\x00\x3f\x18\x04\x00\x00\x00\x3f\x00\x00\x00\x15\x04\x15\x08\x15\x0c\x4c\x15\x02\x15\x04\x12\x00\x00\x04\x0c\x01\x00\x00\x00\x15\x00\x15\x06\x15\x0a\x2c\x15\x02\x15\x04\x15\x06\x15\x06\x1c\x36\x00\x28\x04\x01\x00\x00\x00\x18\x04\x01\x00\x00\x00\x00\x00\x00\x03\x08\x01\x02\x00\x26\xfe\x08\x1c\x15\x02\x19\x35\x04\x00\x06\x19\x18\x04\x76\x61\x6c\x33\x15\x02\x16\x02\x16\x6c\x16\x74\x26\xb2\x08\x26\x8a\x08\x1c\x36\x00\x28\x04\x01\x00\x00\x00\x18\x04\x01\x00\x00\x00\x00\x00\x00\x15\x02\x19\x6c\x35\x00\x18\x06\x73\x63\x68\x65\x6d\x61\x15\x0a\x00\x15\x04\x25\x00\x18\x02\x69\x64\x00\x15\x02\x25\x00\x18\x07\x62\x6c\x6f\x63\x6b\x4e\x6f\x25\x18\x4c\xac\x13\x10\x12\x00\x00\x00\x15\x0c\x25\x00\x18\x04\x76\x61\x6c\x31\x25\x00\x4c\x1c\x00\x00\x00\x15\x08\x25\x00\x18\x04\x76\x61\x6c\x32\x00\x15\x02\x25\x00\x18\x04\x76\x61\x6c\x33\x25\x16\x4c\xac\x13\x08\x12\x00\x00\x00\x16\x02\x19\x1c\x19\x5c\x26\xbc\x01\x1c\x15\x04\x19\x35\x04\x00
\x06\x19\x18\x02\x69\x64\x15\x02\x16\x02\x16\xac\x01\x16\xb4\x01\x26\x38\x26\x08\x1c\x18\x08\x00\x00\x00\x00\x00\x00\x00\x00\x18\x08\x00\x00\x00\x00\x00\x00\x00\x00\x16\x00\x28\x08\x00\x00\x00\x00\x00\x00\x00\x00\x18\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x26\xc8\x03\x1c\x15\x02\x19\x35\x04\x00\x06\x19\x18\x07\x62\x6c\x6f\x63\x6b\x4e\x6f\x15\x02\x16\x02\x16\x6c\x16\x74\x26\xfc\x02\x26\xd4\x02\x1c\x36\x00\x28\x04\x00\x00\x00\x00\x18\x04\x00\x00\x00\x00\x00\x00\x00\x26\xa2\x05\x1c\x15\x0c\x19\x35\x04\x00\x06\x19\x18\x04\x76\x61\x6c\x31\x15\x02\x16\x02\x16\x68\x16\x70\x26\xde\x04\x26\xb2\x04\x1c\x36\x00\x28\x02\x41\x4d\x18\x02\x41\x4d\x00\x00\x00\x26\x8a\x07\x1c\x15\x08\x19\x35\x04\x00\x06\x19\x18\x04\x76\x61\x6c\x32\x15\x02\x16\x02\x16\x84\x01\x16\x8c\x01\x26\xa6\x06\x26\xfe\x05\x1c\x18\x04\x00\x00\x00\x3f\x18\x04\x00\x00\x00\x3f\x16\x00\x28\x04\x00\x00\x00\x3f\x18\x04\x00\x00\x00\x3f\x00\x00\x00\x26\xfe\x08\x1c\x15\x02\x19\x35\x04\x00\x06\x19\x18\x04\x76\x61\x6c\x33\x15\x02\x16\x02\x16\x6c\x16\x74\x26\xb2\x08\x26\x8a\x08\x1c\x36\x00\x28\x04\x01\x00\x00\x00\x18\x04\x01\x00\x00\x00\x00\x00\x00\x16\x98\x05\x16\x02\x00\x28\x22\x70\x61\x72\x71\x75\x65\x74\x2d\x63\x70\x70\x20\x76\x65\x72\x73\x69\x6f\x6e\x20\x31\x2e\x35\x2e\x31\x2d\x53\x4e\x41\x50\x53\x48\x4f\x54\x19\x5c\x1c\x00\x00\x1c\x00\x00\x1c\x00\x00\x1c\x00\x00\x1c\x00\x00\x00\xc4\x01\x00\x00\x50\x41\x52\x31", + b"\x50\x41\x52\x31\x15\x04\x15\xf0\x01\x15\x90\x01\x4c\x15\x1e\x15\x04\x12\x00\x00\x78\x04\x01\x00\x09\x01\x00\x02\x09\x07\x04\x00\x03\x0d\x08\x00\x04\x0d\x08\x00\x05\x0d\x08\x00\x06\x0d\x08\x00\x07\x0d\x08\x00\x08\x0d\x08\x00\x09\x0d\x08\x00\x0a\x0d\x08\x00\x0b\x0d\x08\x00\x0c\x0d\x08\x00\x0d\x0d\x08\x3c\x0e\x00\x00\x00\x00\x00\x00\x00\x0f\x00\x00\x00\x00\x00\x00\x00\x15\x00\x15\x14\x15\x18\x2c\x15\x1e\x15\x04\x15\x06\x15\x06\x1c\x18\x08\x0f\x00\x00\x00\x00\x00\x00\x00\x18\x08\x01\x00\x00\x00\x00\x00\x00\x00\x16\x00\x28\x08\x0f\x00\x00\x00\x00\x00\x00\x00\x18\x08\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0a\x24\x04\x05\x10\x32\x54\x76\x98\xba\xdc\x0e\x26\xca\x02\x1c\x15\x04\x19\x35\x04\x00\x06\x19\x18\x02\x69\x64\x15\x02\x16\x1e\x16\x9e\x03\x16\xc2\x02\x26\xb8\x01\x26\x08\x1c\x18\x08\x0f\x00\x00\x00\x00\x00\x00\x00\x18\x08\x01\x00\x00\x00\x00\x00\x00\x00\x16\x00\x28\x08\x0f\x00\x00\x00\x00\x00\x00\x00\x18\x08\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x15\x04\x15\x08\x15\x0c\x4c\x15\x02\x15\x04\x12\x00\x00\x04\x0c\x00\x00\x00\x00\x15\x00\x15\x06\x15\x0a\x2c\x15\x1e\x15\x04\x15\x06\x15\x06\x1c\x36\x00\x28\x04\x00\x00\x00\x00\x18\x04\x00\x00\x00\x00\x00\x00\x00\x03\x08\x01\x1e\x00\x26\xd8\x04\x1c\x15\x02\x19\x35\x04\x00\x06\x19\x18\x07\x62\x6c\x6f\x63\x6b\x4e\x6f\x15\x02\x16\x1e\x16\x6c\x16\x74\x26\x8c\x04\x26\xe4\x03\x1c\x36\x00\x28\x04\x00\x00\x00\x00\x18\x04\x00\x00\x00\x00\x00\x00\x00\x15\x04\x15\x0c\x15\x10\x4c\x15\x02\x15\x04\x12\x00\x00\x06\x14\x02\x00\x00\x00\x41\x4d\x15\x00\x15\x06\x15\x0a\x2c\x15\x1e\x15\x04\x15\x06\x15\x06\x1c\x36\x00\x28\x02\x41\x4d\x18\x02\x41\x4d\x00\x00\x00\x03\x08\x01\x1e\x00\x26\xb2\x06\x1c\x15\x0c\x19\x35\x04\x00\x06\x19\x18\x04\x76\x61\x6c\x31\x15\x02\x16\x1e\x16\x68\x16\x70\x26\xee\x05\x26\xc2\x05\x1c\x36\x00\x28\x02\x41\x4d\x18\x02\x41\x4d\x00\x00\x00\x15\x04\x15\x08\x15\x0c\x4c\x15\x02\x15\x04\x12\x00\x00\x04\x0c\x00\x00\x00\x3f\x15\x00\x15\x06\x15\x0a\x2c\x15\x1e\x15\x04\x15\x06\x15\x06\x1c\x18\x04\x00\x00\x00\x3f\x18\x04\x00\x00\x00\x3f\x16\x00\x28\x04\x00\x00\x00\x3f\x18\x04\x00\x00\x00\x3f\x00\x00\x00\x03\x08\x01\x1e\x00\x26\x9a\x08\x1c\x15\x08\x19\x35\x04\x00\x06\x19\x18\x04\x76
\x61\x6c\x32\x15\x02\x16\x1e\x16\x84\x01\x16\x8c\x01\x26\xb6\x07\x26\x8e\x07\x1c\x18\x04\x00\x00\x00\x3f\x18\x04\x00\x00\x00\x3f\x16\x00\x28\x04\x00\x00\x00\x3f\x18\x04\x00\x00\x00\x3f\x00\x00\x00\x15\x04\x15\x08\x15\x0c\x4c\x15\x02\x15\x04\x12\x00\x00\x04\x0c\x01\x00\x00\x00\x15\x00\x15\x06\x15\x0a\x2c\x15\x1e\x15\x04\x15\x06\x15\x06\x1c\x36\x00\x28\x04\x01\x00\x00\x00\x18\x04\x01\x00\x00\x00\x00\x00\x00\x03\x08\x01\x1e\x00\x26\x8e\x0a\x1c\x15\x02\x19\x35\x04\x00\x06\x19\x18\x04\x76\x61\x6c\x33\x15\x02\x16\x1e\x16\x6c\x16\x74\x26\xc2\x09\x26\x9a\x09\x1c\x36\x00\x28\x04\x01\x00\x00\x00\x18\x04\x01\x00\x00\x00\x00\x00\x00\x15\x02\x19\x6c\x35\x00\x18\x06\x73\x63\x68\x65\x6d\x61\x15\x0a\x00\x15\x04\x25\x00\x18\x02\x69\x64\x00\x15\x02\x25\x00\x18\x07\x62\x6c\x6f\x63\x6b\x4e\x6f\x25\x18\x4c\xac\x13\x10\x12\x00\x00\x00\x15\x0c\x25\x00\x18\x04\x76\x61\x6c\x31\x25\x00\x4c\x1c\x00\x00\x00\x15\x08\x25\x00\x18\x04\x76\x61\x6c\x32\x00\x15\x02\x25\x00\x18\x04\x76\x61\x6c\x33\x25\x16\x4c\xac\x13\x08\x12\x00\x00\x00\x16\x1e\x19\x1c\x19\x5c\x26\xca\x02\x1c\x15\x04\x19\x35\x04\x00\x06\x19\x18\x02\x69\x64\x15\x02\x16\x1e\x16\x9e\x03\x16\xc2\x02\x26\xb8\x01\x26\x08\x1c\x18\x08\x0f\x00\x00\x00\x00\x00\x00\x00\x18\x08\x01\x00\x00\x00\x00\x00\x00\x00\x16\x00\x28\x08\x0f\x00\x00\x00\x00\x00\x00\x00\x18\x08\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x26\xd8\x04\x1c\x15\x02\x19\x35\x04\x00\x06\x19\x18\x07\x62\x6c\x6f\x63\x6b\x4e\x6f\x15\x02\x16\x1e\x16\x6c\x16\x74\x26\x8c\x04\x26\xe4\x03\x1c\x36\x00\x28\x04\x00\x00\x00\x00\x18\x04\x00\x00\x00\x00\x00\x00\x00\x26\xb2\x06\x1c\x15\x0c\x19\x35\x04\x00\x06\x19\x18\x04\x76\x61\x6c\x31\x15\x02\x16\x1e\x16\x68\x16\x70\x26\xee\x05\x26\xc2\x05\x1c\x36\x00\x28\x02\x41\x4d\x18\x02\x41\x4d\x00\x00\x00\x26\x9a\x08\x1c\x15\x08\x19\x35\x04\x00\x06\x19\x18\x04\x76\x61\x6c\x32\x15\x02\x16\x1e\x16\x84\x01\x16\x8c\x01\x26\xb6\x07\x26\x8e\x07\x1c\x18\x04\x00\x00\x00\x3f\x18\x04\x00\x00\x00\x3f\x16\x00\x28\x04\x00\x00\x00\x3f\x18\x04\x00\x00\x00\x3f\x00\x00\x00\x26\x8e\x0a\x1c\x15\x02\x19\x35\x04\x00\x06\x19\x18\x04\x76\x61\x6c\x33\x15\x02\x16\x1e\x16\x6c\x16\x74\x26\xc2\x09\x26\x9a\x09\x1c\x36\x00\x28\x04\x01\x00\x00\x00\x18\x04\x01\x00\x00\x00\x00\x00\x00\x16\xa6\x06\x16\x1e\x00\x28\x22\x70\x61\x72\x71\x75\x65\x74\x2d\x63\x70\x70\x20\x76\x65\x72\x73\x69\x6f\x6e\x20\x31\x2e\x35\x2e\x31\x2d\x53\x4e\x41\x50\x53\x48\x4f\x54\x19\x5c\x1c\x00\x00\x1c\x00\x00\x1c\x00\x00\x1c\x00\x00\x1c\x00\x00\x00\xc5\x01\x00\x00\x50\x41\x52\x31", + 
b"\x50\x41\x52\x31\x15\x04\x15\x10\x15\x14\x4c\x15\x02\x15\x04\x12\x00\x00\x08\x1c\x00\x00\x00\x00\x00\x00\x00\x00\x15\x00\x15\x06\x15\x0a\x2c\x15\x02\x15\x04\x15\x06\x15\x06\x1c\x18\x08\x00\x00\x00\x00\x00\x00\x00\x00\x18\x08\x00\x00\x00\x00\x00\x00\x00\x00\x16\x00\x28\x08\x00\x00\x00\x00\x00\x00\x00\x00\x18\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03\x08\x01\x02\x00\x26\xbc\x01\x1c\x15\x04\x19\x35\x04\x00\x06\x19\x18\x02\x69\x64\x15\x02\x16\x02\x16\xac\x01\x16\xb4\x01\x26\x38\x26\x08\x1c\x18\x08\x00\x00\x00\x00\x00\x00\x00\x00\x18\x08\x00\x00\x00\x00\x00\x00\x00\x00\x16\x00\x28\x08\x00\x00\x00\x00\x00\x00\x00\x00\x18\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x15\x04\x15\x08\x15\x0c\x4c\x15\x02\x15\x04\x12\x00\x00\x04\x0c\x00\x00\x00\x00\x15\x00\x15\x06\x15\x0a\x2c\x15\x02\x15\x04\x15\x06\x15\x06\x1c\x36\x00\x28\x04\x00\x00\x00\x00\x18\x04\x00\x00\x00\x00\x00\x00\x00\x03\x08\x01\x02\x00\x26\xc8\x03\x1c\x15\x02\x19\x35\x04\x00\x06\x19\x18\x07\x62\x6c\x6f\x63\x6b\x4e\x6f\x15\x02\x16\x02\x16\x6c\x16\x74\x26\xfc\x02\x26\xd4\x02\x1c\x36\x00\x28\x04\x00\x00\x00\x00\x18\x04\x00\x00\x00\x00\x00\x00\x00\x15\x04\x15\x0c\x15\x10\x4c\x15\x02\x15\x04\x12\x00\x00\x06\x14\x02\x00\x00\x00\x41\x4d\x15\x00\x15\x06\x15\x0a\x2c\x15\x02\x15\x04\x15\x06\x15\x06\x1c\x36\x00\x28\x02\x41\x4d\x18\x02\x41\x4d\x00\x00\x00\x03\x08\x01\x02\x00\x26\xa2\x05\x1c\x15\x0c\x19\x35\x04\x00\x06\x19\x18\x04\x76\x61\x6c\x31\x15\x02\x16\x02\x16\x68\x16\x70\x26\xde\x04\x26\xb2\x04\x1c\x36\x00\x28\x02\x41\x4d\x18\x02\x41\x4d\x00\x00\x00\x15\x04\x15\x08\x15\x0c\x4c\x15\x02\x15\x04\x12\x00\x00\x04\x0c\x00\x00\x00\x3f\x15\x00\x15\x06\x15\x0a\x2c\x15\x02\x15\x04\x15\x06\x15\x06\x1c\x18\x04\x00\x00\x00\x3f\x18\x04\x00\x00\x00\x3f\x16\x00\x28\x04\x00\x00\x00\x3f\x18\x04\x00\x00\x00\x3f\x00\x00\x00\x03\x08\x01\x02\x00\x26\x8a\x07\x1c\x15\x08\x19\x35\x04\x00\x06\x19\x18\x04\x76\x61\x6c\x32\x15\x02\x16\x02\x16\x84\x01\x16\x8c\x01\x26\xa6\x06\x26\xfe\x05\x1c\x18\x04\x00\x00\x00\x3f\x18\x04\x00\x00\x00\x3f\x16\x00\x28\x04\x00\x00\x00\x3f\x18\x04\x00\x00\x00\x3f\x00\x00\x00\x15\x04\x15\x08\x15\x0c\x4c\x15\x02\x15\x04\x12\x00\x00\x04\x0c\x01\x00\x00\x00\x15\x00\x15\x06\x15\x0a\x2c\x15\x02\x15\x04\x15\x06\x15\x06\x1c\x36\x00\x28\x04\x01\x00\x00\x00\x18\x04\x01\x00\x00\x00\x00\x00\x00\x03\x08\x01\x02\x00\x26\xfe\x08\x1c\x15\x02\x19\x35\x04\x00\x06\x19\x18\x04\x76\x61\x6c\x33\x15\x02\x16\x02\x16\x6c\x16\x74\x26\xb2\x08\x26\x8a\x08\x1c\x36\x00\x28\x04\x01\x00\x00\x00\x18\x04\x01\x00\x00\x00\x00\x00\x00\x15\x02\x19\x6c\x35\x00\x18\x06\x73\x63\x68\x65\x6d\x61\x15\x0a\x00\x15\x04\x25\x00\x18\x02\x69\x64\x00\x15\x02\x25\x00\x18\x07\x62\x6c\x6f\x63\x6b\x4e\x6f\x25\x18\x4c\xac\x13\x10\x12\x00\x00\x00\x15\x0c\x25\x00\x18\x04\x76\x61\x6c\x31\x25\x00\x4c\x1c\x00\x00\x00\x15\x08\x25\x00\x18\x04\x76\x61\x6c\x32\x00\x15\x02\x25\x00\x18\x04\x76\x61\x6c\x33\x25\x16\x4c\xac\x13\x08\x12\x00\x00\x00\x16\x02\x19\x1c\x19\x5c\x26\xbc\x01\x1c\x15\x04\x19\x35\x04\x00\x06\x19\x18\x02\x69\x64\x15\x02\x16\x02\x16\xac\x01\x16\xb4\x01\x26\x38\x26\x08\x1c\x18\x08\x00\x00\x00\x00\x00\x00\x00\x00\x18\x08\x00\x00\x00\x00\x00\x00\x00\x00\x16\x00\x28\x08\x00\x00\x00\x00\x00\x00\x00\x00\x18\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x26\xc8\x03\x1c\x15\x02\x19\x35\x04\x00\x06\x19\x18\x07\x62\x6c\x6f\x63\x6b\x4e\x6f\x15\x02\x16\x02\x16\x6c\x16\x74\x26\xfc\x02\x26\xd4\x02\x1c\x36\x00\x28\x04\x00\x00\x00\x00\x18\x04\x00\x00\x00\x00\x00\x00\x00\x26\xa2\x05\x1c\x15\x0c\x19\x35\x04\x00\x06\x19\x18\x04\x76\x61\x6c\x31\x15\x02\x16\x02\x16\x68\x16\x70\x26\xde\x04\x26\xb2\
x04\x1c\x36\x00\x28\x02\x41\x4d\x18\x02\x41\x4d\x00\x00\x00\x26\x8a\x07\x1c\x15\x08\x19\x35\x04\x00\x06\x19\x18\x04\x76\x61\x6c\x32\x15\x02\x16\x02\x16\x84\x01\x16\x8c\x01\x26\xa6\x06\x26\xfe\x05\x1c\x18\x04\x00\x00\x00\x3f\x18\x04\x00\x00\x00\x3f\x16\x00\x28\x04\x00\x00\x00\x3f\x18\x04\x00\x00\x00\x3f\x00\x00\x00\x26\xfe\x08\x1c\x15\x02\x19\x35\x04\x00\x06\x19\x18\x04\x76\x61\x6c\x33\x15\x02\x16\x02\x16\x6c\x16\x74\x26\xb2\x08\x26\x8a\x08\x1c\x36\x00\x28\x04\x01\x00\x00\x00\x18\x04\x01\x00\x00\x00\x00\x00\x00\x16\x98\x05\x16\x02\x00\x28\x22\x70\x61\x72\x71\x75\x65\x74\x2d\x63\x70\x70\x20\x76\x65\x72\x73\x69\x6f\x6e\x20\x31\x2e\x35\x2e\x31\x2d\x53\x4e\x41\x50\x53\x48\x4f\x54\x19\x5c\x1c\x00\x00\x1c\x00\x00\x1c\x00\x00\x1c\x00\x00\x1c\x00\x00\x00\xc4\x01\x00\x00\x50\x41\x52\x31",
+            ],
+        },
+        "AvroConfluent": {
+            "data_sample": [
+                avro_confluent_message(
+                    schema_registry_client,
+                    {"id": 0, "blockNo": 0, "val1": "AM", "val2": 0.5, "val3": 1},
+                ),
+                b"".join(
+                    [
+                        avro_confluent_message(
+                            schema_registry_client,
+                            {
+                                "id": id,
+                                "blockNo": 0,
+                                "val1": "AM",
+                                "val2": 0.5,
+                                "val3": 1,
+                            },
+                        )
+                        for id in range(1, 16)
+                    ]
+                ),
+                avro_confluent_message(
+                    schema_registry_client,
+                    {"id": 0, "blockNo": 0, "val1": "AM", "val2": 0.5, "val3": 1},
+                ),
+            ],
+            "extra_settings": {
+                "format_avro_schema_registry_url": "http://{}:{}".format(
+                    kafka_cluster.schema_registry_host,
+                    kafka_cluster.schema_registry_port,
+                )
+            },
+            "supports_empty_value": True,
+        },
+        "Avro": {
+            # It seems impossible to send more than one Avro file per message,
+            # because of the nature of Avro: blocks follow one another
+            "data_sample": [
+                avro_message(
+                    {"id": 0, "blockNo": 0, "val1": "AM", "val2": 0.5, "val3": 1}
+                ),
+                avro_message(
+                    [
+                        {
+                            "id": id,
+                            "blockNo": 0,
+                            "val1": "AM",
+                            "val2": 0.5,
+                            "val3": 1,
+                        }
+                        for id in range(1, 16)
+                    ]
+                ),
+                avro_message(
+                    {"id": 0, "blockNo": 0, "val1": "AM", "val2": 0.5, "val3": 1}
+                ),
+            ],
+            "supports_empty_value": False,
+        },
+        "Arrow": {
+            "data_sample": [
b"\x41\x52\x52\x4f\x57\x31\x00\x00\xff\xff\xff\xff\x48\x01\x00\x00\x10\x00\x00\x00\x00\x00\x0a\x00\x0c\x00\x06\x00\x05\x00\x08\x00\x0a\x00\x00\x00\x00\x01\x03\x00\x0c\x00\x00\x00\x08\x00\x08\x00\x00\x00\x04\x00\x08\x00\x00\x00\x04\x00\x00\x00\x05\x00\x00\x00\xe4\x00\x00\x00\x9c\x00\x00\x00\x6c\x00\x00\x00\x34\x00\x00\x00\x04\x00\x00\x00\x40\xff\xff\xff\x00\x00\x00\x02\x18\x00\x00\x00\x0c\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x72\xff\xff\xff\x08\x00\x00\x00\x04\x00\x00\x00\x76\x61\x6c\x33\x00\x00\x00\x00\x6c\xff\xff\xff\x00\x00\x00\x03\x20\x00\x00\x00\x14\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x06\x00\x08\x00\x06\x00\x06\x00\x00\x00\x00\x00\x01\x00\x04\x00\x00\x00\x76\x61\x6c\x32\x00\x00\x00\x00\xa0\xff\xff\xff\x00\x00\x00\x05\x18\x00\x00\x00\x10\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x04\x00\x04\x00\x04\x00\x00\x00\x04\x00\x00\x00\x76\x61\x6c\x31\x00\x00\x00\x00\xcc\xff\xff\xff\x00\x00\x00\x02\x20\x00\x00\x00\x14\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x06\x00\x08\x00\x04\x00\x06\x00\x00\x00\x10\x00\x00\x00\x07\x00\x00\x00\x62\x6c\x6f\x63\x6b\x4e\x6f\x00\x10\x00\x14\x00\x08\x00\x00\x00\x07\x00\x0c\x00\x00\x00\x10\x00\x10\x00\x00\x00\x00\x00\x00\x02\x24\x00\x00\x00\x14\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x08\x00\x0c\x00\x08\x00\x07\x00\x08\x00\x00\x00\x00\x00\x00\x01\x40\x00\x00\x00\x02\x00\x00\x00\x69\x64\x00\x00\xff\xff\xff\xff\x58\x01\x00\x00\x14\x00\x00\x00\x00\x00\x00\x00\x0c\x00\x16\x00\x06\x00\x05\x00\x08\x00\x0c\x00\x0c\x00\x00\x00\x00\x03\x03\x00\x18\x00\x00\x00\x30\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0a\x00\x18\x00\x0c\x00\x04\x00\x08\x00\x0a\x00\x00\x00\xcc\x00\x00\x00\x10\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0b\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x18\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x28\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x28\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x41\x4d\x00\x00\x00\x00\x00\x00\x00\x00\x00\x3f\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\xff\xff\xff\xff\x00\x00\x00\x00\x10\x00\x00\x00\x0c\x00\x14\x00\x06\x00\x08\x00\x0c\x00\x10\x00\x0c\x00\x00\x00\x00\x00\x03\x00\x3c\x00\x00\x00\x28\x00\x00\x00\x04\x00\x00\x00\x01\x00\x00\x00\x58\x01\x00\x00\x00\x00\x00\x00\x60\x01\x00\x00\x00\x00\x00\x00\x30\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x08\x00\x08\x00\x00\x00\x04\x00\x08\x00\x00\x00\x04\x00\x00\x00\x05\x00\x00\x00\xe4\x00\x00\x00\x9c\x00\x00\x00\x6c\x00\x00\x00\x34\x00\x00\x00\x04\x00\x00\x00\x40\xff\xff\xff\x00\x00\x00\x02\x18\x00\x00\x00\x0c\x00\x00\x00\x04\x00\x00\x00\
x00\x00\x00\x00\x72\xff\xff\xff\x08\x00\x00\x00\x04\x00\x00\x00\x76\x61\x6c\x33\x00\x00\x00\x00\x6c\xff\xff\xff\x00\x00\x00\x03\x20\x00\x00\x00\x14\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x06\x00\x08\x00\x06\x00\x06\x00\x00\x00\x00\x00\x01\x00\x04\x00\x00\x00\x76\x61\x6c\x32\x00\x00\x00\x00\xa0\xff\xff\xff\x00\x00\x00\x05\x18\x00\x00\x00\x10\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x04\x00\x04\x00\x04\x00\x00\x00\x04\x00\x00\x00\x76\x61\x6c\x31\x00\x00\x00\x00\xcc\xff\xff\xff\x00\x00\x00\x02\x20\x00\x00\x00\x14\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x06\x00\x08\x00\x04\x00\x06\x00\x00\x00\x10\x00\x00\x00\x07\x00\x00\x00\x62\x6c\x6f\x63\x6b\x4e\x6f\x00\x10\x00\x14\x00\x08\x00\x00\x00\x07\x00\x0c\x00\x00\x00\x10\x00\x10\x00\x00\x00\x00\x00\x00\x02\x24\x00\x00\x00\x14\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x08\x00\x0c\x00\x08\x00\x07\x00\x08\x00\x00\x00\x00\x00\x00\x01\x40\x00\x00\x00\x02\x00\x00\x00\x69\x64\x00\x00\x78\x01\x00\x00\x41\x52\x52\x4f\x57\x31", + b"\x41\x52\x52\x4f\x57\x31\x00\x00\xff\xff\xff\xff\x48\x01\x00\x00\x10\x00\x00\x00\x00\x00\x0a\x00\x0c\x00\x06\x00\x05\x00\x08\x00\x0a\x00\x00\x00\x00\x01\x03\x00\x0c\x00\x00\x00\x08\x00\x08\x00\x00\x00\x04\x00\x08\x00\x00\x00\x04\x00\x00\x00\x05\x00\x00\x00\xe4\x00\x00\x00\x9c\x00\x00\x00\x6c\x00\x00\x00\x34\x00\x00\x00\x04\x00\x00\x00\x40\xff\xff\xff\x00\x00\x00\x02\x18\x00\x00\x00\x0c\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x72\xff\xff\xff\x08\x00\x00\x00\x04\x00\x00\x00\x76\x61\x6c\x33\x00\x00\x00\x00\x6c\xff\xff\xff\x00\x00\x00\x03\x20\x00\x00\x00\x14\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x06\x00\x08\x00\x06\x00\x06\x00\x00\x00\x00\x00\x01\x00\x04\x00\x00\x00\x76\x61\x6c\x32\x00\x00\x00\x00\xa0\xff\xff\xff\x00\x00\x00\x05\x18\x00\x00\x00\x10\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x04\x00\x04\x00\x04\x00\x00\x00\x04\x00\x00\x00\x76\x61\x6c\x31\x00\x00\x00\x00\xcc\xff\xff\xff\x00\x00\x00\x02\x20\x00\x00\x00\x14\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x06\x00\x08\x00\x04\x00\x06\x00\x00\x00\x10\x00\x00\x00\x07\x00\x00\x00\x62\x6c\x6f\x63\x6b\x4e\x6f\x00\x10\x00\x14\x00\x08\x00\x00\x00\x07\x00\x0c\x00\x00\x00\x10\x00\x10\x00\x00\x00\x00\x00\x00\x02\x24\x00\x00\x00\x14\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x08\x00\x0c\x00\x08\x00\x07\x00\x08\x00\x00\x00\x00\x00\x00\x01\x40\x00\x00\x00\x02\x00\x00\x00\x69\x64\x00\x00\xff\xff\xff\xff\x58\x01\x00\x00\x14\x00\x00\x00\x00\x00\x00\x00\x0c\x00\x16\x00\x06\x00\x05\x00\x08\x00\x0c\x00\x0c\x00\x00\x00\x00\x03\x03\x00\x18\x00\x00\x00\x48\x01\x00\x00\x00\x00\x00\x00\x00\x00\x0a\x00\x18\x00\x0c\x00\x04\x00\x08\x00\x0a\x00\x00\x00\xcc\x00\x00\x00\x10\x00\x00\x00\x0f\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0b\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x78\x00\x00\x00\x00\x00\x00\x00\x78\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x78\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00\x00\x00\x00\x00\x98\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x98\x00\x00\x00\x00\x00\x00\x00\x40\x00\x00\x00\x00\x00\x00\x00\xd8\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00\x00\x00\x00\x00\xf8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf8\x00\x00\x00\x00\x00\x00\x00\x40\x00\x00\x00\x00\x00\x00\x00\x38\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x38\x01\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x0f\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0f\x00\x00\x00\x00\
x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0f\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0f\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0f\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x09\x00\x00\x00\x00\x00\x00\x00\x0a\x00\x00\x00\x00\x00\x00\x00\x0b\x00\x00\x00\x00\x00\x00\x00\x0c\x00\x00\x00\x00\x00\x00\x00\x0d\x00\x00\x00\x00\x00\x00\x00\x0e\x00\x00\x00\x00\x00\x00\x00\x0f\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x04\x00\x00\x00\x06\x00\x00\x00\x08\x00\x00\x00\x0a\x00\x00\x00\x0c\x00\x00\x00\x0e\x00\x00\x00\x10\x00\x00\x00\x12\x00\x00\x00\x14\x00\x00\x00\x16\x00\x00\x00\x18\x00\x00\x00\x1a\x00\x00\x00\x1c\x00\x00\x00\x1e\x00\x00\x00\x41\x4d\x41\x4d\x41\x4d\x41\x4d\x41\x4d\x41\x4d\x41\x4d\x41\x4d\x41\x4d\x41\x4d\x41\x4d\x41\x4d\x41\x4d\x41\x4d\x41\x4d\x00\x00\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x00\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x00\xff\xff\xff\xff\x00\x00\x00\x00\x10\x00\x00\x00\x0c\x00\x14\x00\x06\x00\x08\x00\x0c\x00\x10\x00\x0c\x00\x00\x00\x00\x00\x03\x00\x3c\x00\x00\x00\x28\x00\x00\x00\x04\x00\x00\x00\x01\x00\x00\x00\x58\x01\x00\x00\x00\x00\x00\x00\x60\x01\x00\x00\x00\x00\x00\x00\x48\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x08\x00\x08\x00\x00\x00\x04\x00\x08\x00\x00\x00\x04\x00\x00\x00\x05\x00\x00\x00\xe4\x00\x00\x00\x9c\x00\x00\x00\x6c\x00\x00\x00\x34\x00\x00\x00\x04\x00\x00\x00\x40\xff\xff\xff\x00\x00\x00\x02\x18\x00\x00\x00\x0c\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x72\xff\xff\xff\x08\x00\x00\x00\x04\x00\x00\x00\x76\x61\x6c\x33\x00\x00\x00\x00\x6c\xff\xff\xff\x00\x00\x00\x03\x20\x00\x00\x00\x14\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x06\x00\x08\x00\x06\x00\x06\x00\x00\x00\x00\x00\x01\x00\x04\x00\x00\x00\x76\x61\x6c\x32\x00\x00\x00\x00\xa0\xff\xff\xff\x00\x00\x00\x05\x18\x00\x00\x00\x10\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x04\x00\x04\x00\x04\x00\x00\x00\x04\x00\x00\x00\x76\x61\x6c\x31\x00\x00\x00\x00\xcc\xff\xff\xff\x00\x00\x00\x02\x20\x00\x00\x00\x14\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x06\x00\x08\x00\x04\x00\x06\x00\x00\x00\x10\x00\x00\x00\x07\x00\x00\x00\x62\x6c\x6f\x63\x6b\x4e\x6f\x00\x10\x00\x14\x00\x08\x00\x00\x00\x07\x00\x0c\x00\x00\x00\x10\x00\x10\x00\x00\x00\x00\x00\x00\x02\x24\x00\x00\x00\x14\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x08\x00\x0c\x00\x08\x00\x07\x00\x08\x00\x00\x00\x00\x00\x00\x01\x40\x00\x00\x00\x02\x00\x00\x00\x69\x64\x00\x00\x78\x01\x00\x00\x41\x52\x52\x4f\x57\x31", + 
b"\x41\x52\x52\x4f\x57\x31\x00\x00\xff\xff\xff\xff\x48\x01\x00\x00\x10\x00\x00\x00\x00\x00\x0a\x00\x0c\x00\x06\x00\x05\x00\x08\x00\x0a\x00\x00\x00\x00\x01\x03\x00\x0c\x00\x00\x00\x08\x00\x08\x00\x00\x00\x04\x00\x08\x00\x00\x00\x04\x00\x00\x00\x05\x00\x00\x00\xe4\x00\x00\x00\x9c\x00\x00\x00\x6c\x00\x00\x00\x34\x00\x00\x00\x04\x00\x00\x00\x40\xff\xff\xff\x00\x00\x00\x02\x18\x00\x00\x00\x0c\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x72\xff\xff\xff\x08\x00\x00\x00\x04\x00\x00\x00\x76\x61\x6c\x33\x00\x00\x00\x00\x6c\xff\xff\xff\x00\x00\x00\x03\x20\x00\x00\x00\x14\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x06\x00\x08\x00\x06\x00\x06\x00\x00\x00\x00\x00\x01\x00\x04\x00\x00\x00\x76\x61\x6c\x32\x00\x00\x00\x00\xa0\xff\xff\xff\x00\x00\x00\x05\x18\x00\x00\x00\x10\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x04\x00\x04\x00\x04\x00\x00\x00\x04\x00\x00\x00\x76\x61\x6c\x31\x00\x00\x00\x00\xcc\xff\xff\xff\x00\x00\x00\x02\x20\x00\x00\x00\x14\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x06\x00\x08\x00\x04\x00\x06\x00\x00\x00\x10\x00\x00\x00\x07\x00\x00\x00\x62\x6c\x6f\x63\x6b\x4e\x6f\x00\x10\x00\x14\x00\x08\x00\x00\x00\x07\x00\x0c\x00\x00\x00\x10\x00\x10\x00\x00\x00\x00\x00\x00\x02\x24\x00\x00\x00\x14\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x08\x00\x0c\x00\x08\x00\x07\x00\x08\x00\x00\x00\x00\x00\x00\x01\x40\x00\x00\x00\x02\x00\x00\x00\x69\x64\x00\x00\xff\xff\xff\xff\x58\x01\x00\x00\x14\x00\x00\x00\x00\x00\x00\x00\x0c\x00\x16\x00\x06\x00\x05\x00\x08\x00\x0c\x00\x0c\x00\x00\x00\x00\x03\x03\x00\x18\x00\x00\x00\x30\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0a\x00\x18\x00\x0c\x00\x04\x00\x08\x00\x0a\x00\x00\x00\xcc\x00\x00\x00\x10\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0b\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x18\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x28\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x28\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x41\x4d\x00\x00\x00\x00\x00\x00\x00\x00\x00\x3f\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\xff\xff\xff\xff\x00\x00\x00\x00\x10\x00\x00\x00\x0c\x00\x14\x00\x06\x00\x08\x00\x0c\x00\x10\x00\x0c\x00\x00\x00\x00\x00\x03\x00\x3c\x00\x00\x00\x28\x00\x00\x00\x04\x00\x00\x00\x01\x00\x00\x00\x58\x01\x00\x00\x00\x00\x00\x00\x60\x01\x00\x00\x00\x00\x00\x00\x30\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x08\x00\x08\x00\x00\x00\x04\x00\x08\x00\x00\x00\x04\x00\x00\x00\x05\x00\x00\x00\xe4\x00\x00\x00\x9c\x00\x00\x00\x6c\x00\x00\x00\x34\x00\x00\x00\x04\x00\x00\x00\x40\xff\xff\xff\x00\x00\x00\x02\x18\x00\x00\x00\x0c\x00\x00\x00\x04\x00\x00\x00\
x00\x00\x00\x00\x72\xff\xff\xff\x08\x00\x00\x00\x04\x00\x00\x00\x76\x61\x6c\x33\x00\x00\x00\x00\x6c\xff\xff\xff\x00\x00\x00\x03\x20\x00\x00\x00\x14\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x06\x00\x08\x00\x06\x00\x06\x00\x00\x00\x00\x00\x01\x00\x04\x00\x00\x00\x76\x61\x6c\x32\x00\x00\x00\x00\xa0\xff\xff\xff\x00\x00\x00\x05\x18\x00\x00\x00\x10\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x04\x00\x04\x00\x04\x00\x00\x00\x04\x00\x00\x00\x76\x61\x6c\x31\x00\x00\x00\x00\xcc\xff\xff\xff\x00\x00\x00\x02\x20\x00\x00\x00\x14\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x06\x00\x08\x00\x04\x00\x06\x00\x00\x00\x10\x00\x00\x00\x07\x00\x00\x00\x62\x6c\x6f\x63\x6b\x4e\x6f\x00\x10\x00\x14\x00\x08\x00\x00\x00\x07\x00\x0c\x00\x00\x00\x10\x00\x10\x00\x00\x00\x00\x00\x00\x02\x24\x00\x00\x00\x14\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x08\x00\x0c\x00\x08\x00\x07\x00\x08\x00\x00\x00\x00\x00\x00\x01\x40\x00\x00\x00\x02\x00\x00\x00\x69\x64\x00\x00\x78\x01\x00\x00\x41\x52\x52\x4f\x57\x31", + ], + }, + "ArrowStream": { + "data_sample": [ + b"\xff\xff\xff\xff\x48\x01\x00\x00\x10\x00\x00\x00\x00\x00\x0a\x00\x0c\x00\x06\x00\x05\x00\x08\x00\x0a\x00\x00\x00\x00\x01\x03\x00\x0c\x00\x00\x00\x08\x00\x08\x00\x00\x00\x04\x00\x08\x00\x00\x00\x04\x00\x00\x00\x05\x00\x00\x00\xe4\x00\x00\x00\x9c\x00\x00\x00\x6c\x00\x00\x00\x34\x00\x00\x00\x04\x00\x00\x00\x40\xff\xff\xff\x00\x00\x00\x02\x18\x00\x00\x00\x0c\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x72\xff\xff\xff\x08\x00\x00\x00\x04\x00\x00\x00\x76\x61\x6c\x33\x00\x00\x00\x00\x6c\xff\xff\xff\x00\x00\x00\x03\x20\x00\x00\x00\x14\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x06\x00\x08\x00\x06\x00\x06\x00\x00\x00\x00\x00\x01\x00\x04\x00\x00\x00\x76\x61\x6c\x32\x00\x00\x00\x00\xa0\xff\xff\xff\x00\x00\x00\x05\x18\x00\x00\x00\x10\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x04\x00\x04\x00\x04\x00\x00\x00\x04\x00\x00\x00\x76\x61\x6c\x31\x00\x00\x00\x00\xcc\xff\xff\xff\x00\x00\x00\x02\x20\x00\x00\x00\x14\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x06\x00\x08\x00\x04\x00\x06\x00\x00\x00\x10\x00\x00\x00\x07\x00\x00\x00\x62\x6c\x6f\x63\x6b\x4e\x6f\x00\x10\x00\x14\x00\x08\x00\x00\x00\x07\x00\x0c\x00\x00\x00\x10\x00\x10\x00\x00\x00\x00\x00\x00\x02\x24\x00\x00\x00\x14\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x08\x00\x0c\x00\x08\x00\x07\x00\x08\x00\x00\x00\x00\x00\x00\x01\x40\x00\x00\x00\x02\x00\x00\x00\x69\x64\x00\x00\xff\xff\xff\xff\x58\x01\x00\x00\x14\x00\x00\x00\x00\x00\x00\x00\x0c\x00\x16\x00\x06\x00\x05\x00\x08\x00\x0c\x00\x0c\x00\x00\x00\x00\x03\x03\x00\x18\x00\x00\x00\x30\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0a\x00\x18\x00\x0c\x00\x04\x00\x08\x00\x0a\x00\x00\x00\xcc\x00\x00\x00\x10\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0b\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x18\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x28\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x28\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\
x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x41\x4d\x00\x00\x00\x00\x00\x00\x00\x00\x00\x3f\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\xff\xff\xff\xff\x00\x00\x00\x00", + b"\xff\xff\xff\xff\x48\x01\x00\x00\x10\x00\x00\x00\x00\x00\x0a\x00\x0c\x00\x06\x00\x05\x00\x08\x00\x0a\x00\x00\x00\x00\x01\x03\x00\x0c\x00\x00\x00\x08\x00\x08\x00\x00\x00\x04\x00\x08\x00\x00\x00\x04\x00\x00\x00\x05\x00\x00\x00\xe4\x00\x00\x00\x9c\x00\x00\x00\x6c\x00\x00\x00\x34\x00\x00\x00\x04\x00\x00\x00\x40\xff\xff\xff\x00\x00\x00\x02\x18\x00\x00\x00\x0c\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x72\xff\xff\xff\x08\x00\x00\x00\x04\x00\x00\x00\x76\x61\x6c\x33\x00\x00\x00\x00\x6c\xff\xff\xff\x00\x00\x00\x03\x20\x00\x00\x00\x14\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x06\x00\x08\x00\x06\x00\x06\x00\x00\x00\x00\x00\x01\x00\x04\x00\x00\x00\x76\x61\x6c\x32\x00\x00\x00\x00\xa0\xff\xff\xff\x00\x00\x00\x05\x18\x00\x00\x00\x10\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x04\x00\x04\x00\x04\x00\x00\x00\x04\x00\x00\x00\x76\x61\x6c\x31\x00\x00\x00\x00\xcc\xff\xff\xff\x00\x00\x00\x02\x20\x00\x00\x00\x14\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x06\x00\x08\x00\x04\x00\x06\x00\x00\x00\x10\x00\x00\x00\x07\x00\x00\x00\x62\x6c\x6f\x63\x6b\x4e\x6f\x00\x10\x00\x14\x00\x08\x00\x00\x00\x07\x00\x0c\x00\x00\x00\x10\x00\x10\x00\x00\x00\x00\x00\x00\x02\x24\x00\x00\x00\x14\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x08\x00\x0c\x00\x08\x00\x07\x00\x08\x00\x00\x00\x00\x00\x00\x01\x40\x00\x00\x00\x02\x00\x00\x00\x69\x64\x00\x00\xff\xff\xff\xff\x58\x01\x00\x00\x14\x00\x00\x00\x00\x00\x00\x00\x0c\x00\x16\x00\x06\x00\x05\x00\x08\x00\x0c\x00\x0c\x00\x00\x00\x00\x03\x03\x00\x18\x00\x00\x00\x48\x01\x00\x00\x00\x00\x00\x00\x00\x00\x0a\x00\x18\x00\x0c\x00\x04\x00\x08\x00\x0a\x00\x00\x00\xcc\x00\x00\x00\x10\x00\x00\x00\x0f\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0b\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x78\x00\x00\x00\x00\x00\x00\x00\x78\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x78\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00\x00\x00\x00\x00\x98\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x98\x00\x00\x00\x00\x00\x00\x00\x40\x00\x00\x00\x00\x00\x00\x00\xd8\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00\x00\x00\x00\x00\xf8\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf8\x00\x00\x00\x00\x00\x00\x00\x40\x00\x00\x00\x00\x00\x00\x00\x38\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x38\x01\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x0f\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0f\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0f\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0f\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0f\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x09\x00\x00\x00\x00\x00\x00\x00\x0a\x00\x00\x00\x00\x00\x00\x00\
x0b\x00\x00\x00\x00\x00\x00\x00\x0c\x00\x00\x00\x00\x00\x00\x00\x0d\x00\x00\x00\x00\x00\x00\x00\x0e\x00\x00\x00\x00\x00\x00\x00\x0f\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x04\x00\x00\x00\x06\x00\x00\x00\x08\x00\x00\x00\x0a\x00\x00\x00\x0c\x00\x00\x00\x0e\x00\x00\x00\x10\x00\x00\x00\x12\x00\x00\x00\x14\x00\x00\x00\x16\x00\x00\x00\x18\x00\x00\x00\x1a\x00\x00\x00\x1c\x00\x00\x00\x1e\x00\x00\x00\x41\x4d\x41\x4d\x41\x4d\x41\x4d\x41\x4d\x41\x4d\x41\x4d\x41\x4d\x41\x4d\x41\x4d\x41\x4d\x41\x4d\x41\x4d\x41\x4d\x41\x4d\x00\x00\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x3f\x00\x00\x00\x00\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x00\xff\xff\xff\xff\x00\x00\x00\x00", + b"\xff\xff\xff\xff\x48\x01\x00\x00\x10\x00\x00\x00\x00\x00\x0a\x00\x0c\x00\x06\x00\x05\x00\x08\x00\x0a\x00\x00\x00\x00\x01\x03\x00\x0c\x00\x00\x00\x08\x00\x08\x00\x00\x00\x04\x00\x08\x00\x00\x00\x04\x00\x00\x00\x05\x00\x00\x00\xe4\x00\x00\x00\x9c\x00\x00\x00\x6c\x00\x00\x00\x34\x00\x00\x00\x04\x00\x00\x00\x40\xff\xff\xff\x00\x00\x00\x02\x18\x00\x00\x00\x0c\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x72\xff\xff\xff\x08\x00\x00\x00\x04\x00\x00\x00\x76\x61\x6c\x33\x00\x00\x00\x00\x6c\xff\xff\xff\x00\x00\x00\x03\x20\x00\x00\x00\x14\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x06\x00\x08\x00\x06\x00\x06\x00\x00\x00\x00\x00\x01\x00\x04\x00\x00\x00\x76\x61\x6c\x32\x00\x00\x00\x00\xa0\xff\xff\xff\x00\x00\x00\x05\x18\x00\x00\x00\x10\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x04\x00\x04\x00\x04\x00\x00\x00\x04\x00\x00\x00\x76\x61\x6c\x31\x00\x00\x00\x00\xcc\xff\xff\xff\x00\x00\x00\x02\x20\x00\x00\x00\x14\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x06\x00\x08\x00\x04\x00\x06\x00\x00\x00\x10\x00\x00\x00\x07\x00\x00\x00\x62\x6c\x6f\x63\x6b\x4e\x6f\x00\x10\x00\x14\x00\x08\x00\x00\x00\x07\x00\x0c\x00\x00\x00\x10\x00\x10\x00\x00\x00\x00\x00\x00\x02\x24\x00\x00\x00\x14\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x08\x00\x0c\x00\x08\x00\x07\x00\x08\x00\x00\x00\x00\x00\x00\x01\x40\x00\x00\x00\x02\x00\x00\x00\x69\x64\x00\x00\xff\xff\xff\xff\x58\x01\x00\x00\x14\x00\x00\x00\x00\x00\x00\x00\x0c\x00\x16\x00\x06\x00\x05\x00\x08\x00\x0c\x00\x0c\x00\x00\x00\x00\x03\x03\x00\x18\x00\x00\x00\x30\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0a\x00\x18\x00\x0c\x00\x04\x00\x08\x00\x0a\x00\x00\x00\xcc\x00\x00\x00\x10\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0b\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x18\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x28\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x28\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\
x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x41\x4d\x00\x00\x00\x00\x00\x00\x00\x00\x00\x3f\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\xff\xff\xff\xff\x00\x00\x00\x00", + ], + }, + } + + topic_postfix = str(hash(create_query_generator)) + for format_name, format_opts in list(all_formats.items()): + logging.debug(f"Set up {format_name}") + topic_name = f"format_tests_{format_name}-{topic_postfix}" + data_sample = format_opts["data_sample"] + data_prefix = [] + # prepend empty value when supported + if format_opts.get("supports_empty_value", False): + data_prefix = data_prefix + [""] + kafka_produce(kafka_cluster, topic_name, data_prefix + data_sample) + + extra_settings = format_opts.get("extra_settings") or {} + extra_settings["kafka_flush_interval_ms"] = 1000 + + instance.query( + """ + DROP TABLE IF EXISTS test.kafka_{format_name}; + + {create_query}; + + DROP TABLE IF EXISTS test.kafka_{format_name}_mv; + + CREATE MATERIALIZED VIEW test.kafka_{format_name}_mv ENGINE=MergeTree ORDER BY tuple() AS + SELECT *, _topic, _partition, _offset FROM test.kafka_{format_name}; + """.format( + topic_name=topic_name, + format_name=format_name, + create_query=create_query_generator( + f"kafka_{format_name}", + "id Int64, blockNo UInt16, val1 String, val2 Float32, val3 UInt8", + topic_list=f"{topic_name}", + consumer_group=f"{topic_name}_group", + format=format_name, + settings=extra_settings, + ), + ) + ) + raw_expected = """\ +0 0 AM 0.5 1 {topic_name} 0 {offset_0} +1 0 AM 0.5 1 {topic_name} 0 {offset_1} +2 0 AM 0.5 1 {topic_name} 0 {offset_1} +3 0 AM 0.5 1 {topic_name} 0 {offset_1} +4 0 AM 0.5 1 {topic_name} 0 {offset_1} +5 0 AM 0.5 1 {topic_name} 0 {offset_1} +6 0 AM 0.5 1 {topic_name} 0 {offset_1} +7 0 AM 0.5 1 {topic_name} 0 {offset_1} +8 0 AM 0.5 1 {topic_name} 0 {offset_1} +9 0 AM 0.5 1 {topic_name} 0 {offset_1} +10 0 AM 0.5 1 {topic_name} 0 {offset_1} +11 0 AM 0.5 1 {topic_name} 0 {offset_1} +12 0 AM 0.5 1 {topic_name} 0 {offset_1} +13 0 AM 0.5 1 {topic_name} 0 {offset_1} +14 0 AM 0.5 1 {topic_name} 0 {offset_1} +15 0 AM 0.5 1 {topic_name} 0 {offset_1} +0 0 AM 0.5 1 {topic_name} 0 {offset_2} +""" + + expected_rows_count = raw_expected.count("\n") + result_checker = lambda res: res.count("\n") == expected_rows_count + res = instance.query_with_retry( + f"SELECT * FROM test.kafka_{list(all_formats.keys())[-1]}_mv;", + retry_count=30, + sleep_time=1, + check_callback=result_checker, + ) + assert result_checker(res) + + for format_name, format_opts in list(all_formats.items()): + logging.debug(("Checking {}".format(format_name))) + topic_name = f"format_tests_{format_name}-{topic_postfix}" + # shift offsets by 1 if format supports empty value + offsets = ( + [1, 2, 3] if format_opts.get("supports_empty_value", False) else [0, 1, 2] + ) + result = instance.query_with_retry( + "SELECT * FROM test.kafka_{format_name}_mv;".format( + format_name=format_name + ), + check_callback=lambda x: x.count("\n") == raw_expected.count("\n"), + ) + expected = raw_expected.format( + topic_name=topic_name, + offset_0=offsets[0], + offset_1=offsets[1], + offset_2=offsets[2], + ) + assert TSV(result) == TSV(expected), "Proper result for format: {}".format( + format_name + ) + kafka_delete_topic(get_admin_client(kafka_cluster), topic_name) 
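+
+    # A note on the offsets checked above (descriptive only, nothing new is asserted):
+    # formats with "supports_empty_value" get an empty message prepended, so their
+    # three samples land at offsets [1, 2, 3] instead of [0, 1, 2]; the middle sample
+    # is a single Kafka message carrying ids 1..15, which is why those rows all share
+    # offset_1 in raw_expected.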
+
+
+@pytest.mark.parametrize(
+    "create_query_generator, log_line",
+    [
+        (generate_old_create_table_query, "{}.*Polled offset [0-9]+"),
+        (generate_new_create_table_query, "{}.*Saved offset"),
+    ],
+)
+def test_kafka_rebalance(
+    kafka_cluster: ClickHouseCluster,
+    instance: ClickHouseInstance,
+    create_query_generator,
+    log_line,
+):
+    NUMBER_OF_CONCURRENT_CONSUMERS = 11
+
+    instance.query(
+        """
+        DROP TABLE IF EXISTS test.destination;
+        CREATE TABLE test.destination (
+            key UInt64,
+            value UInt64,
+            _topic String,
+            _key String,
+            _offset UInt64,
+            _partition UInt64,
+            _timestamp Nullable(DateTime('UTC')),
+            _consumed_by LowCardinality(String)
+        )
+        ENGINE = MergeTree()
+        ORDER BY key;
+        """
+    )
+
+    admin_client = KafkaAdminClient(
+        bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port)
+    )
+    topic_name = "topic_with_multiple_partitions" + get_topic_postfix(
+        create_query_generator
+    )
+    table_name_prefix = "kafka_consumer"
+    keeper_path = f"/clickhouse/{{database}}/{table_name_prefix}"
+    # 11 partitions to match the 11 consumers
+    with kafka_topic(admin_client, topic_name, num_partitions=11):
+        cancel = threading.Event()
+
+        msg_index = [0]
+
+        def produce():
+            while not cancel.is_set():
+                messages = []
+                for _ in range(59):
+                    messages.append(
+                        json.dumps({"key": msg_index[0], "value": msg_index[0]})
+                    )
+                    msg_index[0] += 1
+                kafka_produce(kafka_cluster, topic_name, messages)
+
+        kafka_thread = threading.Thread(target=produce)
+        kafka_thread.start()
+
+        for consumer_index in range(NUMBER_OF_CONCURRENT_CONSUMERS):
+            table_name = f"{table_name_prefix}{consumer_index}"
+            replica_name = f"r{consumer_index}"
+            logging.debug(f"Setting up {consumer_index}")
+
+            create_query = create_query_generator(
+                table_name,
+                "key UInt64, value UInt64",
+                topic_list=topic_name,
+                keeper_path=keeper_path,
+                replica_name=replica_name,
+                settings={
+                    "kafka_max_block_size": 33,
+                    "kafka_flush_interval_ms": 500,
+                },
+            )
+            instance.query(
+                f"""
+                DROP TABLE IF EXISTS test.{table_name};
+                DROP TABLE IF EXISTS test.{table_name}_mv;
+                {create_query};
+                CREATE MATERIALIZED VIEW test.{table_name}_mv TO test.destination AS
+                SELECT
+                    key,
+                    value,
+                    _topic,
+                    _key,
+                    _offset,
+                    _partition,
+                    _timestamp,
+                    '{table_name}' as _consumed_by
+                FROM test.{table_name};
+                """
+            )
+            # Wait for test.kafka_consumerX to start consuming ...
+            instance.wait_for_log_line(log_line.format(table_name))
+
+        cancel.set()
+
+        # Intentionally leave the last consumer working, so it can finish consuming after all rebalances
+        for consumer_index in range(NUMBER_OF_CONCURRENT_CONSUMERS - 1):
+            logging.debug("Dropping test.kafka_consumer{}".format(consumer_index))
+            instance.query(
+                "DROP TABLE IF EXISTS test.kafka_consumer{} SYNC".format(consumer_index)
+            )
+
+        while True:
+            messages_consumed = int(
+                instance.query("SELECT uniqExact(key) FROM test.destination")
+            )
+            if messages_consumed >= msg_index[0]:
+                break
+            time.sleep(1)
+            logging.debug(
+                "Waiting to finish consuming (have {}, should be {})".format(
+                    messages_consumed, msg_index[0]
+                )
+            )
+
+        logging.debug(
+            instance.query(
+                "SELECT count(), uniqExact(key), max(key) + 1 FROM test.destination"
+            )
+        )
+
+        # Some queries to debug...
+        # SELECT * FROM test.destination where key in (SELECT key FROM test.destination group by key having count() <> 1)
+        # select number + 1 as key from numbers(4141) x left join test.destination using (key) where test.destination.key = 0;
+        # SELECT * FROM test.destination WHERE key between 2360 and 2370 order by key;
+        # select _partition from test.destination group by _partition having count() <> max(_offset) + 1;
+        # select toUInt64(0) as _partition, number + 1 as _offset from numbers(400) x left join test.destination using (_partition,_offset) where test.destination.key = 0 order by _offset;
+        # SELECT * FROM test.destination WHERE _partition = 0 and _offset between 220 and 240 order by _offset;
+
+        # CREATE TABLE test.reference (key UInt64, value UInt64) ENGINE = Kafka SETTINGS kafka_broker_list = 'kafka1:19092',
+        #     kafka_topic_list = 'topic_with_multiple_partitions',
+        #     kafka_group_name = 'rebalance_test_group_reference',
+        #     kafka_format = 'JSONEachRow',
+        #     kafka_max_block_size = 100000;
+        #
+        # CREATE MATERIALIZED VIEW test.reference_mv Engine=Log AS
+        #     SELECT key, value, _topic, _key, _offset, _partition, _timestamp, 'reference' as _consumed_by
+        #     FROM test.reference;
+        #
+        # select * from test.reference_mv left join test.destination using (key, _topic, _offset, _partition) where test.destination._consumed_by = '';
+
+        # count() == uniqExact(key) holds only if no key was consumed more than once
+        result = int(
+            instance.query("SELECT count() == uniqExact(key) FROM test.destination")
+        )
+
+        for consumer_index in range(NUMBER_OF_CONCURRENT_CONSUMERS):
+            logging.debug("kafka_consumer{}".format(consumer_index))
+            table_name = "kafka_consumer{}".format(consumer_index)
+            instance.query(
+                """
+                DROP TABLE IF EXISTS test.{0};
+                DROP TABLE IF EXISTS test.{0}_mv;
+                """.format(
+                    table_name
+                )
+            )
+
+        instance.query(
+            """
+            DROP TABLE IF EXISTS test.destination;
+            """
+        )
+
+        kafka_thread.join()
+
+        assert result == 1, "Messages from Kafka got duplicated!"
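+
+
+# The produce() helper in test_kafka_rebalance keeps its counter in a one-element
+# list (msg_index = [0]) because a closure cannot rebind an outer local directly.
+# A minimal sketch of the same pattern, with illustrative names that are not part
+# of the test suite:
+#
+#     def make_counter():
+#         count = [0]  # mutable cell shared with the closure
+#
+#         def next_value():
+#             count[0] += 1  # mutates the cell in place, so no rebinding happens
+#             return count[0] - 1
+#
+#         return next_value
+#
+# With `nonlocal count` a plain integer would work just as well on Python 3.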
+

if __name__ == "__main__":
    init_cluster_and_instance()
    conftest_cluster.start()
    input("Cluster created, press Enter to destroy...")
    conftest_cluster.shutdown()

From 4b96ca960e7045733ca865889ffb08f4bfef5c98 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?=
Date: Wed, 18 Sep 2024 15:22:14 +0000
Subject: [PATCH 10/11] Update parallel_skip.json

---
 tests/integration/parallel_skip.json | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/integration/parallel_skip.json b/tests/integration/parallel_skip.json
index ed1808bf27b..6767aa85be9 100644
--- a/tests/integration/parallel_skip.json
+++ b/tests/integration/parallel_skip.json
@@ -104,7 +104,6 @@
     "test_storage_kafka/test_1.py::test_kafka_duplicates_when_commit_failed",
     "test_storage_kafka/test_1.py::test_commits_of_unprocessed_messages_on_drop",
     "test_storage_kafka/test_1.py::test_kafka_produce_consume",
-    "test_storage_kafka/test_1.py::test_system_kafka_consumers_rebalance",
     "test_storage_kafka/test_1.py::test_kafka_materialized_view",
     "test_storage_kafka/test_1.py::test_kafka_materialized_view_with_subquery",
     "test_storage_kafka/test_1.py::test_kafka_lot_of_partitions_partial_commit_of_bulk",
@@ -148,8 +147,6 @@
     "test_storage_kafka/test_3.py::test_kafka_tsv_with_delimiter",
     "test_storage_kafka/test_3.py::test_kafka_csv_with_delimiter",
     "test_storage_kafka/test_3.py::test_format_with_prefix_and_suffix",
-    "test_storage_kafka/test_4.py::test_kafka_formats",
-    "test_storage_kafka/test_4.py::test_kafka_rebalance",
     "test_storage_kafka/test_4.py::test_kafka_formats_with_broken_message",
     "test_storage_kafka/test_4.py::test_kafka_flush_by_time",
     "test_storage_kafka/test_4.py::test_system_kafka_consumers_rebalance",
@@ -164,6 +161,9 @@
     "test_storage_kafka/test_4.py::test_kafka_settings_predefined_macros",
     "test_storage_kafka/test_4.py::test_kafka_issue4116",
     "test_storage_kafka/test_4.py::test_kafka_issue14202",
+    "test_storage_kafka/test_5.py::test_kafka_formats",
+    "test_storage_kafka/test_5.py::test_kafka_rebalance",
+
     "test_storage_kafka/test_produce_http_interface.py::test_kafka_produce_http_interface_row_based_format",

From 95b8d80318e54def4cfec9f4825072f75c8cea1c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?=
Date: Wed, 18 Sep 2024 18:26:56 +0000
Subject: [PATCH 11/11] Try to run kafka tests in parallel

---
 tests/integration/parallel_skip.json | 68 ----------------------
 1 file changed, 68 deletions(-)

diff --git a/tests/integration/parallel_skip.json b/tests/integration/parallel_skip.json
index 6767aa85be9..1b48b64593e 100644
--- a/tests/integration/parallel_skip.json
+++ b/tests/integration/parallel_skip.json
@@ -99,74 +99,6 @@
     "test_ttl_move/test.py::TestCancelBackgroundMoving::test_cancel_background_moving_on_table_detach",
     "test_ttl_move/test.py::TestCancelBackgroundMoving::test_cancel_background_moving_on_zookeeper_disconnect",

-    "test_storage_kafka/test_1.py::test_kafka_consumer_failover",
-    "test_storage_kafka/test_1.py::test_row_based_formats",
-    "test_storage_kafka/test_1.py::test_kafka_duplicates_when_commit_failed",
-    "test_storage_kafka/test_1.py::test_commits_of_unprocessed_messages_on_drop",
-    "test_storage_kafka/test_1.py::test_kafka_produce_consume",
-    "test_storage_kafka/test_1.py::test_kafka_materialized_view",
-    "test_storage_kafka/test_1.py::test_kafka_materialized_view_with_subquery",
-    "test_storage_kafka/test_1.py::test_kafka_lot_of_partitions_partial_commit_of_bulk",
-    "test_storage_kafka/test_1.py::test_issue26643",
-
"test_storage_kafka/test_1.py::test_kafka_csv_with_thread_per_consumer", - "test_storage_kafka/test_1.py::test_kafka_select_empty", - "test_storage_kafka/test_1.py::test_kafka_issue11308", - "test_storage_kafka/test_1.py::test_kafka_protobuf", - "test_storage_kafka/test_1.py::test_kafka_predefined_configuration", - "test_storage_kafka/test_1.py::test_kafka_insert_avro", - "test_storage_kafka/test_1.py::test_exception_from_destructor", - "test_storage_kafka/test_2.py::test_kafka_unavailable", - "test_storage_kafka/test_2.py::test_bad_reschedule", - "test_storage_kafka/test_2.py::test_librdkafka_compression", - "test_storage_kafka/test_2.py::test_multiple_read_in_materialized_views", - "test_storage_kafka/test_2.py::test_kafka_virtual_columns2", - "test_storage_kafka/test_2.py::test_kafka_produce_key_timestamp", - "test_storage_kafka/test_2.py::test_kafka_engine_put_errors_to_stream_with_random_malformed_json", - "test_storage_kafka/test_2.py::test_kafka_produce_consume_avro", - "test_storage_kafka/test_2.py::test_kafka_virtual_columns_with_materialized_view", - "test_storage_kafka/test_2.py::test_system_kafka_consumers_rebalance_mv", - "test_storage_kafka/test_2.py::test_system_kafka_consumers", - "test_storage_kafka/test_2.py::test_kafka_insert", - "test_storage_kafka/test_2.py::test_kafka_settings_old_syntax", - "test_storage_kafka/test_2.py::test_kafka_json_without_delimiter", - "test_storage_kafka/test_2.py::test_kafka_string_field_on_first_position_in_protobuf", - "test_storage_kafka/test_2.py::test_block_based_formats_1", - "test_storage_kafka/test_3.py::test_formats_errors", - "test_storage_kafka/test_3.py::test_block_based_formats_2", - "test_storage_kafka/test_3.py::test_kafka_flush_on_big_message", - "test_storage_kafka/test_3.py::test_kafka_recreate_kafka_table", - "test_storage_kafka/test_3.py::test_kafka_commit_on_block_write", - "test_storage_kafka/test_3.py::test_kafka_no_holes_when_write_suffix_failed", - "test_storage_kafka/test_3.py::test_kafka_flush_by_block_size", - "test_storage_kafka/test_3.py::test_max_rows_per_message", - "test_storage_kafka/test_3.py::test_kafka_many_materialized_views", - "test_storage_kafka/test_3.py::test_kafka_engine_put_errors_to_stream", - "test_storage_kafka/test_3.py::test_kafka_producer_consumer_separate_settings", - "test_storage_kafka/test_3.py::test_kafka_virtual_columns", - "test_storage_kafka/test_3.py::test_kafka_settings_new_syntax", - "test_storage_kafka/test_3.py::test_kafka_tsv_with_delimiter", - "test_storage_kafka/test_3.py::test_kafka_csv_with_delimiter", - "test_storage_kafka/test_3.py::test_format_with_prefix_and_suffix", - "test_storage_kafka/test_4.py::test_kafka_formats_with_broken_message", - "test_storage_kafka/test_4.py::test_kafka_flush_by_time", - "test_storage_kafka/test_4.py::test_system_kafka_consumers_rebalance", - "test_storage_kafka/test_4.py::test_kafka_consumer_hang", - "test_storage_kafka/test_4.py::test_kafka_null_message", - "test_storage_kafka/test_4.py::test_kafka_read_consumers_in_parallel", - "test_storage_kafka/test_4.py::test_premature_flush_on_eof", - "test_storage_kafka/test_4.py::test_num_consumers_limit", - "test_storage_kafka/test_4.py::test_kafka_column_types", - "test_storage_kafka/test_4.py::test_kafka_protobuf_no_delimiter", - "test_storage_kafka/test_4.py::test_kafka_json_as_string", - "test_storage_kafka/test_4.py::test_kafka_settings_predefined_macros", - "test_storage_kafka/test_4.py::test_kafka_issue4116", - "test_storage_kafka/test_4.py::test_kafka_issue14202", - 
"test_storage_kafka/test_5.py::test_kafka_formats", - "test_storage_kafka/test_5.py::test_kafka_rebalance", - - - "test_storage_kafka/test_produce_http_interface.py::test_kafka_produce_http_interface_row_based_format", - "test_storage_kerberized_kafka/test.py::test_kafka_json_as_string", "test_storage_kerberized_kafka/test.py::test_kafka_json_as_string_request_new_ticket_after_expiration", "test_storage_kerberized_kafka/test.py::test_kafka_json_as_string_no_kdc",