ClickHouse/dbms/tests/integration/test_storage_kafka/test.py

import os.path as p
import time
import pytest

from helpers.cluster import ClickHouseCluster
from helpers.test_tools import TSV

import json
import subprocess
import kafka.errors
from kafka import KafkaAdminClient, KafkaProducer
from google.protobuf.internal.encoder import _VarintBytes

"""
protoc --version
libprotoc 3.0.0

# to create kafka_pb2.py
protoc --python_out=. kafka.proto
"""
import kafka_pb2


# TODO: add a test for run-time offset updates in CH when the offset is changed manually on the Kafka side.
# TODO: add a test that the mat. view is working.
# TODO: add a test that SELECT LIMIT is working.
# TODO: modify tests to respect the `skip_broken_messages` setting.

# Shared test cluster with one ClickHouse instance named 'instance'.
# `with_kafka=True` presumably makes the cluster helper bring up Kafka as
# well -- see helpers.cluster to confirm.
cluster = ClickHouseCluster(__file__)
instance = cluster.add_instance(
    'instance',
    main_configs=['configs/kafka.xml'],
    with_kafka=True,
    clickhouse_path_dir='clickhouse_path'
)
# Docker id of the Kafka container. Assigned by the kafka_cluster fixture and
# read by check_kafka_is_available().
kafka_id = ''


# Helpers

def check_kafka_is_available():
    """Return True when the broker probe command exits with status 0.

    Runs ``/usr/bin/kafka-broker-api-versions`` through ``docker exec`` inside
    the container identified by the module-level ``kafka_id``.
    """
    command = (
        'docker', 'exec', '-i', kafka_id,
        '/usr/bin/kafka-broker-api-versions',
        '--bootstrap-server', 'INSIDE://localhost:9092',
    )
    # stdout is piped so the tool's output is captured; it is never inspected.
    probe = subprocess.Popen(command, stdout=subprocess.PIPE)
    probe.communicate()
    return probe.returncode == 0


def wait_kafka_is_available(max_retries=50):
    """Block until check_kafka_is_available() reports success.

    The check is repeated with a one-second pause after every failure.

    Args:
        max_retries: number of failed checks tolerated; the failure that
            makes the count exceed this value aborts the wait.

    Raises:
        RuntimeError: if the check has failed more than ``max_retries`` times.
    """
    retries = 0
    while not check_kafka_is_available():
        retries += 1
        if retries > max_retries:
            # A bare string cannot be raised (that is itself a TypeError), so
            # use a real exception type to keep the message visible.
            raise RuntimeError("Kafka is not available")
        print("Waiting for Kafka to start up")
        time.sleep(1)


def kafka_produce(topic, messages):
    """Send every item of ``messages`` to ``topic`` via a new KafkaProducer.

    The producer is flushed after each individual send, and a summary line is
    printed once the loop is done.
    """
    sender = KafkaProducer(bootstrap_servers="localhost:9092")
    for payload in messages:
        sender.send(topic=topic, value=payload)
        # Flush after every message rather than once at the end.
        sender.flush()
    summary = "Produced {} messages for topic {}".format(len(messages), topic)
    print(summary)


def kafka_produce_protobuf_messages(topic, start_index, num_messages):
    """Send ``num_messages`` KeyValuePair records to ``topic`` as ONE Kafka message.

    Records are built for keys ``start_index`` .. ``start_index + num_messages - 1``
    with ``value`` set to the decimal string of the key. Each serialized
    record is prefixed with its length encoded as a varint, and all records
    are concatenated into a single payload.

    Args:
        topic: Kafka topic to publish to.
        start_index: key of the first record.
        num_messages: number of records packed into the payload.
    """
    # Collect byte chunks and join them once. Starting from a text '' and
    # concatenating serialized bytes onto it fails wherever str and bytes are
    # distinct types.
    chunks = []
    for i in range(start_index, start_index + num_messages):
        msg = kafka_pb2.KeyValuePair()
        msg.key = i
        msg.value = str(i)
        serialized_msg = msg.SerializeToString()
        chunks.append(_VarintBytes(len(serialized_msg)))
        chunks.append(serialized_msg)
    data = b''.join(chunks)

    producer = KafkaProducer(bootstrap_servers="localhost:9092")
    producer.send(topic=topic, value=data)
    producer.flush()
    print("Produced {} messages for topic {}".format(num_messages, topic))


# Since everything is async and shaky when receiving messages from Kafka,
# we may want to try and check results multiple times in a loop.
def kafka_check_result(result, check=False, ref_file='test_kafka_json.reference'):
    """Compare ``result`` with a reference file located next to this module.

    Both sides are wrapped in ``TSV`` before comparison.

    Args:
        result: query output to verify.
        check: when true, assert equality (and return None) instead of
            returning the comparison result.
        ref_file: name of the reference file, relative to this file's directory.

    Returns:
        The outcome of the comparison when ``check`` is false.
    """
    with open(p.join(p.dirname(__file__), ref_file)) as expected:
        if not check:
            return TSV(result) == TSV(expected)
        assert TSV(result) == TSV(expected)


# Fixtures

@pytest.fixture(scope="module")
def kafka_cluster():
    """Module-scoped fixture that starts the cluster and yields it.

    After start-up it stores the Kafka container id in the module-level
    ``kafka_id`` and creates the ``test`` database. The cluster is shut down
    when the fixture is finalized, including when start-up itself raised.
    """
    global kafka_id
    try:
        cluster.start()
        kafka_id = instance.cluster.kafka_docker_id
        print("kafka_id is {}".format(kafka_id))
        instance.query('CREATE DATABASE test')
        yield cluster
    finally:
        cluster.shutdown()


@pytest.fixture(autouse=True)
def kafka_setup_teardown():
    """Autouse fixture wrapped around every test in this module.

    Before the test: drops any leftover ``test.kafka`` table and waits until
    Kafka is reachable. After the test: drops ``test.kafka`` again.
    """
    instance.query('DROP TABLE IF EXISTS test.kafka')
    wait_kafka_is_available()
    print("kafka is available - running test")
    yield  # run test
    # IF EXISTS: a test that failed before creating the table must not trigger
    # a second, unrelated error during teardown.
    instance.query('DROP TABLE IF EXISTS test.kafka')


# Tests

def test_kafka_settings_old_syntax(kafka_cluster):
    """Kafka table declared with positional engine arguments.

    Produces 50 JSON messages to topic 'old' and checks the accumulated
    SELECT output against the default reference file.
    """
    instance.query('''
        CREATE TABLE test.kafka (key UInt64, value UInt64)
            ENGINE = Kafka('kafka1:19092', 'old', 'old', 'JSONEachRow', '\\n');
        ''')

    # Don't insert malformed messages since old settings syntax
    # doesn't support skipping of broken messages.
    rows = [json.dumps({'key': n, 'value': n}) for n in range(50)]
    kafka_produce('old', rows)

    # Keep appending SELECT output until it matches the reference, giving up
    # after 50 reads; the final call asserts.
    collected = ''
    for _ in range(50):
        collected += instance.query('SELECT * FROM test.kafka')
        if kafka_check_result(collected):
            break
    kafka_check_result(collected, True)


def test_kafka_settings_new_syntax(kafka_cluster):
    """Kafka table declared through the SETTINGS clause.

    Two malformed messages are produced between two batches of 25 JSON
    messages; the table is created with ``kafka_skip_broken_messages = 1``.
    The accumulated SELECT output is checked against the default reference.
    """
    instance.query('''
        CREATE TABLE test.kafka (key UInt64, value UInt64)
            ENGINE = Kafka
            SETTINGS
                kafka_broker_list = 'kafka1:19092',
                kafka_topic_list = 'new',
                kafka_group_name = 'new',
                kafka_format = 'JSONEachRow',
                kafka_row_delimiter = '\\n',
                kafka_skip_broken_messages = 1;
        ''')

    first_batch = [json.dumps({'key': n, 'value': n}) for n in range(25)]
    kafka_produce('new', first_batch)

    # Insert couple of malformed messages.
    for broken in ('}{very_broken_message,', '}another{very_broken_message,'):
        kafka_produce('new', [broken])

    second_batch = [json.dumps({'key': n, 'value': n}) for n in range(25, 50)]
    kafka_produce('new', second_batch)

    # Poll up to 50 times, accumulating output, then assert on the total.
    collected = ''
    for _ in range(50):
        collected += instance.query('SELECT * FROM test.kafka')
        if kafka_check_result(collected):
            break
    kafka_check_result(collected, True)


def test_kafka_csv_with_delimiter(kafka_cluster):
    """CSV-formatted messages read with an explicit row delimiter.

    Produces 50 ``"<i>, <i>"`` lines to topic 'csv' and checks the accumulated
    SELECT output against the default reference file.
    """
    instance.query('''
        CREATE TABLE test.kafka (key UInt64, value UInt64)
            ENGINE = Kafka
            SETTINGS
                kafka_broker_list = 'kafka1:19092',
                kafka_topic_list = 'csv',
                kafka_group_name = 'csv',
                kafka_format = 'CSV',
                kafka_row_delimiter = '\\n';
        ''')

    rows = ['{i}, {i}'.format(i=n) for n in range(50)]
    kafka_produce('csv', rows)

    # Poll up to 50 times, accumulating output, then assert on the total.
    collected = ''
    for _ in range(50):
        collected += instance.query('SELECT * FROM test.kafka')
        if kafka_check_result(collected):
            break
    kafka_check_result(collected, True)


def test_kafka_tsv_with_delimiter(kafka_cluster):
    """TSV-formatted messages read with an explicit row delimiter.

    Produces 50 tab-separated ``<i>\\t<i>`` lines to topic 'tsv' and checks the
    accumulated SELECT output against the default reference file.
    """
    instance.query('''
        CREATE TABLE test.kafka (key UInt64, value UInt64)
            ENGINE = Kafka
            SETTINGS
                kafka_broker_list = 'kafka1:19092',
                kafka_topic_list = 'tsv',
                kafka_group_name = 'tsv',
                kafka_format = 'TSV',
                kafka_row_delimiter = '\\n';
        ''')

    rows = ['{i}\t{i}'.format(i=n) for n in range(50)]
    kafka_produce('tsv', rows)

    # Poll up to 50 times, accumulating output, then assert on the total.
    collected = ''
    for _ in range(50):
        collected += instance.query('SELECT * FROM test.kafka')
        if kafka_check_result(collected):
            break
    kafka_check_result(collected, True)


def test_kafka_json_without_delimiter(kafka_cluster):
    """JSONEachRow messages read without ``kafka_row_delimiter``.

    Two Kafka messages are produced to topic 'json', each carrying 25 JSON
    rows that already end with a newline. The accumulated SELECT output is
    checked against the default reference file.
    """
    instance.query('''
        CREATE TABLE test.kafka (key UInt64, value UInt64)
            ENGINE = Kafka
            SETTINGS
                kafka_broker_list = 'kafka1:19092',
                kafka_topic_list = 'json',
                kafka_group_name = 'json',
                kafka_format = 'JSONEachRow';
        ''')

    # One Kafka message per key range; rows inside a message are newline-terminated.
    for first, last in ((0, 25), (25, 50)):
        batch = ''.join(json.dumps({'key': n, 'value': n}) + '\n' for n in range(first, last))
        kafka_produce('json', [batch])

    # Poll up to 50 times, accumulating output, then assert on the total.
    collected = ''
    for _ in range(50):
        collected += instance.query('SELECT * FROM test.kafka')
        if kafka_check_result(collected):
            break
    kafka_check_result(collected, True)


def test_kafka_protobuf(kafka_cluster):
    """Protobuf-formatted messages using the ``kafka.proto:KeyValuePair`` schema.

    Keys 0..49 are produced to topic 'pb' as three Kafka messages holding 20,
    1 and 29 records. The accumulated SELECT output is checked against the
    default reference file.
    """
    instance.query('''
        CREATE TABLE test.kafka (key UInt64, value String)
            ENGINE = Kafka
            SETTINGS
                kafka_broker_list = 'kafka1:19092',
                kafka_topic_list = 'pb',
                kafka_group_name = 'pb',
                kafka_format = 'Protobuf',
                kafka_schema = 'kafka.proto:KeyValuePair';
        ''')

    # (start_index, num_messages) for each Kafka message.
    for start, count in ((0, 20), (20, 1), (21, 29)):
        kafka_produce_protobuf_messages('pb', start, count)

    # Poll up to 50 times, accumulating output, then assert on the total.
    collected = ''
    for _ in range(50):
        collected += instance.query('SELECT * FROM test.kafka')
        if kafka_check_result(collected):
            break
    kafka_check_result(collected, True)


def test_kafka_materialized_view(kafka_cluster):
    """Rows flow from the Kafka table into a MergeTree table via a materialized view.

    Creates ``test.kafka``, the target table ``test.view`` and the
    materialized view ``test.consumer``, produces 50 JSON messages to topic
    'json', then polls ``test.view`` once per second (at most 20 times) until
    it matches the default reference file.
    """
    instance.query('''
        DROP TABLE IF EXISTS test.view;
        DROP TABLE IF EXISTS test.consumer;
        CREATE TABLE test.kafka (key UInt64, value UInt64)
            ENGINE = Kafka
            SETTINGS
                kafka_broker_list = 'kafka1:19092',
                kafka_topic_list = 'json',
                kafka_group_name = 'json',
                kafka_format = 'JSONEachRow',
                kafka_row_delimiter = '\\n';
        CREATE TABLE test.view (key UInt64, value UInt64)
            ENGINE = MergeTree()
            ORDER BY key;
        CREATE MATERIALIZED VIEW test.consumer TO test.view AS
            SELECT * FROM test.kafka;
    ''')

    try:
        messages = [json.dumps({'key': i, 'value': i}) for i in range(50)]
        kafka_produce('json', messages)

        # Unlike the direct-SELECT tests, the view holds the full state, so the
        # result is replaced (not accumulated) on every poll.
        for _ in range(20):
            time.sleep(1)
            result = instance.query('SELECT * FROM test.view')
            if kafka_check_result(result):
                break
        kafka_check_result(result, True)
    finally:
        # Always remove the view and its target, even when the check above
        # fails, so they do not stay attached to test.kafka for later tests.
        instance.query('''
            DROP TABLE test.consumer;
            DROP TABLE test.view;
        ''')


def test_kafka_flush_on_big_message(kafka_cluster):
    """Check that no rows are lost when big messages go through a materialized view.

    Produces ``kafka_messages`` Kafka messages to topic 'flush', each one the
    same JSON object repeated ``batch_messages`` times. It then creates the
    Kafka table (``kafka_max_block_size = 10``), a MergeTree target and a
    materialized view, waits until consumer group 'flush' has committed
    offset ``kafka_messages``, and finally expects
    ``kafka_messages * batch_messages`` rows in ``test.view``.
    """
    # Create batches of messages of size ~100Kb
    kafka_messages = 10000
    batch_messages = 1000
    messages = [json.dumps({'key': i, 'value': 'x' * 100}) * batch_messages for i in range(kafka_messages)]
    kafka_produce('flush', messages)

    instance.query('''
        DROP TABLE IF EXISTS test.view;
        DROP TABLE IF EXISTS test.consumer;
        CREATE TABLE test.kafka (key UInt64, value String)
            ENGINE = Kafka
            SETTINGS
                kafka_broker_list = 'kafka1:19092',
                kafka_topic_list = 'flush',
                kafka_group_name = 'flush',
                kafka_format = 'JSONEachRow',
                kafka_max_block_size = 10;
        CREATE TABLE test.view (key UInt64, value String)
            ENGINE = MergeTree
            ORDER BY key;
        CREATE MATERIALIZED VIEW test.consumer TO test.view AS
            SELECT * FROM test.kafka;
    ''')

    try:
        client = KafkaAdminClient(bootstrap_servers="localhost:9092")
        received = False
        # NOTE(review): this wait has no timeout; it spins until the group's
        # committed offset on topic 'flush' equals the number of messages.
        while not received:
            try:
                offsets = client.list_consumer_group_offsets('flush')
                for topic, offset in offsets.items():
                    if topic.topic == 'flush' and offset.offset == kafka_messages:
                        received = True
                        break
            except kafka.errors.GroupCoordinatorNotAvailableError:
                continue

        expected_rows = kafka_messages * batch_messages
        for _ in range(20):
            time.sleep(1)
            result = instance.query('SELECT count() FROM test.view')
            if int(result) == expected_rows:
                break

        assert int(result) == expected_rows, 'ClickHouse lost some messages: {}'.format(result)
    finally:
        # Drop the view and its target (as test_kafka_materialized_view does);
        # otherwise test.consumer stays attached to the name test.kafka and
        # can interfere with tests that recreate that table.
        instance.query('''
            DROP TABLE test.consumer;
            DROP TABLE test.view;
        ''')


def test_kafka_virtual_columns(kafka_cluster):
    """Select the virtual columns ``_key``, ``_topic`` and ``_offset`` with the data.

    Two Kafka messages of 25 newline-terminated JSON rows each are produced to
    topic 'json'. The accumulated SELECT output is compared with
    ``test_kafka_virtual.reference``.
    """
    instance.query('''
        CREATE TABLE test.kafka (key UInt64, value UInt64)
            ENGINE = Kafka
            SETTINGS
                kafka_broker_list = 'kafka1:19092',
                kafka_topic_list = 'json',
                kafka_group_name = 'json',
                kafka_format = 'JSONEachRow';
        ''')

    messages = ''
    for i in range(25):
        messages += json.dumps({'key': i, 'value': i}) + '\n'
    kafka_produce('json', [messages])

    messages = ''
    for i in range(25, 50):
        messages += json.dumps({'key': i, 'value': i}) + '\n'
    kafka_produce('json', [messages])

    # The polling check must use the same reference as the final assertion.
    # With the default JSON reference, the early exit would be decided against
    # a file that does not describe this query's columns.
    reference = 'test_kafka_virtual.reference'
    result = ''
    for _ in range(50):
        result += instance.query('SELECT _key, key, _topic, value, _offset FROM test.kafka')
        if kafka_check_result(result, False, reference):
            break
    kafka_check_result(result, True, reference)


if __name__ == '__main__':
    # Manual mode: bring the cluster up, wait for the operator, tear it down.
    try:
        prompt = raw_input  # Python 2
    except NameError:
        prompt = input  # Python 3: raw_input was renamed to input
    try:
        cluster.start()
        prompt("Cluster created, press any key to destroy...")
    finally:
        # Shut down even if the prompt is interrupted (Ctrl-C / EOF), so the
        # cluster is not left running.
        cluster.shutdown()
Add row_delimiter argument to StorageKafka. There are common cases where a message doesn't end with a row delimiter. This patch allows specifying a row_delimiter char to compensate that. https://github.com/yandex/ClickHouse/issues/2298 2018-07-18 05:22:01 +00:00			`import os.path as p`
			`import time`
			`import pytest`

			`from helpers.cluster import ClickHouseCluster`
			`from helpers.test_tools import TSV`

			`import json`
Use docker exec to operate on kafka. 2018-07-26 04:36:28 +00:00			`import subprocess`
Add test on lost messages 2019-04-18 15:52:18 +00:00			`import kafka.errors`
			`from kafka import KafkaAdminClient, KafkaProducer`
Add a kafka test using protobuf format. 2019-03-29 13:53:00 +00:00			`from google.protobuf.internal.encoder import _VarintBytes`

			`"""`
			`protoc --version`
			`libprotoc 3.0.0`

			`# to create kafka_pb2.py`
			`protoc --python_out=. kafka.proto`
			`"""`
			`import kafka_pb2`
Add row_delimiter argument to StorageKafka. There are common cases where a message doesn't end with a row delimiter. This patch allows specifying a row_delimiter char to compensate that. https://github.com/yandex/ClickHouse/issues/2298 2018-07-18 05:22:01 +00:00

Add integration tests 2019-01-22 12:18:18 +00:00			`# TODO: add test for run-time offset update in CH, if we manually update it on Kafka side.`
			`# TODO: add test for mat. view is working.`
			`# TODO: add test for SELECT LIMIT is working.`
			# TODO: modify tests to respect `skip_broken_messages` setting.

Add row_delimiter argument to StorageKafka. There are common cases where a message doesn't end with a row delimiter. This patch allows specifying a row_delimiter char to compensate that. https://github.com/yandex/ClickHouse/issues/2298 2018-07-18 05:22:01 +00:00			`cluster = ClickHouseCluster(__file__)`
Added SETTINGS clause for Kafka storage engine 2018-08-01 17:23:50 +00:00			`instance = cluster.add_instance('instance',`
			`main_configs=['configs/kafka.xml'],`
Add a kafka test using protobuf format. 2019-03-29 13:53:00 +00:00			`with_kafka=True,`
			`clickhouse_path_dir='clickhouse_path')`
Fix running integration tests locally on Fedora (with selinux) Finally! 2019-02-08 14:20:25 +00:00			`kafka_id = ''`
Added SETTINGS clause for Kafka storage engine 2018-08-01 17:23:50 +00:00
Add row_delimiter argument to StorageKafka. There are common cases where a message doesn't end with a row delimiter. This patch allows specifying a row_delimiter char to compensate that. https://github.com/yandex/ClickHouse/issues/2298 2018-07-18 05:22:01 +00:00
Rewrite tests one more time 2019-02-07 16:40:16 +00:00			`# Helpers`
Add row_delimiter argument to StorageKafka. There are common cases where a message doesn't end with a row delimiter. This patch allows specifying a row_delimiter char to compensate that. https://github.com/yandex/ClickHouse/issues/2298 2018-07-18 05:22:01 +00:00
Rewrite tests one more time 2019-02-07 16:40:16 +00:00			`def check_kafka_is_available():`
Added SETTINGS clause for Kafka storage engine 2018-08-01 17:23:50 +00:00			`p = subprocess.Popen(('docker',`
			`'exec',`
			`'-i',`
CLICKHOUSE-3894: Fix Kafka test 2018-08-27 16:15:39 +00:00			`kafka_id,`
Added SETTINGS clause for Kafka storage engine 2018-08-01 17:23:50 +00:00			`'/usr/bin/kafka-broker-api-versions',`
			`'--bootstrap-server',`
Add a kafka test using protobuf format. 2019-03-29 13:53:00 +00:00			`'INSIDE://localhost:9092'),`
Added SETTINGS clause for Kafka storage engine 2018-08-01 17:23:50 +00:00			`stdout=subprocess.PIPE)`
Rewrite tests one more time 2019-02-07 16:40:16 +00:00			`p.communicate()`
Use docker exec to operate on kafka. 2018-07-26 04:36:28 +00:00			`return p.returncode == 0`

Added SETTINGS clause for Kafka storage engine 2018-08-01 17:23:50 +00:00
Rewrite tests one more time 2019-02-07 16:40:16 +00:00			`def wait_kafka_is_available(max_retries=50):`
			`retries = 0`
			`while True:`
			`if check_kafka_is_available():`
			`break`
			`else:`
			`retries += 1`
			`if retries > max_retries:`
			`raise "Kafka is not available"`
			`print("Waiting for Kafka to start up")`
			`time.sleep(1)`


			`def kafka_produce(topic, messages):`
Use python bindings in Kafka tests. 2019-04-02 17:34:04 +00:00			`producer = KafkaProducer(bootstrap_servers="localhost:9092")`
			`for message in messages:`
			`producer.send(topic=topic, value=message)`
			`producer.flush()`
			`print ("Produced {} messages for topic {}".format(len(messages), topic))`
Add a kafka test using protobuf format. 2019-03-29 13:53:00 +00:00

			`def kafka_produce_protobuf_messages(topic, start_index, num_messages):`
			`data = ''`
			`for i in range(start_index, start_index + num_messages):`
			`msg = kafka_pb2.KeyValuePair()`
			`msg.key = i`
			`msg.value = str(i)`
			`serialized_msg = msg.SerializeToString()`
			`data = data + _VarintBytes(len(serialized_msg)) + serialized_msg`
			`producer = KafkaProducer(bootstrap_servers="localhost:9092")`
			`producer.send(topic=topic, value=data)`
			`producer.flush()`
			`print("Produced {} messages for topic {}".format(num_messages, topic))`
Use docker exec to operate on kafka. 2018-07-26 04:36:28 +00:00
Add row_delimiter argument to StorageKafka. There are common cases where a message doesn't end with a row delimiter. This patch allows specifying a row_delimiter char to compensate that. https://github.com/yandex/ClickHouse/issues/2298 2018-07-18 05:22:01 +00:00
Tests are now passing locally 2019-02-11 11:54:30 +00:00			`# Since everything is async and shaky when receiving messages from Kafka,`
			`# we may want to try and check results multiple times in a loop.`
Add test on virtual columns 2019-05-23 14:25:41 +00:00			`def kafka_check_result(result, check=False, ref_file='test_kafka_json.reference'):`
			`fpath = p.join(p.dirname(__file__), ref_file)`
Rewrite tests one more time 2019-02-07 16:40:16 +00:00			`with open(fpath) as reference:`
Tests are now passing locally 2019-02-11 11:54:30 +00:00			`if check:`
			`assert TSV(result) == TSV(reference)`
			`else:`
			`return TSV(result) == TSV(reference)`
Rewrite tests one more time 2019-02-07 16:40:16 +00:00

			`# Fixtures`

			`@pytest.fixture(scope="module")`
			`def kafka_cluster():`
			`try:`
Fix running integration tests locally on Fedora (with selinux) Finally! 2019-02-08 14:20:25 +00:00			`global kafka_id`
Rewrite tests one more time 2019-02-07 16:40:16 +00:00			`cluster.start()`
Fix running integration tests locally on Fedora (with selinux) Finally! 2019-02-08 14:20:25 +00:00			`kafka_id = instance.cluster.kafka_docker_id`
Tests are now passing locally 2019-02-11 11:54:30 +00:00			`print("kafka_id is {}".format(kafka_id))`
Rewrite tests one more time 2019-02-07 16:40:16 +00:00			`instance.query('CREATE DATABASE test')`

			`yield cluster`

			`finally:`
			`cluster.shutdown()`


			`@pytest.fixture(autouse=True)`
			`def kafka_setup_teardown():`
			`instance.query('DROP TABLE IF EXISTS test.kafka')`
			`wait_kafka_is_available()`
Tests are now passing locally 2019-02-11 11:54:30 +00:00			`print("kafka is available - running test")`
Rewrite tests one more time 2019-02-07 16:40:16 +00:00			`yield # run test`
			`instance.query('DROP TABLE test.kafka')`


			`# Tests`

			`def test_kafka_settings_old_syntax(kafka_cluster):`
			`instance.query('''`
			`CREATE TABLE test.kafka (key UInt64, value UInt64)`
Add a kafka test using protobuf format. 2019-03-29 13:53:00 +00:00			`ENGINE = Kafka('kafka1:19092', 'old', 'old', 'JSONEachRow', '\\n');`
Rewrite tests one more time 2019-02-07 16:40:16 +00:00			`''')`

			`# Don't insert malformed messages since old settings syntax`
			`# doesn't support skipping of broken messages.`
Use python bindings in Kafka tests. 2019-04-02 17:34:04 +00:00			`messages = []`
Rewrite tests one more time 2019-02-07 16:40:16 +00:00			`for i in range(50):`
Use python bindings in Kafka tests. 2019-04-02 17:34:04 +00:00			`messages.append(json.dumps({'key': i, 'value': i}))`
Rewrite tests one more time 2019-02-07 16:40:16 +00:00			`kafka_produce('old', messages)`

Tests are now passing locally 2019-02-11 11:54:30 +00:00			`result = ''`
			`for i in range(50):`
			`result += instance.query('SELECT * FROM test.kafka')`
			`if kafka_check_result(result):`
			`break`
			`kafka_check_result(result, True)`
Rewrite tests one more time 2019-02-07 16:40:16 +00:00

			`def test_kafka_settings_new_syntax(kafka_cluster):`
			`instance.query('''`
			`CREATE TABLE test.kafka (key UInt64, value UInt64)`
			`ENGINE = Kafka`
			`SETTINGS`
Add a kafka test using protobuf format. 2019-03-29 13:53:00 +00:00			`kafka_broker_list = 'kafka1:19092',`
Rewrite tests one more time 2019-02-07 16:40:16 +00:00			`kafka_topic_list = 'new',`
			`kafka_group_name = 'new',`
			`kafka_format = 'JSONEachRow',`
			`kafka_row_delimiter = '\\n',`
			`kafka_skip_broken_messages = 1;`
			`''')`
Add integration tests 2019-01-22 12:18:18 +00:00
Use python bindings in Kafka tests. 2019-04-02 17:34:04 +00:00			`messages = []`
Add integration tests 2019-01-22 12:18:18 +00:00			`for i in range(25):`
Use python bindings in Kafka tests. 2019-04-02 17:34:04 +00:00			`messages.append(json.dumps({'key': i, 'value': i}))`
Rewrite tests one more time 2019-02-07 16:40:16 +00:00			`kafka_produce('new', messages)`
Add integration tests 2019-01-22 12:18:18 +00:00
Rewrite tests one more time 2019-02-07 16:40:16 +00:00			`# Insert couple of malformed messages.`
Use python bindings in Kafka tests. 2019-04-02 17:34:04 +00:00			`kafka_produce('new', ['}{very_broken_message,'])`
			`kafka_produce('new', ['}another{very_broken_message,'])`
Add integration tests 2019-01-22 12:18:18 +00:00
Use python bindings in Kafka tests. 2019-04-02 17:34:04 +00:00			`messages = []`
Add integration tests 2019-01-22 12:18:18 +00:00			`for i in range(25, 50):`
Use python bindings in Kafka tests. 2019-04-02 17:34:04 +00:00			`messages.append(json.dumps({'key': i, 'value': i}))`
Rewrite tests one more time 2019-02-07 16:40:16 +00:00			`kafka_produce('new', messages)`
Add integration tests 2019-01-22 12:18:18 +00:00
Tests are now passing locally 2019-02-11 11:54:30 +00:00			`result = ''`
			`for i in range(50):`
			`result += instance.query('SELECT * FROM test.kafka')`
			`if kafka_check_result(result):`
			`break`
			`kafka_check_result(result, True)`
CLICKHOUSE-3894: Fix Kafka test 2018-08-27 16:15:39 +00:00
Added SETTINGS clause for Kafka storage engine 2018-08-01 17:23:50 +00:00
Rewrite tests one more time 2019-02-07 16:40:16 +00:00			`def test_kafka_csv_with_delimiter(kafka_cluster):`
Added SETTINGS clause for Kafka storage engine 2018-08-01 17:23:50 +00:00			`instance.query('''`
			`CREATE TABLE test.kafka (key UInt64, value UInt64)`
Rewrite tests one more time 2019-02-07 16:40:16 +00:00			`ENGINE = Kafka`
			`SETTINGS`
Add a kafka test using protobuf format. 2019-03-29 13:53:00 +00:00			`kafka_broker_list = 'kafka1:19092',`
Rewrite tests one more time 2019-02-07 16:40:16 +00:00			`kafka_topic_list = 'csv',`
			`kafka_group_name = 'csv',`
			`kafka_format = 'CSV',`
			`kafka_row_delimiter = '\\n';`
Added SETTINGS clause for Kafka storage engine 2018-08-01 17:23:50 +00:00			`''')`
Add integration tests 2019-01-22 12:18:18 +00:00
Use python bindings in Kafka tests. 2019-04-02 17:34:04 +00:00			`messages = []`
Rewrite tests one more time 2019-02-07 16:40:16 +00:00			`for i in range(50):`
Use python bindings in Kafka tests. 2019-04-02 17:34:04 +00:00			`messages.append('{i}, {i}'.format(i=i))`
Rewrite tests one more time 2019-02-07 16:40:16 +00:00			`kafka_produce('csv', messages)`
Add integration tests 2019-01-22 12:18:18 +00:00
Tests are now passing locally 2019-02-11 11:54:30 +00:00			`result = ''`
			`for i in range(50):`
			`result += instance.query('SELECT * FROM test.kafka')`
			`if kafka_check_result(result):`
			`break`
			`kafka_check_result(result, True)`
Allow to start cluster with Kafka without pytest #2725 2018-07-26 14:40:33 +00:00
Added SETTINGS clause for Kafka storage engine 2018-08-01 17:23:50 +00:00
Rewrite tests one more time 2019-02-07 16:40:16 +00:00			`def test_kafka_tsv_with_delimiter(kafka_cluster):`
Added SETTINGS clause for Kafka storage engine 2018-08-01 17:23:50 +00:00			`instance.query('''`
			`CREATE TABLE test.kafka (key UInt64, value UInt64)`
			`ENGINE = Kafka`
			`SETTINGS`
Add a kafka test using protobuf format. 2019-03-29 13:53:00 +00:00			`kafka_broker_list = 'kafka1:19092',`
Rewrite tests one more time 2019-02-07 16:40:16 +00:00			`kafka_topic_list = 'tsv',`
			`kafka_group_name = 'tsv',`
			`kafka_format = 'TSV',`
			`kafka_row_delimiter = '\\n';`
Added SETTINGS clause for Kafka storage engine 2018-08-01 17:23:50 +00:00			`''')`
Add integration tests 2019-01-22 12:18:18 +00:00
Use python bindings in Kafka tests. 2019-04-02 17:34:04 +00:00			`messages = []`
Rewrite tests one more time 2019-02-07 16:40:16 +00:00			`for i in range(50):`
Use python bindings in Kafka tests. 2019-04-02 17:34:04 +00:00			`messages.append('{i}\t{i}'.format(i=i))`
Rewrite tests one more time 2019-02-07 16:40:16 +00:00			`kafka_produce('tsv', messages)`
Add integration tests 2019-01-22 12:18:18 +00:00
Tests are now passing locally 2019-02-11 11:54:30 +00:00			`result = ''`
			`for i in range(50):`
			`result += instance.query('SELECT * FROM test.kafka')`
			`if kafka_check_result(result):`
			`break`
			`kafka_check_result(result, True)`
Added SETTINGS clause for Kafka storage engine 2018-08-01 17:23:50 +00:00

Use python bindings in Kafka tests. 2019-04-02 17:34:04 +00:00			`def test_kafka_json_without_delimiter(kafka_cluster):`
			`instance.query('''`
			`CREATE TABLE test.kafka (key UInt64, value UInt64)`
			`ENGINE = Kafka`
			`SETTINGS`
			`kafka_broker_list = 'kafka1:19092',`
			`kafka_topic_list = 'json',`
			`kafka_group_name = 'json',`
			`kafka_format = 'JSONEachRow';`
			`''')`

			`messages = ''`
			`for i in range(25):`
			`messages += json.dumps({'key': i, 'value': i}) + '\n'`
			`kafka_produce('json', [messages])`

Fix new test 2019-04-03 17:46:54 +00:00			`messages = ''`
			`for i in range(25, 50):`
			`messages += json.dumps({'key': i, 'value': i}) + '\n'`
			`kafka_produce('json', [messages])`

Use python bindings in Kafka tests. 2019-04-02 17:34:04 +00:00			`result = ''`
			`for i in range(50):`
			`result += instance.query('SELECT * FROM test.kafka')`
			`if kafka_check_result(result):`
			`break`
			`kafka_check_result(result, True)`


Add a kafka test using protobuf format. 2019-03-29 13:53:00 +00:00			`def test_kafka_protobuf(kafka_cluster):`
			`instance.query('''`
			`CREATE TABLE test.kafka (key UInt64, value String)`
			`ENGINE = Kafka`
			`SETTINGS`
			`kafka_broker_list = 'kafka1:19092',`
			`kafka_topic_list = 'pb',`
			`kafka_group_name = 'pb',`
			`kafka_format = 'Protobuf',`
			`kafka_schema = 'kafka.proto:KeyValuePair';`
			`''')`

			`kafka_produce_protobuf_messages('pb', 0, 20)`
			`kafka_produce_protobuf_messages('pb', 20, 1)`
			`kafka_produce_protobuf_messages('pb', 21, 29)`

			`result = ''`
			`for i in range(50):`
			`result += instance.query('SELECT * FROM test.kafka')`
			`if kafka_check_result(result):`
			`break`
			`kafka_check_result(result, True)`


Rewrite tests one more time 2019-02-07 16:40:16 +00:00			`def test_kafka_materialized_view(kafka_cluster):`
Add integration tests 2019-01-22 12:18:18 +00:00			`instance.query('''`
			`DROP TABLE IF EXISTS test.view;`
			`DROP TABLE IF EXISTS test.consumer;`
			`CREATE TABLE test.kafka (key UInt64, value UInt64)`
			`ENGINE = Kafka`
			`SETTINGS`
Add a kafka test using protobuf format. 2019-03-29 13:53:00 +00:00			`kafka_broker_list = 'kafka1:19092',`
Add integration tests 2019-01-22 12:18:18 +00:00			`kafka_topic_list = 'json',`
			`kafka_group_name = 'json',`
			`kafka_format = 'JSONEachRow',`
Rewrite tests one more time 2019-02-07 16:40:16 +00:00			`kafka_row_delimiter = '\\n';`
Add integration tests 2019-01-22 12:18:18 +00:00			`CREATE TABLE test.view (key UInt64, value UInt64)`
			`ENGINE = MergeTree()`
			`ORDER BY key;`
			`CREATE MATERIALIZED VIEW test.consumer TO test.view AS`
			`SELECT * FROM test.kafka;`
			`''')`

Use python bindings in Kafka tests. 2019-04-02 17:34:04 +00:00			`messages = []`
Rewrite tests one more time 2019-02-07 16:40:16 +00:00			`for i in range(50):`
Use python bindings in Kafka tests. 2019-04-02 17:34:04 +00:00			`messages.append(json.dumps({'key': i, 'value': i}))`
Rewrite tests one more time 2019-02-07 16:40:16 +00:00			`kafka_produce('json', messages)`

Tests are now passing locally 2019-02-11 11:54:30 +00:00			`for i in range(20):`
Rewrite tests one more time 2019-02-07 16:40:16 +00:00			`time.sleep(1)`
			`result = instance.query('SELECT * FROM test.view')`
Tests are now passing locally 2019-02-11 11:54:30 +00:00			`if kafka_check_result(result):`
Rewrite tests one more time 2019-02-07 16:40:16 +00:00			`break`
Tests are now passing locally 2019-02-11 11:54:30 +00:00			`kafka_check_result(result, True)`
Add integration tests 2019-01-22 12:18:18 +00:00
			`instance.query('''`
			`DROP TABLE test.consumer;`
Rewrite tests one more time 2019-02-07 16:40:16 +00:00			`DROP TABLE test.view;`
Add integration tests 2019-01-22 12:18:18 +00:00			`''')`


Add test on lost messages 2019-04-18 15:52:18 +00:00			`def test_kafka_flush_on_big_message(kafka_cluster):`
			`# Create batchs of messages of size ~100Kb`
			`kafka_messages = 10000`
			`batch_messages = 1000`
			`messages = [json.dumps({'key': i, 'value': 'x' * 100}) * batch_messages for i in range(kafka_messages)]`
			`kafka_produce('flush', messages)`

			`instance.query('''`
			`DROP TABLE IF EXISTS test.view;`
			`DROP TABLE IF EXISTS test.consumer;`
			`CREATE TABLE test.kafka (key UInt64, value String)`
			`ENGINE = Kafka`
			`SETTINGS`
			`kafka_broker_list = 'kafka1:19092',`
			`kafka_topic_list = 'flush',`
			`kafka_group_name = 'flush',`
			`kafka_format = 'JSONEachRow',`
			`kafka_max_block_size = 10;`
			`CREATE TABLE test.view (key UInt64, value String)`
			`ENGINE = MergeTree`
			`ORDER BY key;`
			`CREATE MATERIALIZED VIEW test.consumer TO test.view AS`
			`SELECT * FROM test.kafka;`
			`''')`

			`client = KafkaAdminClient(bootstrap_servers="localhost:9092")`
			`received = False`
			`while not received:`
			`try:`
			`offsets = client.list_consumer_group_offsets('flush')`
			`for topic, offset in offsets.items():`
			`if topic.topic == 'flush' and offset.offset == kafka_messages:`
			`received = True`
			`break`
			`except kafka.errors.GroupCoordinatorNotAvailableError:`
			`continue`

			`for _ in range(20):`
			`time.sleep(1)`
			`result = instance.query('SELECT count() FROM test.view')`
			`if int(result) == kafka_messages*batch_messages:`
			`break`

			`assert int(result) == kafka_messages*batch_messages, 'ClickHouse lost some messages: {}'.format(result)`


Add test on virtual columns 2019-05-23 14:25:41 +00:00			`def test_kafka_virtual_columns(kafka_cluster):`
			`instance.query('''`
			`CREATE TABLE test.kafka (key UInt64, value UInt64)`
			`ENGINE = Kafka`
			`SETTINGS`
			`kafka_broker_list = 'kafka1:19092',`
			`kafka_topic_list = 'json',`
			`kafka_group_name = 'json',`
			`kafka_format = 'JSONEachRow';`
			`''')`

			`messages = ''`
			`for i in range(25):`
			`messages += json.dumps({'key': i, 'value': i}) + '\n'`
			`kafka_produce('json', [messages])`

			`messages = ''`
			`for i in range(25, 50):`
			`messages += json.dumps({'key': i, 'value': i}) + '\n'`
			`kafka_produce('json', [messages])`

			`result = ''`
			`for i in range(50):`
			`result += instance.query('SELECT _key, key, _topic, value, _offset FROM test.kafka')`
			`if kafka_check_result(result):`
			`break`
			`kafka_check_result(result, True, 'test_kafka_virtual.reference')`


Allow to start cluster with Kafka without pytest #2725 2018-07-26 14:40:33 +00:00			`if __name__ == '__main__':`
			`cluster.start()`
			`raw_input("Cluster created, press any key to destroy...")`
			`cluster.shutdown()`