2018-07-18 05:22:01 +00:00
|
|
|
import os.path as p
|
|
|
|
import time
|
|
|
|
import pytest
|
|
|
|
|
|
|
|
from helpers.cluster import ClickHouseCluster
|
|
|
|
from helpers.test_tools import TSV
|
|
|
|
|
|
|
|
import json
|
2018-07-26 04:36:28 +00:00
|
|
|
import subprocess
|
2018-07-18 05:22:01 +00:00
|
|
|
|
|
|
|
|
2019-01-22 12:18:18 +00:00
|
|
|
# TODO: add test for run-time offset update in CH, if we manually update it on Kafka side.
|
|
|
|
# TODO: add a test that the materialized view is working.
|
|
|
|
# TODO: add a test that SELECT with LIMIT is working.
|
|
|
|
# TODO: modify tests to respect `skip_broken_messages` setting.
|
|
|
|
|
2018-07-18 05:22:01 +00:00
|
|
|
# One ClickHouse instance configured from configs/kafka.xml, requested with
# Kafka support enabled (with_kafka=True).
cluster = ClickHouseCluster(__file__)
instance = cluster.add_instance(
    'instance',
    main_configs=['configs/kafka.xml'],
    with_kafka=True,
)
|
|
|
|
|
2018-07-18 05:22:01 +00:00
|
|
|
|
|
|
|
@pytest.fixture(scope="module")
def started_cluster():
    """Module-scoped fixture: start the cluster, create database `test`, yield the cluster.

    The cluster is shut down once the module's tests are done, and also if
    start-up or the database creation raises.
    """
    try:
        cluster.start()
        instance.query('CREATE DATABASE test')
        yield cluster
    finally:
        # Reached on normal teardown as well as on any failure above.
        cluster.shutdown()
|
2018-07-18 05:22:01 +00:00
|
|
|
|
2018-08-01 17:23:50 +00:00
|
|
|
|
2018-08-27 16:15:39 +00:00
|
|
|
def kafka_is_available(kafka_id):
    """Return True when the Kafka broker in container `kafka_id` responds.

    Runs `kafka-broker-api-versions` inside the container through
    `docker exec` and reports whether it exited with status 0.
    """
    command = (
        'docker',
        'exec',
        '-i',
        kafka_id,
        '/usr/bin/kafka-broker-api-versions',
        '--bootstrap-server',
        'PLAINTEXT://localhost:9092',
    )
    probe = subprocess.Popen(command, stdout=subprocess.PIPE)
    # Drain stdout and wait for the process to exit; the output is not inspected.
    probe.communicate()
    return probe.returncode == 0
|
|
|
|
|
2018-08-01 17:23:50 +00:00
|
|
|
|
2018-08-27 16:15:39 +00:00
|
|
|
def kafka_produce(kafka_id, topic, messages):
    """Feed `messages` to `kafka-console-producer` for `topic` in container `kafka_id`.

    The payload is written to the producer's stdin in one go; callers in this
    file pass newline-terminated messages. Returns None; the producer's exit
    status is not checked.

    :param kafka_id: docker container id/name of the Kafka broker.
    :param topic: name of the topic to publish to.
    :param messages: payload as bytes, or text (encoded as UTF-8 before sending).
    """
    command = (
        'docker',
        'exec',
        '-i',
        kafka_id,
        '/usr/bin/kafka-console-producer',
        '--broker-list',
        'localhost:9092',
        '--topic',
        topic,
    )
    producer = subprocess.Popen(command, stdin=subprocess.PIPE)

    # The pipe is binary: byte strings (Python 2 `str`) pass through untouched,
    # text is encoded explicitly so the call also works with unicode input.
    if not isinstance(messages, bytes):
        messages = messages.encode('utf-8')

    # communicate() writes the payload, closes stdin and waits for the process,
    # so no separate stdin.close() is needed afterwards.
    producer.communicate(messages)
|
|
|
|
|
2018-07-18 05:22:01 +00:00
|
|
|
|
2019-01-23 16:12:41 +00:00
|
|
|
def kafka_check_json_numbers(instance, insert_malformed=False, table='test.kafka', select_count=3):
    """Publish 50 JSON rows to topic `json` and check what `table` returns.

    Waits for the broker (up to 50 retries, one second apart), produces rows
    {'key': i, 'value': i} for i in 0..49 in two batches, optionally putting
    two malformed messages between the batches, then runs `SELECT *` on
    `table` `select_count` times and compares the concatenated output with
    `test_kafka_json.reference` located next to this file.

    :param instance: ClickHouse instance to query; its `cluster.kafka_docker_id`
        identifies the broker container.
    :param insert_malformed: when True, two broken messages are produced
        between the two batches of valid rows.
    :param table: table to select from.
    :param select_count: number of SELECT attempts whose output is concatenated.
    :raises Exception: if the broker is still unavailable after 50 retries.
    :raises AssertionError: if the selected data differs from the reference.
    """
    kafka_id = instance.cluster.kafka_docker_id

    retries = 0
    while not kafka_is_available(kafka_id):
        retries += 1
        if retries > 50:
            # Raising a bare string is itself a TypeError, so raise a real
            # exception carrying the intended message.
            raise Exception('Cannot connect to kafka.')
        print("Waiting for kafka to be available...")
        time.sleep(1)

    first_batch = ''.join(
        json.dumps({'key': i, 'value': i}) + '\n' for i in range(25))
    kafka_produce(kafka_id, 'json', first_batch)

    if insert_malformed:
        # Insert a couple of malformed messages between the valid batches.
        kafka_produce(kafka_id, 'json', '}{very_broken_message,\n')
        kafka_produce(kafka_id, 'json', '}{very_broken_message,\n')

    second_batch = ''.join(
        json.dumps({'key': i, 'value': i}) + '\n' for i in range(25, 50))
    kafka_produce(kafka_id, 'json', second_batch)

    # XXX: since the broken message breaks the `select` reading
    #      we'll try to select a limited number of times.
    result = ''
    for _ in range(select_count):
        time.sleep(1)
        result += instance.query('SELECT * FROM {};'.format(table))

    # `p` is the module-level alias for os.path.
    fpath = p.join(p.dirname(__file__), 'test_kafka_json.reference')
    with open(fpath) as reference:
        assert TSV(result) == TSV(reference)
|
2018-08-01 17:23:50 +00:00
|
|
|
|
|
|
|
|
|
|
|
def test_kafka_json(started_cluster):
    """Check JSON rows read via a Kafka table declared with positional engine arguments."""
    create_table = '''
        DROP TABLE IF EXISTS test.kafka;
        CREATE TABLE test.kafka (key UInt64, value UInt64)
            ENGINE = Kafka('kafka1:9092', 'json', 'json',
                           'JSONEachRow', '\\n');
        '''
    instance.query(create_table)

    # No malformed messages here: the old (positional) settings syntax
    # doesn't support skipping of broken messages.
    kafka_check_json_numbers(instance)

    instance.query('DROP TABLE test.kafka')
|
2018-07-26 14:40:33 +00:00
|
|
|
|
2018-08-01 17:23:50 +00:00
|
|
|
|
|
|
|
def test_kafka_json_settings(started_cluster):
    """Check JSON rows read via a Kafka table declared with the SETTINGS syntax.

    The table sets `kafka_skip_broken_messages = 1`, and malformed messages
    are inserted between the valid ones.
    """
    create_table = '''
        DROP TABLE IF EXISTS test.kafka;
        CREATE TABLE test.kafka (key UInt64, value UInt64)
            ENGINE = Kafka
            SETTINGS
                kafka_broker_list = 'kafka1:9092',
                kafka_topic_list = 'json',
                kafka_group_name = 'json',
                kafka_format = 'JSONEachRow',
                kafka_row_delimiter = '\\n',
                kafka_skip_broken_messages = 1;
        '''
    instance.query(create_table)

    kafka_check_json_numbers(instance, insert_malformed=True)

    instance.query('DROP TABLE test.kafka')
|
|
|
|
|
|
|
|
|
2019-01-22 12:18:18 +00:00
|
|
|
def test_kafka_json_materialized_view(started_cluster):
    """Check rows that a materialized view copies from the Kafka table into a MergeTree table.

    `test.consumer` selects from `test.kafka` into `test.view`; the check
    reads `test.view` once, with malformed messages inserted in between.
    """
    setup_queries = '''
        DROP TABLE IF EXISTS test.kafka;
        DROP TABLE IF EXISTS test.view;
        DROP TABLE IF EXISTS test.consumer;
        CREATE TABLE test.kafka (key UInt64, value UInt64)
            ENGINE = Kafka
            SETTINGS
                kafka_broker_list = 'kafka1:9092',
                kafka_topic_list = 'json',
                kafka_group_name = 'json',
                kafka_format = 'JSONEachRow',
                kafka_row_delimiter = '\\n',
                kafka_skip_broken_messages = 2;
        CREATE TABLE test.view (key UInt64, value UInt64)
            ENGINE = MergeTree()
            ORDER BY key;
        CREATE MATERIALIZED VIEW test.consumer TO test.view AS
            SELECT * FROM test.kafka;
        '''
    instance.query(setup_queries)

    kafka_check_json_numbers(
        instance,
        insert_malformed=True,
        table='test.view',
        select_count=1,
    )

    teardown_queries = '''
        DROP TABLE test.kafka;
        DROP TABLE test.view;
        DROP TABLE test.consumer;
        '''
    instance.query(teardown_queries)
|
|
|
|
|
|
|
|
|
2018-07-26 14:40:33 +00:00
|
|
|
if __name__ == '__main__':
    # Manual mode: bring the cluster up and keep it running until the user
    # confirms. try/finally mirrors the `started_cluster` fixture so the
    # cluster is also shut down when start-up fails or the prompt is
    # interrupted (e.g. Ctrl-C).
    try:
        cluster.start()
        raw_input("Cluster created, press any key to destroy...")
    finally:
        cluster.shutdown()
|