# ClickHouse/dbms/tests/integration/test_extreme_deduplication/test.py

import time
from contextlib import contextmanager

import pytest

from helpers.cluster import ClickHouseCluster
from helpers.network import PartitionManager
from helpers.test_tools import TSV
from helpers.client import CommandRequest
from helpers.client import QueryTimeoutExceedException


cluster = ClickHouseCluster(__file__)

# Two instances registered with the same 'configs' directory and ZooKeeper
# enabled; they share layer 0 / shard 0 and differ only in their name and in
# the value of the "replica" macro.
nodes = [
    cluster.add_instance(
        instance_name,
        config_dir='configs',
        with_zookeeper=True,
        macros={"layer": 0, "shard": 0, "replica": replica_num},
    )
    for instance_name, replica_num in (('node1', 1), ('node2', 2))
]
node1, node2 = nodes

@pytest.fixture(scope="module")
def started_cluster():
    """Module-scoped fixture: start the cluster, yield it to the tests, shut it down.

    Yields:
        The module-level ``cluster`` object after ``cluster.start()`` returned.
    """
    try:
        cluster.start()
        yield cluster
    finally:
        # Executed on normal teardown as well as when start-up or a test
        # raises, so shutdown() is always attempted.
        cluster.shutdown()


def test_deduplication_window_in_seconds(started_cluster):
    """Check that a repeated insert is deduplicated only until its block hash is cleaned up.

    Scenario (all queries go to ``node1``):
      1. Insert (0, 0), wait one second and insert it again, then insert the
         distinct row (0, 1). The table is expected to hold 2 rows, i.e. the
         repeated block was dropped as a duplicate.
      2. Wait two more seconds for the cleanup thread. Exactly one entry is
         then expected under the table's ``blocks`` node in ZooKeeper.
      3. Insert (0, 0) once more. It is expected to be stored (3 rows in
         total) because the hash of the first block is gone.

    NOTE(review): the sleeps presumably depend on the deduplication window and
    cleanup settings shipped in the ``configs`` directory - confirm there
    before changing the timings.

    The table is dropped in a ``finally`` clause so that a failed assertion
    does not leave ``simple`` behind; the other test in this module creates a
    table with the same name and ZooKeeper path.
    """
    node = node1

    node.query("""
        CREATE TABLE simple ON CLUSTER test_cluster (date Date, id UInt32)
        ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/simple', '{replica}', date, id, 8192)""")

    try:
        node.query("INSERT INTO simple VALUES (0, 0)")
        time.sleep(1)
        node.query("INSERT INTO simple VALUES (0, 0)") # deduplication works here
        node.query("INSERT INTO simple VALUES (0, 1)")
        assert TSV(node.query("SELECT count() FROM simple")) == TSV("2\n")

        # wait for the cleanup thread
        time.sleep(2)

        assert TSV.toMat(node.query("SELECT count() FROM system.zookeeper WHERE path='/clickhouse/tables/0/simple/blocks'"))[0][0] == "1"
        node.query("INSERT INTO simple VALUES (0, 0)") # deduplication doesn't work here, the first hash node was deleted
        assert TSV.toMat(node.query("SELECT count() FROM simple"))[0][0] == "3"
    finally:
        # Always remove the table, even when an assertion above failed.
        node.query("""DROP TABLE simple ON CLUSTER test_cluster""")


# Currently this test just reproduces incorrect behavior that should be fixed
def test_deduplication_works_in_case_of_intensive_inserts(started_cluster):
    """Hammer both nodes with identical inserts while checking the row count stays at 1.

    After one initial row (0, 0) is inserted through ``node1``, two bash
    scripts are created per node via ``CommandRequest`` (each with
    ``timeout=10``):
      * an inserter that repeats ``INSERT INTO simple VALUES (0, 0)`` 1000 times;
      * a fetcher that runs ``SELECT count() FROM simple`` 1000 times and
        exits with an error as soon as the query fails or the count is not 1.

    Every request is then awaited with ``get_answer()``. A
    ``QueryTimeoutExceedException`` is tolerated; any other exception
    propagates and fails the test.

    NOTE(review): this assumes ``CommandRequest`` launches the script when it
    is constructed, so that all scripts run concurrently - confirm in
    ``helpers.client``.

    The table is dropped in a ``finally`` clause so that a failure does not
    leave ``simple`` behind for other tests in this module.
    """
    inserters = []
    fetchers = []

    node1.query("""
        CREATE TABLE simple ON CLUSTER test_cluster (date Date, id UInt32)
        ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/simple', '{replica}', date, id, 8192)""")

    try:
        node1.query("INSERT INTO simple VALUES (0, 0)")

        for node in nodes:
            host = node.ip_address

            inserters.append(CommandRequest(['/bin/bash'], timeout=10, stdin="""
set -e
for i in `seq 1000`; do
    {} --host {} -q "INSERT INTO simple VALUES (0, 0)"
done
""".format(cluster.get_client_cmd(), host)))

            fetchers.append(CommandRequest(['/bin/bash'], timeout=10, stdin="""
set -e
for i in `seq 1000`; do
    res=`{} --host {} -q "SELECT count() FROM simple"`
    if [[ $? -ne 0 || $res -ne 1 ]]; then
        echo "Selected $res elements! Host: {}" 1>&2
        exit -1
    fi;
done
""".format(cluster.get_client_cmd(), host, node.name)))

        # There must be no errors during INSERTs and then during SELECTs:
        # wait for all inserters first, then all fetchers (same order as before).
        for request in inserters + fetchers:
            try:
                request.get_answer()
            except QueryTimeoutExceedException:
                # Only timeout is accepted
                pass
    finally:
        # Always remove the table, even when one of the requests failed.
        node1.query("""DROP TABLE simple ON CLUSTER test_cluster""")
Add test. [#CLICKHOUSE-3173] 2017-07-26 12:31:55 +00:00			`import time`
			`from contextlib import contextmanager`

			`import pytest`

			`from helpers.cluster import ClickHouseCluster`
			`from helpers.network import PartitionManager`
			`from helpers.test_tools import TSV`
			`from helpers.client import CommandRequest`
Add integration test. [#CLICKHOUSE-3178] 2017-10-06 11:29:58 +00:00			`from helpers.client import QueryTimeoutExceedException`
Add test. [#CLICKHOUSE-3173] 2017-07-26 12:31:55 +00:00

			`cluster = ClickHouseCluster(__file__)`

~/work/ClickHouse/dbms/tests/integration$ find . -name '*.py' \| xargs sed -i -r -e 's/macroses/macros/g' # [#CLICKHOUSE-2] 2018-07-25 16:00:51 +00:00			`node1 = cluster.add_instance('node1', config_dir='configs', with_zookeeper=True, macros={"layer": 0, "shard": 0, "replica": 1})`
			`node2 = cluster.add_instance('node2', config_dir='configs', with_zookeeper=True, macros={"layer": 0, "shard": 0, "replica": 2})`
Add test. [#CLICKHOUSE-3173] 2017-07-26 12:31:55 +00:00			`nodes = [node1, node2]`

			`@pytest.fixture(scope="module")`
			`def started_cluster():`
			`try:`
			`cluster.start()`
			`yield cluster`

			`finally:`
Fix integration tests. [#CLICKHOUSE-3173] 2017-07-26 14:15:16 +00:00			`pass`
Add test. [#CLICKHOUSE-3173] 2017-07-26 12:31:55 +00:00			`cluster.shutdown()`


			`def test_deduplication_window_in_seconds(started_cluster):`
			`node = node1`

Add integration test. [#CLICKHOUSE-3178] 2017-10-06 11:29:58 +00:00			`node1.query("""`
			`CREATE TABLE simple ON CLUSTER test_cluster (date Date, id UInt32)`
			`ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/simple', '{replica}', date, id, 8192)""")`

			`node.query("INSERT INTO simple VALUES (0, 0)")`
Fix integration tests. [#CLICKHOUSE-3173] 2017-07-26 14:15:16 +00:00			`time.sleep(1)`
Add integration test. [#CLICKHOUSE-3178] 2017-10-06 11:29:58 +00:00			`node.query("INSERT INTO simple VALUES (0, 0)") # deduplication works here`
			`node.query("INSERT INTO simple VALUES (0, 1)")`
			`assert TSV(node.query("SELECT count() FROM simple")) == TSV("2\n")`
Add test. [#CLICKHOUSE-3173] 2017-07-26 12:31:55 +00:00
			`# wait clean thread`
			`time.sleep(2)`

Add integration test. [#CLICKHOUSE-3178] 2017-10-06 11:29:58 +00:00			`assert TSV.toMat(node.query("SELECT count() FROM system.zookeeper WHERE path='/clickhouse/tables/0/simple/blocks'"))[0][0] == "1"`
			`node.query("INSERT INTO simple VALUES (0, 0)") # deduplication doesn't works here, the first hash node was deleted`
			`assert TSV.toMat(node.query("SELECT count() FROM simple"))[0][0] == "3"`
Add test. [#CLICKHOUSE-3173] 2017-07-26 12:31:55 +00:00
Add integration test. [#CLICKHOUSE-3178] 2017-10-06 11:29:58 +00:00			`node1.query("""DROP TABLE simple ON CLUSTER test_cluster""")`
Add test. [#CLICKHOUSE-3173] 2017-07-26 12:31:55 +00:00

			`# Currently this test just reproduce incorrect behavior that sould be fixed`
			`def test_deduplication_works_in_case_of_intensive_inserts(started_cluster):`
			`inserters = []`
			`fetchers = []`

Add integration test. [#CLICKHOUSE-3178] 2017-10-06 11:29:58 +00:00			`node1.query("""`
			`CREATE TABLE simple ON CLUSTER test_cluster (date Date, id UInt32)`
			`ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/simple', '{replica}', date, id, 8192)""")`

			`node1.query("INSERT INTO simple VALUES (0, 0)")`

Add test. [#CLICKHOUSE-3173] 2017-07-26 12:31:55 +00:00			`for node in nodes:`
			`host = node.ip_address`

			`inserters.append(CommandRequest(['/bin/bash'], timeout=10, stdin="""`
			`set -e`
			for i in `seq 1000`; do
CLICKHOUSE-3894: Fixes in tests 2018-09-07 11:51:51 +00:00			`{} --host {} -q "INSERT INTO simple VALUES (0, 0)"`
Add test. [#CLICKHOUSE-3173] 2017-07-26 12:31:55 +00:00			`done`
CLICKHOUSE-3894: Fixes in tests 2018-09-07 11:51:51 +00:00			`""".format(cluster.get_client_cmd(), host)))`
Add test. [#CLICKHOUSE-3173] 2017-07-26 12:31:55 +00:00
			`fetchers.append(CommandRequest(['/bin/bash'], timeout=10, stdin="""`
			`set -e`
			for i in `seq 1000`; do
CLICKHOUSE-3894: Fixes in tests 2018-09-07 11:51:51 +00:00			res=`{} --host {} -q "SELECT count() FROM simple"`
Add integration test. [#CLICKHOUSE-3178] 2017-10-06 11:29:58 +00:00			`if [[ $? -ne 0 \|\| $res -ne 1 ]]; then`
Add test. [#CLICKHOUSE-3173] 2017-07-26 12:31:55 +00:00			`echo "Selected $res elements! Host: {}" 1>&2`
			`exit -1`
			`fi;`
			`done`
CLICKHOUSE-3894: Fixes in tests 2018-09-07 11:51:51 +00:00			`""".format(cluster.get_client_cmd(), host, node.name)))`
Add test. [#CLICKHOUSE-3173] 2017-07-26 12:31:55 +00:00
			`# There were not errors during INSERTs`
			`for inserter in inserters:`
			`try:`
			`inserter.get_answer()`
Add integration test. [#CLICKHOUSE-3178] 2017-10-06 11:29:58 +00:00			`except QueryTimeoutExceedException:`
			`# Only timeout is accepted`
			`pass`
Add test. [#CLICKHOUSE-3173] 2017-07-26 12:31:55 +00:00
			`# There were not errors during SELECTs`
			`for fetcher in fetchers:`
			`try:`
			`fetcher.get_answer()`
Add integration test. [#CLICKHOUSE-3178] 2017-10-06 11:29:58 +00:00			`except QueryTimeoutExceedException:`
			`# Only timeout is accepted`
Add test. [#CLICKHOUSE-3173] 2017-07-26 12:31:55 +00:00			`pass`
Add integration test. [#CLICKHOUSE-3178] 2017-10-06 11:29:58 +00:00
			`node1.query("""DROP TABLE simple ON CLUSTER test_cluster""")`