# pylint: disable=unused-argument
# pylint: disable=redefined-outer-name
import pytest
from helpers . client import QueryRuntimeException
from helpers . cluster import ClickHouseCluster
# Module-level cluster object, built from this file's path, plus the single
# instance the test talks to. The instance is created with ZooKeeper enabled.
# NOTE(review): the instance-name literal is padded with spaces; this looks
# like an extraction artifact -- confirm the intended name before running.
cluster = ClickHouseCluster(__file__)
node = cluster.add_instance(
    " node ",
    with_zookeeper=True,
)


# pytest only accepts the exact scope names ("function", "class", "module",
# "package", "session"); a space-padded value is rejected as an unexpected
# scope, so the literal must be exactly "module".
@pytest.fixture(scope="module")
def start_cluster():
    """Start the module-level ``cluster`` and hand it to the tests.

    Yields:
        The ``cluster`` object after ``cluster.start()`` has returned.

    ``cluster.shutdown()`` runs in the ``finally`` clause, i.e. when the
    fixture is torn down and also if ``cluster.start()`` itself raises.
    """
    try:
        cluster.start()
        yield cluster
    finally:
        cluster.shutdown()


def get_counts():
    """Return the current row counts as a ``(src, a, b, c)`` tuple of ints.

    ``src`` is the count of ``test``; ``a``, ``b`` and ``c`` are the counts of
    ``test_mv_a``, ``test_mv_b`` and ``test_mv_c``. The four queries are sent
    to ``node`` one after another, in that order.
    """
    statements = (
        " SELECT count() FROM test ",
        " SELECT count() FROM test_mv_a ",
        " SELECT count() FROM test_mv_b ",
        " SELECT count() FROM test_mv_c ",
    )
    # The list comprehension runs the queries eagerly and in order.
    src, a, b, c = [int(node.query(sql)) for sql in statements]
    return src, a, b, c


def _expect_counts(previous, deltas, step):
    """Read the current row counts and check them against ``previous + deltas``.

    Args:
        previous: ``(src, a, b, c)`` counts observed after the preceding step.
        deltas: expected growth of each table since then, in the same order.
        step: short label for the step, used in the assertion message.

    Returns:
        The counts just read, to be passed as ``previous`` to the next step.

    Raises:
        AssertionError: if any of the four counts differs from the expectation.
    """
    current = tuple(get_counts())
    expected = tuple(old + delta for old, delta in zip(previous, deltas))
    assert current == expected, "{}: expected (src, a, b, c) == {}, got {}".format(
        step, expected, current
    )
    return current


def test_basic(start_cluster):
    """Check insert deduplication for ``test`` and its three materialized views.

    The test creates the replicated table ``test`` and the views ``test_mv_a``,
    ``test_mv_b`` (partitioned by ``A``) and ``test_mv_c``, then repeats inserts
    with and without ``deduplicate_blocks_in_dependent_materialized_views`` and
    with a ``max_partitions_per_insert_block`` limit. After every step the row
    counts of all four tables are compared with the counts of the previous
    step. The steps are order-dependent: each one relies on the state left by
    the one before it.
    """
    counts = (0, 0, 0, 0)

    # NOTE(review): the quoted ZooKeeper paths and replica names below are
    # reproduced as found, including the spaces inside the quotes -- confirm
    # them against the intended values.
    node.query(
        """
        CREATE TABLE test ( A Int64 ) ENGINE = ReplicatedMergeTree ( ' /clickhouse/test/tables/test ' , ' 1 ' ) ORDER BY tuple ( ) ;
        CREATE MATERIALIZED VIEW test_mv_a Engine = ReplicatedMergeTree ( ' /clickhouse/test/tables/test_mv_a ' , ' 1 ' ) order by tuple ( ) AS SELECT A FROM test ;
        CREATE MATERIALIZED VIEW test_mv_b Engine = ReplicatedMergeTree ( ' /clickhouse/test/tables/test_mv_b ' , ' 1 ' ) partition by A order by tuple ( ) AS SELECT A FROM test ;
        CREATE MATERIALIZED VIEW test_mv_c Engine = ReplicatedMergeTree ( ' /clickhouse/test/tables/test_mv_c ' , ' 1 ' ) order by tuple ( ) AS SELECT A FROM test ;
        INSERT INTO test values ( 999 ) ;
        INSERT INTO test values ( 999 ) ;
        """
    )
    # The two identical inserts leave one row in `test`, while every view
    # receives both of them.
    counts = _expect_counts(counts, (1, 2, 2, 2), "duplicate single-row inserts")

    # This insert fails on test_mv_b: the view is partitioned by A, and ten
    # distinct values are inserted with max_partitions_per_insert_block = 3.
    with pytest.raises(QueryRuntimeException):
        node.query(
            """
            SET max_partitions_per_insert_block = 3 ;
            INSERT INTO test SELECT number FROM numbers ( 10 ) ;
            """
        )
    # Despite the exception, `test`, test_mv_a and test_mv_c got the ten rows;
    # only test_mv_b is unchanged.
    counts = _expect_counts(
        counts, (10, 10, 0, 10), "insert exceeding the partition limit"
    )

    # Deduplication only for the src table.
    node.query(" INSERT INTO test SELECT number FROM numbers(10) ")
    counts = _expect_counts(
        counts, (0, 10, 10, 10), "repeated insert, default settings"
    )

    # Deduplication for the MV tables does not work yet, because the previous
    # inserts have not written their deduplication tokens to the log due to
    # `deduplicate_blocks_in_dependent_materialized_views = 0`.
    node.query(
        """
        SET deduplicate_blocks_in_dependent_materialized_views = 1 ;
        INSERT INTO test SELECT number FROM numbers ( 10 ) ;
        """
    )
    counts = _expect_counts(
        counts, (0, 10, 10, 10), "first insert with MV deduplication enabled"
    )

    # Deduplication for all the tables.
    node.query(
        """
        SET deduplicate_blocks_in_dependent_materialized_views = 1 ;
        INSERT INTO test SELECT number FROM numbers ( 10 ) ;
        """
    )
    counts = _expect_counts(
        counts, (0, 0, 0, 0), "second insert with MV deduplication enabled"
    )

    # This insert fails on test_mv_b due to the partitioning by A; the data is
    # unique, so it is not deduplicated.
    with pytest.raises(QueryRuntimeException):
        node.query(
            """
            SET max_partitions_per_insert_block = 3 ;
            SET deduplicate_blocks_in_dependent_materialized_views = 1 ;
            INSERT INTO test SELECT number FROM numbers ( 100 , 10 ) ;
            """
        )
    counts = _expect_counts(
        counts, (10, 10, 0, 10), "new data exceeding the partition limit"
    )

    # Deduplication for all tables except test_mv_b. For test_mv_b the data is
    # unique, because the previous insert failed there with an exception.
    node.query(
        """
        SET deduplicate_blocks_in_dependent_materialized_views = 1 ;
        INSERT INTO test SELECT number FROM numbers ( 100 , 10 ) ;
        """
    )
    _expect_counts(counts, (0, 0, 10, 0), "retry of the failed insert")