2020-02-27 16:47:40 +00:00
import logging
2020-03-19 16:37:55 +00:00
import time
2021-04-21 09:53:46 +00:00
import os
2020-02-27 16:47:40 +00:00
import pytest
2022-07-07 20:19:15 +00:00
from helpers . cluster import ClickHouseCluster
2023-01-02 12:51:17 +00:00
from helpers . mock_servers import start_mock_servers
2021-12-07 14:43:55 +00:00
from helpers . utility import generate_values , replace_config , SafeThread
2022-09-18 21:47:43 +00:00
from helpers . wait_for_helpers import wait_for_delete_inactive_parts
from helpers . wait_for_helpers import wait_for_delete_empty_parts
2022-09-18 21:25:26 +00:00
2020-02-27 16:47:40 +00:00
2021-04-21 09:53:46 +00:00
# Directory containing this test module, with symlinks resolved via realpath.
# Used below to build the path to a node's generated storage_conf.xml.
SCRIPT_DIR = os . path . dirname ( os . path . realpath ( __file__ ) )
2020-02-27 16:47:40 +00:00
@pytest.fixture(scope="module")
def cluster():
    """Module-scoped fixture: start the test cluster, yield it, then shut it down.

    Two instances are registered, both with MinIO enabled:

    * ``node`` - the main instance; ``stay_alive=True`` so tests may stop and
      restart the server process.
    * ``node_with_limited_disk`` - an instance with a 2M tmpfs mounted at
      ``/jbod1``.

    After the cluster is started the S3 mock servers are launched through
    ``run_s3_mocks``.
    """
    # Construct the cluster object before entering ``try`` so that a failing
    # constructor surfaces its own exception instead of an UnboundLocalError
    # raised by ``cluster.shutdown()`` in the ``finally`` clause.
    cluster = ClickHouseCluster(__file__)
    try:
        cluster.add_instance(
            "node",
            main_configs=[
                "configs/config.xml",
                "configs/config.d/storage_conf.xml",
                "configs/config.d/bg_processing_pool_conf.xml",
            ],
            stay_alive=True,
            with_minio=True,
        )
        cluster.add_instance(
            "node_with_limited_disk",
            main_configs=[
                "configs/config.d/storage_conf.xml",
                "configs/config.d/bg_processing_pool_conf.xml",
            ],
            with_minio=True,
            tmpfs=[
                "/jbod1:size=2M",
            ],
        )

        logging.info("Starting cluster...")
        cluster.start()
        logging.info("Cluster started")
        run_s3_mocks(cluster)

        yield cluster
    finally:
        # Runs on normal teardown and when setup fails part-way through.
        cluster.shutdown()
2020-03-19 16:37:55 +00:00
# Expected S3 object counts. The tests below assert that the number of objects
# under "data/" equals FILES_OVERHEAD per existing table plus a per-part term.
FILES_OVERHEAD = 1
FILES_OVERHEAD_PER_COLUMN = 2  # Data and mark files
# NOTE(review): the factor 3 presumably corresponds to the three columns
# (dt, id, data) of the table built by create_table; the meaning of the
# remaining "+ 2 + 6 + 1" terms is not visible in this file - confirm.
FILES_OVERHEAD_PER_PART_WIDE = FILES_OVERHEAD_PER_COLUMN * 3 + 2 + 6 + 1
FILES_OVERHEAD_PER_PART_COMPACT = 10 + 1
2020-03-19 16:37:55 +00:00
2021-09-20 18:18:08 +00:00
def create_table(node, table_name, **additional_settings):
    """(Re)create a MergeTree table with the schema shared by most tests.

    Any existing table called ``table_name`` is dropped first.

    Args:
        node: object exposing ``query(sql)``; the statements are sent to it.
        table_name: name of the table to create.
        **additional_settings: MergeTree settings that are added to, or
            override, the defaults (``storage_policy='s3'``,
            ``old_parts_lifetime=0``, ``index_granularity=512``).
    """
    settings = {
        "storage_policy": "s3",
        "old_parts_lifetime": 0,
        "index_granularity": 512,
    }
    settings.update(additional_settings)

    # repr() renders Python strings as single-quoted literals and numbers
    # as-is, which is what the SETTINGS clause needs.
    settings_clause = ",".join(f"{key}={value!r}" for key, value in settings.items())

    create_table_statement = f"""
        CREATE TABLE {table_name} (
            dt Date,
            id Int64,
            data String,
            INDEX min_max (id) TYPE minmax GRANULARITY 3
        ) ENGINE=MergeTree()
        PARTITION BY dt
        ORDER BY (dt, id)
        SETTINGS {settings_clause}"""

    node.query(f"DROP TABLE IF EXISTS {table_name}")
    node.query(create_table_statement)
2021-08-04 10:27:45 +00:00
def run_s3_mocks(cluster):
    """Launch the S3 mock scripts from the ``s3_mocks`` directory next to this file.

    Each tuple handed to ``start_mock_servers`` is
    ``(script file, container name, port)``:
    ``unstable_proxy.py`` on 8081 and ``no_delete_objects.py`` on 8082, both
    in the ``resolver`` container.
    """
    script_dir = os.path.join(os.path.dirname(__file__), "s3_mocks")
    start_mock_servers(
        cluster,
        script_dir,
        [
            ("unstable_proxy.py", "resolver", "8081"),
            ("no_delete_objects.py", "resolver", "8082"),
        ],
    )
2021-08-04 10:27:45 +00:00
2021-04-09 19:30:42 +00:00
def wait_for_delete_s3_objects(cluster, expected, timeout=30):
    """Poll MinIO until exactly ``expected`` objects remain under ``data/``.

    The bucket is listed recursively once per second. The function returns as
    soon as the count matches; otherwise, after ``timeout`` attempts, one
    final listing is made and asserted on.

    Args:
        cluster: object exposing ``minio_client`` and ``minio_bucket``.
        expected: required number of objects under the ``data/`` prefix.
        timeout: maximum number of one-second polling attempts.

    Raises:
        AssertionError: if the object count still differs from ``expected``
            after the last attempt.
    """
    minio = cluster.minio_client

    def count_objects():
        return len(
            list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))
        )

    while timeout > 0:
        if count_objects() == expected:
            return
        timeout -= 1
        time.sleep(1)

    # Last check after the polling budget is exhausted; include the numbers so
    # a failure is diagnosable from the test log.
    actual = count_objects()
    assert (
        actual == expected
    ), f"expected {expected} objects under data/, found {actual}"
2021-04-09 19:30:42 +00:00
2020-03-19 16:37:55 +00:00
# No @pytest.mark.parametrize here: marks applied to fixtures have no effect
# (and are deprecated by pytest). ``node_name`` is supplied by the
# parametrization of the test that triggers this autouse fixture.
@pytest.fixture(autouse=True)
def drop_table(cluster, node_name):
    """Autouse teardown: drop ``s3_test`` and make sure no S3 objects are left.

    Runs after every test. The table is dropped on the instance named by the
    test's ``node_name`` parameter, then the fixture waits for the ``data/``
    prefix to become empty. Whatever the outcome of that wait, any objects
    still present are removed.
    """
    yield
    node = cluster.instances[node_name]
    minio = cluster.minio_client

    node.query("DROP TABLE IF EXISTS s3_test NO DELAY")

    try:
        wait_for_delete_s3_objects(cluster, 0)
    finally:
        # Remove extra objects to prevent tests cascade failing
        for obj in list(
            minio.list_objects(cluster.minio_bucket, "data/", recursive=True)
        ):
            minio.remove_object(cluster.minio_bucket, obj.object_name)
2020-03-19 16:37:55 +00:00
@pytest.mark.parametrize(
    "min_rows_for_wide_part,files_per_part,node_name",
    [
        (0, FILES_OVERHEAD_PER_PART_WIDE, "node"),
        (8192, FILES_OVERHEAD_PER_PART_COMPACT, "node"),
    ],
)
def test_simple_insert_select(
    cluster, min_rows_for_wide_part, files_per_part, node_name
):
    """Insert two partitions and check the data read back and the S3 object count.

    Parametrized over ``min_rows_for_wide_part`` so that the 4096-row inserts
    are checked against both the wide-part and the compact-part object counts.
    """
    node = cluster.instances[node_name]
    create_table(node, "s3_test", min_rows_for_wide_part=min_rows_for_wide_part)
    minio = cluster.minio_client

    values1 = generate_values("2020-01-03", 4096)
    node.query("INSERT INTO s3_test VALUES {}".format(values1))
    assert node.query("SELECT * FROM s3_test order by dt, id FORMAT Values") == values1
    assert (
        len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)))
        == FILES_OVERHEAD + files_per_part
    )

    values2 = generate_values("2020-01-04", 4096)
    node.query("INSERT INTO s3_test VALUES {}".format(values2))
    assert (
        node.query("SELECT * FROM s3_test ORDER BY dt, id FORMAT Values")
        == values1 + "," + values2
    )
    # Second insert goes to another partition, hence a second part.
    assert (
        len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)))
        == FILES_OVERHEAD + files_per_part * 2
    )

    assert (
        node.query("SELECT count(*) FROM s3_test where id = 1 FORMAT Values") == "(2)"
    )
2020-02-27 16:47:40 +00:00
2022-03-22 16:39:58 +00:00
@pytest.mark.parametrize("merge_vertical,node_name", [(True, "node"), (False, "node")])
def test_insert_same_partition_and_merge(cluster, merge_vertical, node_name):
    """Insert six parts into one partition, let them merge, and check the cleanup.

    With ``merge_vertical`` set, both vertical-merge activation thresholds are
    set to 0 on the table. Merges are stopped during the inserts so that the
    six separate parts can be counted in S3, then re-enabled; the test waits
    for a single active part and for the old parts' objects to be deleted.
    """
    settings = {}
    if merge_vertical:
        settings["vertical_merge_algorithm_min_rows_to_activate"] = 0
        settings["vertical_merge_algorithm_min_columns_to_activate"] = 0

    node = cluster.instances[node_name]
    create_table(node, "s3_test", **settings)
    minio = cluster.minio_client

    node.query("SYSTEM STOP MERGES s3_test")
    node.query(
        "INSERT INTO s3_test VALUES {}".format(generate_values("2020-01-03", 1024))
    )
    node.query(
        "INSERT INTO s3_test VALUES {}".format(generate_values("2020-01-03", 2048))
    )
    node.query(
        "INSERT INTO s3_test VALUES {}".format(generate_values("2020-01-03", 4096))
    )
    node.query(
        "INSERT INTO s3_test VALUES {}".format(generate_values("2020-01-03", 1024, -1))
    )
    node.query(
        "INSERT INTO s3_test VALUES {}".format(generate_values("2020-01-03", 2048, -1))
    )
    node.query(
        "INSERT INTO s3_test VALUES {}".format(generate_values("2020-01-03", 4096, -1))
    )
    assert node.query("SELECT sum(id) FROM s3_test FORMAT Values") == "(0)"
    assert (
        node.query("SELECT count(distinct(id)) FROM s3_test FORMAT Values") == "(8192)"
    )
    assert (
        len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)))
        == FILES_OVERHEAD_PER_PART_WIDE * 6 + FILES_OVERHEAD
    )

    node.query("SYSTEM START MERGES s3_test")

    # Wait for merges and old parts deletion
    for attempt in range(0, 10):
        parts_count = node.query(
            "SELECT COUNT(*) FROM system.parts WHERE table = 's3_test' and active = 1 FORMAT Values"
        )

        if parts_count == "(1)":
            break

        # Out of attempts: fail with the last observed count.
        if attempt == 9:
            assert parts_count == "(1)"

        time.sleep(1)

    assert node.query("SELECT sum(id) FROM s3_test FORMAT Values") == "(0)"
    assert (
        node.query("SELECT count(distinct(id)) FROM s3_test FORMAT Values") == "(8192)"
    )
    wait_for_delete_s3_objects(
        cluster, FILES_OVERHEAD_PER_PART_WIDE + FILES_OVERHEAD, timeout=45
    )
2020-03-19 16:37:55 +00:00
2021-11-11 10:19:49 +00:00
@pytest.mark.parametrize("node_name", ["node"])
def test_alter_table_columns(cluster, node_name):
    """Add, modify and drop a column and check the S3 object count after each step."""
    node = cluster.instances[node_name]
    create_table(node, "s3_test")

    node.query(
        "INSERT INTO s3_test VALUES {}".format(generate_values("2020-01-03", 4096))
    )
    node.query(
        "INSERT INTO s3_test VALUES {}".format(generate_values("2020-01-03", 4096, -1))
    )

    node.query("ALTER TABLE s3_test ADD COLUMN col1 UInt64 DEFAULT 1")
    # To ensure parts have merged
    node.query("OPTIMIZE TABLE s3_test")

    assert node.query("SELECT sum(col1) FROM s3_test FORMAT Values") == "(8192)"
    assert (
        node.query("SELECT sum(col1) FROM s3_test WHERE id > 0 FORMAT Values")
        == "(4096)"
    )
    # One merged wide part plus the files of the added column.
    wait_for_delete_s3_objects(
        cluster,
        FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE + FILES_OVERHEAD_PER_COLUMN,
    )

    node.query(
        "ALTER TABLE s3_test MODIFY COLUMN col1 String", settings={"mutations_sync": 2}
    )

    assert node.query("SELECT distinct(col1) FROM s3_test FORMAT Values") == "('1')"
    # and file with mutation
    wait_for_delete_s3_objects(
        cluster,
        FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE + FILES_OVERHEAD_PER_COLUMN + 1,
    )

    node.query("ALTER TABLE s3_test DROP COLUMN col1", settings={"mutations_sync": 2})

    # and 2 files with mutations
    wait_for_delete_s3_objects(
        cluster, FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE + 2
    )
2020-03-19 16:37:55 +00:00
2021-11-11 10:19:49 +00:00
@pytest.mark.parametrize("node_name", ["node"])
def test_attach_detach_partition(cluster, node_name):
    """Detach, attach, drop and drop-detached partitions; check rows and S3 objects.

    The assertions encode that detaching a partition keeps its objects in S3,
    while DROP PARTITION and DROP DETACHED PARTITION remove them.
    """
    node = cluster.instances[node_name]
    create_table(node, "s3_test")
    minio = cluster.minio_client

    node.query(
        "INSERT INTO s3_test VALUES {}".format(generate_values("2020-01-03", 4096))
    )
    node.query(
        "INSERT INTO s3_test VALUES {}".format(generate_values("2020-01-04", 4096))
    )
    assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(8192)"
    assert (
        len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)))
        == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2
    )

    node.query("ALTER TABLE s3_test DETACH PARTITION '2020-01-03'")
    wait_for_delete_empty_parts(node, "s3_test")
    wait_for_delete_inactive_parts(node, "s3_test")
    assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(4096)"
    assert (
        len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)))
        == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2
    )

    node.query("ALTER TABLE s3_test ATTACH PARTITION '2020-01-03'")
    assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(8192)"
    assert (
        len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)))
        == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2
    )

    node.query("ALTER TABLE s3_test DROP PARTITION '2020-01-03'")
    wait_for_delete_empty_parts(node, "s3_test")
    wait_for_delete_inactive_parts(node, "s3_test")
    assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(4096)"
    assert (
        len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)))
        == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 1
    )

    node.query("ALTER TABLE s3_test DETACH PARTITION '2020-01-04'")
    wait_for_delete_empty_parts(node, "s3_test")
    wait_for_delete_inactive_parts(node, "s3_test")
    assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(0)"
    # recursive=True, like every other listing in this file: a non-recursive
    # listing counts top-level entries under "data/" rather than objects.
    assert (
        len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)))
        == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 1
    )

    node.query(
        "ALTER TABLE s3_test DROP DETACHED PARTITION '2020-01-04'",
        settings={"allow_drop_detached": 1},
    )
    assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(0)"
    assert (
        len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)))
        == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 0
    )
2020-03-19 16:37:55 +00:00
2021-11-11 10:19:49 +00:00
@pytest.mark.parametrize("node_name", ["node"])
def test_move_partition_to_another_disk(cluster, node_name):
    """Move a partition from S3 to disk ``hdd`` and back, tracking the S3 object count.

    While the partition lives on ``hdd`` only one part's objects are expected
    in S3; after moving it back the count returns to two parts.
    """
    node = cluster.instances[node_name]
    create_table(node, "s3_test")
    minio = cluster.minio_client

    node.query(
        "INSERT INTO s3_test VALUES {}".format(generate_values("2020-01-03", 4096))
    )
    node.query(
        "INSERT INTO s3_test VALUES {}".format(generate_values("2020-01-04", 4096))
    )
    assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(8192)"
    assert (
        len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)))
        == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2
    )

    node.query("ALTER TABLE s3_test MOVE PARTITION '2020-01-04' TO DISK 'hdd'")
    assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(8192)"
    assert (
        len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)))
        == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE
    )

    node.query("ALTER TABLE s3_test MOVE PARTITION '2020-01-04' TO DISK 's3'")
    assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(8192)"
    assert (
        len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)))
        == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2
    )
2020-08-07 11:40:19 +00:00
2020-03-19 16:37:55 +00:00
2021-09-20 18:18:08 +00:00
@pytest.mark.parametrize("node_name", ["node"])
def test_table_manipulations(cluster, node_name):
    """Exercise RENAME, CHECK, DETACH/ATTACH and TRUNCATE on an S3-backed table.

    Row counts and S3 object counts are expected to be unaffected by rename
    and detach/attach; after TRUNCATE only the per-table overhead remains.
    """
    node = cluster.instances[node_name]
    create_table(node, "s3_test")
    minio = cluster.minio_client

    node.query(
        "INSERT INTO s3_test VALUES {}".format(generate_values("2020-01-03", 4096))
    )
    node.query(
        "INSERT INTO s3_test VALUES {}".format(generate_values("2020-01-04", 4096))
    )

    node.query("RENAME TABLE s3_test TO s3_renamed")
    assert node.query("SELECT count(*) FROM s3_renamed FORMAT Values") == "(8192)"
    assert (
        len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)))
        == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2
    )
    node.query("RENAME TABLE s3_renamed TO s3_test")

    assert node.query("CHECK TABLE s3_test FORMAT Values") == "(1)"

    node.query("DETACH TABLE s3_test")
    node.query("ATTACH TABLE s3_test")
    assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(8192)"
    assert (
        len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)))
        == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2
    )

    node.query("TRUNCATE TABLE s3_test")
    wait_for_delete_empty_parts(node, "s3_test")
    wait_for_delete_inactive_parts(node, "s3_test")
    assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(0)"
    assert (
        len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)))
        == FILES_OVERHEAD
    )
2020-03-23 14:45:48 +00:00
2021-11-11 10:19:49 +00:00
@pytest.mark.parametrize("node_name", ["node"])
def test_move_replace_partition_to_another_table(cluster, node_name):
    """MOVE/REPLACE partitions between two S3 tables, then FREEZE and drop them.

    S3 object counts are checked after every step. The test finishes by
    removing all remaining objects under ``data/`` itself, because the frozen
    backup is expected to outlive the dropped table.
    """
    node = cluster.instances[node_name]
    create_table(node, "s3_test")
    minio = cluster.minio_client

    node.query(
        "INSERT INTO s3_test VALUES {}".format(generate_values("2020-01-03", 4096))
    )
    node.query(
        "INSERT INTO s3_test VALUES {}".format(generate_values("2020-01-04", 4096))
    )
    node.query(
        "INSERT INTO s3_test VALUES {}".format(generate_values("2020-01-05", 4096, -1))
    )
    node.query(
        "INSERT INTO s3_test VALUES {}".format(generate_values("2020-01-06", 4096, -1))
    )
    assert node.query("SELECT sum(id) FROM s3_test FORMAT Values") == "(0)"
    assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(16384)"
    assert (
        len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)))
        == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 4
    )

    create_table(node, "s3_clone")

    node.query("ALTER TABLE s3_test MOVE PARTITION '2020-01-03' TO TABLE s3_clone")
    node.query("ALTER TABLE s3_test MOVE PARTITION '2020-01-05' TO TABLE s3_clone")
    assert node.query("SELECT sum(id) FROM s3_test FORMAT Values") == "(0)"
    assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(8192)"
    assert node.query("SELECT sum(id) FROM s3_clone FORMAT Values") == "(0)"
    assert node.query("SELECT count(*) FROM s3_clone FORMAT Values") == "(8192)"
    # The number of part objects in S3 should be unchanged; only the per-table
    # overhead of the second table (s3_clone) is added.
    assert (
        len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)))
        == FILES_OVERHEAD * 2 + FILES_OVERHEAD_PER_PART_WIDE * 4
    )

    # Add new partitions to source table, but with different values and replace them from copied table.
    node.query(
        "INSERT INTO s3_test VALUES {}".format(generate_values("2020-01-03", 4096, -1))
    )
    node.query(
        "INSERT INTO s3_test VALUES {}".format(generate_values("2020-01-05", 4096))
    )
    assert node.query("SELECT sum(id) FROM s3_test FORMAT Values") == "(0)"
    assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(16384)"
    assert (
        len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)))
        == FILES_OVERHEAD * 2 + FILES_OVERHEAD_PER_PART_WIDE * 6
    )

    node.query("ALTER TABLE s3_test REPLACE PARTITION '2020-01-03' FROM s3_clone")
    node.query("ALTER TABLE s3_test REPLACE PARTITION '2020-01-05' FROM s3_clone")
    assert node.query("SELECT sum(id) FROM s3_test FORMAT Values") == "(0)"
    assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(16384)"
    assert node.query("SELECT sum(id) FROM s3_clone FORMAT Values") == "(0)"
    assert node.query("SELECT count(*) FROM s3_clone FORMAT Values") == "(8192)"

    # Wait for outdated partitions deletion.
    wait_for_delete_s3_objects(
        cluster, FILES_OVERHEAD * 2 + FILES_OVERHEAD_PER_PART_WIDE * 4
    )

    node.query("DROP TABLE s3_clone NO DELAY")
    assert node.query("SELECT sum(id) FROM s3_test FORMAT Values") == "(0)"
    assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(16384)"
    # Data should remain in S3
    assert (
        len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)))
        == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 4
    )

    node.query("ALTER TABLE s3_test FREEZE")
    # Number S3 objects should be unchanged.
    assert (
        len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)))
        == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 4
    )

    node.query("DROP TABLE s3_test NO DELAY")
    # Backup data should remain in S3.
    wait_for_delete_s3_objects(cluster, FILES_OVERHEAD_PER_PART_WIDE * 4)

    # Clean the bucket by hand: the frozen backup keeps the part objects alive.
    for obj in list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)):
        minio.remove_object(cluster.minio_bucket, obj.object_name)
2021-02-24 14:26:46 +00:00
2021-09-20 18:18:08 +00:00
@pytest.mark.parametrize("node_name", ["node"])
def test_freeze_unfreeze(cluster, node_name):
    """FREEZE two named backups, TRUNCATE, then UNFREEZE and expect S3 to be emptied.

    After TRUNCATE the objects of both parts are still expected in S3; once
    both backups are unfrozen only the per-table overhead should remain.
    """
    node = cluster.instances[node_name]
    create_table(node, "s3_test")
    minio = cluster.minio_client

    node.query(
        "INSERT INTO s3_test VALUES {}".format(generate_values("2020-01-03", 4096))
    )
    node.query("ALTER TABLE s3_test FREEZE WITH NAME 'backup1'")
    node.query(
        "INSERT INTO s3_test VALUES {}".format(generate_values("2020-01-04", 4096))
    )
    node.query("ALTER TABLE s3_test FREEZE WITH NAME 'backup2'")

    node.query("TRUNCATE TABLE s3_test")
    wait_for_delete_empty_parts(node, "s3_test")
    wait_for_delete_inactive_parts(node, "s3_test")
    assert (
        len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)))
        == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2
    )

    # Unfreeze single partition from backup1.
    node.query(
        "ALTER TABLE s3_test UNFREEZE PARTITION '2020-01-03' WITH NAME 'backup1'"
    )
    # Unfreeze all partitions from backup2.
    node.query("ALTER TABLE s3_test UNFREEZE WITH NAME 'backup2'")

    wait_for_delete_s3_objects(cluster, FILES_OVERHEAD)

    # Data should be removed from S3.
    assert (
        len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)))
        == FILES_OVERHEAD
    )
2021-04-21 09:53:46 +00:00
2022-06-08 12:09:59 +00:00
@pytest.mark.parametrize("node_name", ["node"])
def test_freeze_system_unfreeze(cluster, node_name):
    """FREEZE two tables under one backup name and release it with SYSTEM UNFREEZE.

    One of the frozen tables (``s3_test_removed``) is dropped before the
    unfreeze. After ``SYSTEM UNFREEZE WITH NAME 'backup3'`` only the per-table
    overhead is expected to remain in S3.
    """
    node = cluster.instances[node_name]
    create_table(node, "s3_test")
    create_table(node, "s3_test_removed")
    minio = cluster.minio_client

    node.query(
        "INSERT INTO s3_test VALUES {}".format(generate_values("2020-01-04", 4096))
    )
    node.query(
        "INSERT INTO s3_test VALUES {}".format(generate_values("2020-01-04", 4096))
    )
    node.query("ALTER TABLE s3_test FREEZE WITH NAME 'backup3'")
    node.query("ALTER TABLE s3_test_removed FREEZE WITH NAME 'backup3'")

    node.query("TRUNCATE TABLE s3_test")
    wait_for_delete_empty_parts(node, "s3_test")
    wait_for_delete_inactive_parts(node, "s3_test")
    node.query("DROP TABLE s3_test_removed NO DELAY")
    assert (
        len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)))
        == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2
    )

    # Unfreeze all data from backup3.
    node.query("SYSTEM UNFREEZE WITH NAME 'backup3'")

    wait_for_delete_s3_objects(cluster, FILES_OVERHEAD)

    # Data should be removed from S3.
    assert (
        len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)))
        == FILES_OVERHEAD
    )
2021-09-20 18:18:08 +00:00
@pytest.mark.parametrize("node_name", ["node"])
def test_s3_disk_apply_new_settings(cluster, node_name):
    """Check that a changed ``s3_max_single_part_upload_size`` applies after RELOAD CONFIG.

    The number of S3 write requests for one insert is measured, the setting is
    rewritten to 0 in the node's storage_conf.xml and the config reloaded, and
    a second equal-sized insert is expected to issue three times as many write
    requests.
    """
    node = cluster.instances[node_name]
    create_table(node, "s3_test")

    # NOTE(review): the edited config file is not restored at the end of the
    # test, so the new setting stays in effect for tests that run afterwards.
    config_path = os.path.join(
        SCRIPT_DIR,
        "./{}/node/configs/config.d/storage_conf.xml".format(
            cluster.instances_dir_name
        ),
    )

    def get_s3_requests():
        """Return the current value of the S3WriteRequestsCount event counter."""
        node.query("SYSTEM FLUSH LOGS")
        return int(
            node.query(
                "SELECT value FROM system.events WHERE event='S3WriteRequestsCount'"
            )
        )

    s3_requests_before = get_s3_requests()
    node.query(
        "INSERT INTO s3_test VALUES {}".format(generate_values("2020-01-03", 4096))
    )
    s3_requests_to_write_partition = get_s3_requests() - s3_requests_before

    # Force multi-part upload mode.
    replace_config(
        config_path,
        "<s3_max_single_part_upload_size>33554432</s3_max_single_part_upload_size>",
        "<s3_max_single_part_upload_size>0</s3_max_single_part_upload_size>",
    )

    node.query("SYSTEM RELOAD CONFIG")

    s3_requests_before = get_s3_requests()
    node.query(
        "INSERT INTO s3_test VALUES {}".format(generate_values("2020-01-04", 4096, -1))
    )

    # There should be 3 times more S3 requests because multi-part upload mode uses 3 requests to upload object.
    assert get_s3_requests() - s3_requests_before == s3_requests_to_write_partition * 3
2022-07-11 07:22:06 +00:00
@pytest.mark.parametrize("node_name", ["node"])
def test_s3_no_delete_objects(cluster, node_name):
    """Create and synchronously drop a table on the ``no_delete_objects_s3`` policy.

    The test has no explicit assertions; it passes when both statements
    complete without raising.
    """
    node = cluster.instances[node_name]
    create_table(
        node, "s3_test_no_delete_objects", storage_policy="no_delete_objects_s3"
    )
    node.query("DROP TABLE s3_test_no_delete_objects SYNC")
2022-07-11 07:22:06 +00:00
2021-11-11 10:19:49 +00:00
@pytest.mark.parametrize("node_name", ["node"])
def test_s3_disk_reads_on_unstable_connection(cluster, node_name):
    """Read a large table through the ``unstable_s3`` policy 30 times in a row.

    Nine million consecutive numbers are inserted; every read must return the
    same sum of ``id``.
    """
    node = cluster.instances[node_name]
    create_table(node, "s3_test", storage_policy="unstable_s3")
    node.query(
        "INSERT INTO s3_test SELECT today(), *, toString(*) FROM system.numbers LIMIT 9000000"
    )
    for i in range(30):
        print(f"Read sequence {i}")
        # 40499995500000 == sum(range(9000000))
        assert node.query("SELECT sum(id) FROM s3_test").splitlines() == [
            "40499995500000"
        ]
2021-12-16 21:29:25 +00:00
@pytest.mark.parametrize("node_name", ["node"])
def test_lazy_seek_optimization_for_async_read(cluster, node_name):
    """Run a filtering query over ten million random rows stored on S3.

    The query result is not checked; the test passes when the statements
    complete without raising. The table is dropped and all objects under
    ``data/`` are removed by hand at the end.
    """
    node = cluster.instances[node_name]
    node.query("DROP TABLE IF EXISTS s3_test NO DELAY")
    node.query(
        "CREATE TABLE s3_test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3';"
    )
    node.query(
        "INSERT INTO s3_test SELECT * FROM generateRandom('key UInt32, value String') LIMIT 10000000"
    )
    node.query("SELECT * FROM s3_test WHERE value LIKE '%abc%' ORDER BY value LIMIT 10")
    node.query("DROP TABLE IF EXISTS s3_test NO DELAY")

    minio = cluster.minio_client
    for obj in list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)):
        minio.remove_object(cluster.minio_bucket, obj.object_name)
2022-07-11 16:25:28 +00:00
@pytest.mark.parametrize("node_name", ["node_with_limited_disk"])
def test_cache_with_full_disk_space(cluster, node_name):
    """Check that cache inserts are skipped when the cache disk has no space left.

    Runs on ``node_with_limited_disk`` with the ``s3_with_cache_and_jbod``
    storage policy and asserts that the server log contains the
    "Insert into cache is skipped due to insufficient disk space" message
    after a read query.
    """
    node = cluster.instances[node_name]
    node.query("DROP TABLE IF EXISTS s3_test NO DELAY")
    node.query(
        "CREATE TABLE s3_test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_with_cache_and_jbod';"
    )
    node.query(
        "INSERT INTO s3_test SELECT * FROM generateRandom('key UInt32, value String') LIMIT 500000"
    )
    node.query(
        "SELECT * FROM s3_test WHERE value LIKE '%abc%' ORDER BY value FORMAT Null"
    )
    assert node.contains_in_log(
        "Insert into cache is skipped due to insufficient disk space"
    )
    node.query("DROP TABLE IF EXISTS s3_test NO DELAY")
2022-08-03 17:49:16 +00:00
@pytest.mark.parametrize(" node_name ", [" node "])
def test_store_cleanup_disk_s3(cluster, node_name):
    """Recreate a table under the same UUID after its metadata file was deleted.

    Steps: create ``s3_test`` with a fixed UUID and insert a row, kill the
    server, remove the table's ``.sql`` metadata file inside the container,
    start the server again and wait for the two log lines about unused
    directories in the store. The same CREATE and INSERT statements must then
    succeed a second time.
    """
    create_query = " CREATE TABLE s3_test UUID ' 00000000-1000-4000-8000-000000000001 ' (n UInt64) Engine=MergeTree() ORDER BY n SETTINGS storage_policy= ' s3 ' ; "
    insert_query = " INSERT INTO s3_test SELECT 1 "
    instance = cluster.instances[node_name]

    instance.query(" DROP TABLE IF EXISTS s3_test SYNC ")
    instance.query(create_query)
    instance.query(insert_query)

    # Remove the table metadata while the server is down.
    instance.stop_clickhouse(kill=True)
    path_to_data = " /var/lib/clickhouse/ "
    metadata_file = f" { path_to_data } /metadata/default/s3_test.sql "
    instance.exec_in_container([" rm ", metadata_file])
    instance.start_clickhouse()

    instance.wait_for_log_line(
        " Removing unused directory ", timeout=90, look_behind_lines=1000
    )
    instance.wait_for_log_line(" directories from store ")

    # The identical statements are expected to work again after the cleanup.
    instance.query(create_query)
    instance.query(insert_query)
2022-09-14 18:29:36 +00:00
@pytest.mark.parametrize(" node_name ", [" node "])
def test_cache_setting_compatibility(cluster, node_name):
    """Toggle ``do_not_evict_index_and_mark_files`` and check the cache stays usable.

    The test fills ``s3_test``, drops the filesystem cache and verifies that a
    full read creates cache entries matching the ``persistent`` path pattern.
    It then passes the ``1`` and ``0`` variants of the setting to
    ``replace_config`` for the node's ``storage_conf.xml`` (presumably swapping
    the first for the second -- confirm against the helper), restarts the
    server and checks the ``DESCRIBE FILESYSTEM CACHE`` output, the entry count
    and the absence of the "No such file or directory" cache message in the
    log. The same checks are repeated after switching the setting back.
    """
    instance = cluster.instances[node_name]

    count_query = " SELECT count() FROM system.filesystem_cache WHERE cache_path LIKE ' % persistent ' "
    describe_query = " DESCRIBE FILESYSTEM CACHE ' s3_cache_r ' "
    read_query = " SELECT * FROM s3_test FORMAT Null "
    missing_file_message = " No such file or directory: Cache info: "
    setting_on = " <do_not_evict_index_and_mark_files>1</do_not_evict_index_and_mark_files> "
    setting_off = " <do_not_evict_index_and_mark_files>0</do_not_evict_index_and_mark_files> "

    def persistent_entries():
        # Number of rows the count query reports, as an integer.
        return int(instance.query(count_query))

    def cache_description():
        # Whitespace-stripped DESCRIBE output; callers check its last field.
        return instance.query(describe_query).strip()

    instance.query(" DROP TABLE IF EXISTS s3_test NO DELAY ")
    instance.query(
        " CREATE TABLE s3_test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy= ' s3_cache_r ' , compress_marks=false, compress_primary_key=false; "
    )
    instance.query(
        " INSERT INTO s3_test SELECT * FROM generateRandom( ' key UInt32, value String ' ) LIMIT 500 "
    )

    # Start from an empty cache, then populate it with a full read.
    instance.query(" SYSTEM DROP FILESYSTEM CACHE ")
    assert persistent_entries() == 0
    instance.query(" SELECT * FROM s3_test ")
    assert persistent_entries() > 0

    config_path = os.path.join(
        SCRIPT_DIR,
        f" ./ { cluster . instances_dir_name } /node/configs/config.d/storage_conf.xml ",
    )

    # First switch: the running server still reports 1 until it is restarted.
    replace_config(config_path, setting_on, setting_off)
    assert cache_description().endswith(" 1 ")
    instance.restart_clickhouse()
    assert cache_description().endswith(" 0 ")
    assert persistent_entries() > 0
    instance.query(read_query)
    assert not instance.contains_in_log(missing_file_message)

    # Second switch: put the setting back and repeat the checks.
    replace_config(config_path, setting_off, setting_on)
    assert persistent_entries() > 0
    instance.restart_clickhouse()
    assert cache_description().endswith(" 1 ")
    instance.query(read_query)
    assert not instance.contains_in_log(missing_file_message)