From f1f354f22b5d1573e970e5d2a5e2ea540e3f99f8 Mon Sep 17 00:00:00 2001
From: serxa
Date: Thu, 13 Jun 2024 15:33:59 +0000
Subject: [PATCH] add test for granularity and total byte size of resource
 requests

---
 tests/integration/test_scheduler/test.py | 67 ++++++++++++++++++++++++
 1 file changed, 67 insertions(+)

diff --git a/tests/integration/test_scheduler/test.py b/tests/integration/test_scheduler/test.py
index 8e37bd8d403..9940e16ea42 100644
--- a/tests/integration/test_scheduler/test.py
+++ b/tests/integration/test_scheduler/test.py
@@ -28,6 +28,73 @@ def start_cluster():
         cluster.shutdown()
 
 
+def test_s3_resource_request_granularity():
+    node.query(
+        f"""
+        drop table if exists data;
+        create table data (key UInt64 CODEC(NONE), value String CODEC(NONE)) engine=MergeTree() order by key settings min_bytes_for_wide_part=1e9, storage_policy='s3';
+        """
+    )
+
+    total_bytes = 50000000  # Approximate data size
+    max_bytes_per_request = 2000000  # Should be ~1MB or less in general
+    min_bytes_per_request = 6000  # Small requests are ok, but we don't want to hurt performance with too frequent resource requests
+
+    writes_before = int(
+        node.query(
+            f"select dequeued_requests from system.scheduler where resource='network_write' and path='/prio/admin'"
+        ).strip()
+    )
+    write_bytes_before = int(
+        node.query(
+            f"select dequeued_cost from system.scheduler where resource='network_write' and path='/prio/admin'"
+        ).strip()
+    )
+    node.query(f"insert into data select number, randomString(10000000) from numbers(5) SETTINGS workload='admin'")
+    writes_after = int(
+        node.query(
+            f"select dequeued_requests from system.scheduler where resource='network_write' and path='/prio/admin'"
+        ).strip()
+    )
+    write_bytes_after = int(
+        node.query(
+            f"select dequeued_cost from system.scheduler where resource='network_write' and path='/prio/admin'"
+        ).strip()
+    )
+
+    assert write_bytes_after - write_bytes_before > 1.0 * total_bytes
+    assert write_bytes_after - write_bytes_before < 1.2 * total_bytes
+    assert (write_bytes_after - write_bytes_before) / (writes_after - writes_before) < max_bytes_per_request
+    assert (write_bytes_after - write_bytes_before) / (writes_after - writes_before) > min_bytes_per_request
+
+    reads_before = int(
+        node.query(
+            f"select dequeued_requests from system.scheduler where resource='network_read' and path='/prio/admin'"
+        ).strip()
+    )
+    read_bytes_before = int(
+        node.query(
+            f"select dequeued_cost from system.scheduler where resource='network_read' and path='/prio/admin'"
+        ).strip()
+    )
+    node.query(f"select count() from data where not ignore(*) SETTINGS workload='admin'")
+    reads_after = int(
+        node.query(
+            f"select dequeued_requests from system.scheduler where resource='network_read' and path='/prio/admin'"
+        ).strip()
+    )
+    read_bytes_after = int(
+        node.query(
+            f"select dequeued_cost from system.scheduler where resource='network_read' and path='/prio/admin'"
+        ).strip()
+    )
+
+    assert read_bytes_after - read_bytes_before > 1.0 * total_bytes
+    assert read_bytes_after - read_bytes_before < 1.2 * total_bytes
+    assert (read_bytes_after - read_bytes_before) / (reads_after - reads_before) < max_bytes_per_request
+    assert (read_bytes_after - read_bytes_before) / (reads_after - reads_before) > min_bytes_per_request
+
+
 def test_s3_disk():
     node.query(
         f"""