From 499ff702957012a0bb242a5a20d799631ef97316 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 3 Jun 2022 19:45:06 +0300 Subject: [PATCH 1/5] tests: improve test_log_family_s3 in case of failures Signed-off-by: Azat Khuzhin --- tests/integration/test_log_family_s3/test.py | 31 ++++++++++---------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/tests/integration/test_log_family_s3/test.py b/tests/integration/test_log_family_s3/test.py index 234b079ba00..9fde311d89a 100644 --- a/tests/integration/test_log_family_s3/test.py +++ b/tests/integration/test_log_family_s3/test.py @@ -57,21 +57,22 @@ def test_log_family_s3(cluster, log_engine, files_overhead, files_overhead_per_i ) ) - node.query("INSERT INTO s3_test SELECT number FROM numbers(5)") - assert node.query("SELECT * FROM s3_test") == "0\n1\n2\n3\n4\n" - assert_objects_count(cluster, files_overhead_per_insert + files_overhead) + try: + node.query("INSERT INTO s3_test SELECT number FROM numbers(5)") + assert node.query("SELECT * FROM s3_test") == "0\n1\n2\n3\n4\n" + assert_objects_count(cluster, files_overhead_per_insert + files_overhead) - node.query("INSERT INTO s3_test SELECT number + 5 FROM numbers(3)") - assert node.query("SELECT * FROM s3_test order by id") == "0\n1\n2\n3\n4\n5\n6\n7\n" - assert_objects_count(cluster, files_overhead_per_insert * 2 + files_overhead) + node.query("INSERT INTO s3_test SELECT number + 5 FROM numbers(3)") + assert node.query("SELECT * FROM s3_test order by id") == "0\n1\n2\n3\n4\n5\n6\n7\n" + assert_objects_count(cluster, files_overhead_per_insert * 2 + files_overhead) - node.query("INSERT INTO s3_test SELECT number + 8 FROM numbers(1)") - assert ( - node.query("SELECT * FROM s3_test order by id") == "0\n1\n2\n3\n4\n5\n6\n7\n8\n" - ) - assert_objects_count(cluster, files_overhead_per_insert * 3 + files_overhead) + node.query("INSERT INTO s3_test SELECT number + 8 FROM numbers(1)") + assert ( + node.query("SELECT * FROM s3_test order by id") == 
"0\n1\n2\n3\n4\n5\n6\n7\n8\n" + ) + assert_objects_count(cluster, files_overhead_per_insert * 3 + files_overhead) - node.query("TRUNCATE TABLE s3_test") - assert_objects_count(cluster, 0) - - node.query("DROP TABLE s3_test") + node.query("TRUNCATE TABLE s3_test") + assert_objects_count(cluster, 0) + finally: + node.query("DROP TABLE s3_test") From ee45bb3c65224bf8952470de3b868b406e06e186 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 3 Jun 2022 19:46:06 +0300 Subject: [PATCH 2/5] tests: use id for parametrized tests in test_log_family_s3 Signed-off-by: Azat Khuzhin --- tests/integration/test_log_family_s3/test.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_log_family_s3/test.py b/tests/integration/test_log_family_s3/test.py index 9fde311d89a..11b012dfab4 100644 --- a/tests/integration/test_log_family_s3/test.py +++ b/tests/integration/test_log_family_s3/test.py @@ -46,7 +46,11 @@ def assert_objects_count(cluster, objects_count, path="data/"): # files_overhead=1, files_overhead_per_insert=2 @pytest.mark.parametrize( "log_engine,files_overhead,files_overhead_per_insert", - [("TinyLog", 1, 1), ("Log", 1, 2), ("StripeLog", 1, 2)], + [ + pytest.param("TinyLog", 1, 1, id="TinyLog"), + pytest.param("Log", 1, 2, id="Log"), + pytest.param("StripeLog", 1, 2, id="StripeLog"), + ], ) def test_log_family_s3(cluster, log_engine, files_overhead, files_overhead_per_insert): node = cluster.instances["node"] From 8ddf277670b9fd0474f1713c9bb3563697678f42 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 3 Jun 2022 19:46:43 +0300 Subject: [PATCH 3/5] tests: test_log_family_s3 apply black Signed-off-by: Azat Khuzhin --- tests/integration/test_log_family_s3/test.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_log_family_s3/test.py b/tests/integration/test_log_family_s3/test.py index 11b012dfab4..76ff0930db3 100644 --- a/tests/integration/test_log_family_s3/test.py +++ 
b/tests/integration/test_log_family_s3/test.py @@ -67,12 +67,16 @@ def test_log_family_s3(cluster, log_engine, files_overhead, files_overhead_per_i assert_objects_count(cluster, files_overhead_per_insert + files_overhead) node.query("INSERT INTO s3_test SELECT number + 5 FROM numbers(3)") - assert node.query("SELECT * FROM s3_test order by id") == "0\n1\n2\n3\n4\n5\n6\n7\n" + assert ( + node.query("SELECT * FROM s3_test order by id") + == "0\n1\n2\n3\n4\n5\n6\n7\n" + ) assert_objects_count(cluster, files_overhead_per_insert * 2 + files_overhead) node.query("INSERT INTO s3_test SELECT number + 8 FROM numbers(1)") assert ( - node.query("SELECT * FROM s3_test order by id") == "0\n1\n2\n3\n4\n5\n6\n7\n8\n" + node.query("SELECT * FROM s3_test order by id") + == "0\n1\n2\n3\n4\n5\n6\n7\n8\n" ) assert_objects_count(cluster, files_overhead_per_insert * 3 + files_overhead) From 71285edfbd4f30beae8e3d45b40c8394bb57c0b0 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 2 Jun 2022 18:09:42 +0300 Subject: [PATCH 4/5] Fix reading of empty S3 files It is possible for ReadBufferFromS3::nextImpl() to be called even after eof(), at least once, and in this case, if the file was empty, then local working_buffer will be null, while impl.working_buffer will be empty, but not null, and so local position() after impl->position() = position() will be incorrect. I found this with test_storage_s3/test.py::test_empty_file in debug build; the assertion caught this, so maybe it is worth bringing back the debug integration build... 
v2: fix test_log_family_s3 failures https://s3.amazonaws.com/clickhouse-test-reports/37801/b5e6e2ddae94d6a7eac551309cb67003dff97df1/integration_tests__asan__actions__[2/3].html Signed-off-by: Azat Khuzhin --- src/IO/ReadBufferFromS3.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/IO/ReadBufferFromS3.cpp b/src/IO/ReadBufferFromS3.cpp index 5b1d278f4c2..1a358f79a8b 100644 --- a/src/IO/ReadBufferFromS3.cpp +++ b/src/IO/ReadBufferFromS3.cpp @@ -116,6 +116,11 @@ bool ReadBufferFromS3::nextImpl() assert(working_buffer.begin() != nullptr); assert(!internal_buffer.empty()); } + else + { + /// use the buffer returned by `impl` + BufferBase::set(impl->buffer().begin(), impl->buffer().size(), impl->offset()); + } } /// Try to read a next portion of data. @@ -155,7 +160,7 @@ bool ReadBufferFromS3::nextImpl() if (!next_result) return false; - BufferBase::set(impl->buffer().begin(), impl->buffer().size(), impl->offset()); /// use the buffer returned by `impl` + BufferBase::set(impl->buffer().begin(), impl->buffer().size(), impl->offset()); ProfileEvents::increment(ProfileEvents::ReadBufferFromS3Bytes, working_buffer.size()); offset += working_buffer.size(); From 5d0a185cb3f69a5591fa137bd2cd5a2e777aa608 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 3 Jun 2022 21:27:27 +0300 Subject: [PATCH 5/5] Fix assertion for getImplementationBufferOffset() for Log family on S3 Test test_log_family_s3/test.py::test_log_family_s3[TinyLog]: clickhouse: ./src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp:213: virtual bool DB::AsynchronousReadIndirectBufferFromRemoteFS::nextImpl(): Assertion `file_offset_of_buffer_end == impl->getImplementationBufferOffset()' failed. 
v2: fix assertion instead of adjusting file_offset_of_buffer_end in ReadBufferFromRemoteFSGather.cpp Signed-off-by: Azat Khuzhin --- src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp b/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp index 5f2e19162f4..ca151a8608e 100644 --- a/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp +++ b/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp @@ -210,7 +210,11 @@ bool AsynchronousReadIndirectBufferFromRemoteFS::nextImpl() ProfileEvents::increment(ProfileEvents::AsynchronousReadWaitMicroseconds, watch.elapsedMicroseconds()); file_offset_of_buffer_end = impl->getFileOffsetOfBufferEnd(); - assert(file_offset_of_buffer_end == impl->getImplementationBufferOffset()); + /// In case of multiple files for the same file in clickhouse (i.e. log family) + /// file_offset_of_buffer_end will not match getImplementationBufferOffset() + /// so we use [impl->getImplementationBufferOffset(), impl->getFileSize()] + assert(file_offset_of_buffer_end >= impl->getImplementationBufferOffset()); + assert(file_offset_of_buffer_end <= impl->getFileSize()); prefetch_future = {}; return size;