From 53033d2347357dd874748447824c258d3b2729a9 Mon Sep 17 00:00:00 2001
From: "Mikhail f. Shiryaev"
Date: Mon, 6 Nov 2023 16:12:48 +0100
Subject: [PATCH] Add a consistent digest and tests

---
 tests/ci/digest_helper.py         |  11 ++-
 tests/ci/test_digest.py           | 139 ++++++++++++++++++++++++++++++
 tests/ci/tests/digests/12         |   1 +
 tests/ci/tests/digests/dir1/12    |   1 +
 tests/ci/tests/digests/dir2/12    |   1 +
 tests/ci/tests/digests/dir2/13    |   1 +
 tests/ci/tests/digests/dir3       |   1 +
 tests/ci/tests/digests/symlink-12 |   1 +
 8 files changed, 155 insertions(+), 1 deletion(-)
 create mode 100644 tests/ci/test_digest.py
 create mode 100644 tests/ci/tests/digests/12
 create mode 100644 tests/ci/tests/digests/dir1/12
 create mode 100644 tests/ci/tests/digests/dir2/12
 create mode 100644 tests/ci/tests/digests/dir2/13
 create mode 120000 tests/ci/tests/digests/dir3
 create mode 120000 tests/ci/tests/digests/symlink-12

diff --git a/tests/ci/digest_helper.py b/tests/ci/digest_helper.py
index 69a62fa62b5..543de51e46b 100644
--- a/tests/ci/digest_helper.py
+++ b/tests/ci/digest_helper.py
@@ -46,7 +46,7 @@ def digest_path(path: Path, hash_object: Optional[HASH] = None) -> HASH:
 
 def digest_paths(paths: Iterable[Path], hash_object: Optional[HASH] = None) -> HASH:
     """Calculates aggregated md5 (or updates existing hash_object) hash of passed paths.
-    The order matters"""
+    The order is processed as given"""
     hash_object = hash_object or md5()
     for path in paths:
         if path.exists():
@@ -54,6 +54,15 @@ def digest_paths(paths: Iterable[Path], hash_object: Optional[HASH] = None) -> H
     return hash_object
 
 
+def digest_consistent_paths(
+    paths: Iterable[Path], hash_object: Optional[HASH] = None
+) -> HASH:
+    """Calculates aggregated md5 (or updates existing hash_object) hash of passed paths.
+    The order doesn't matter, paths are converted to `absolute` and ordered before
+    calculation"""
+    return digest_paths(sorted(p.absolute() for p in paths), hash_object)
+
+
 def digest_script(path_str: str) -> HASH:
     """Accepts value of the __file__ executed script and calculates the md5 hash for it"""
     path = Path(path_str)
diff --git a/tests/ci/test_digest.py b/tests/ci/test_digest.py
new file mode 100644
index 00000000000..246a3226721
--- /dev/null
+++ b/tests/ci/test_digest.py
@@ -0,0 +1,139 @@
+#!/usr/bin/env python
+
+import unittest
+from hashlib import md5
+from pathlib import Path
+
+import digest_helper as dh
+
+_12 = b"12\n"
+_13 = b"13\n"
+
+
+# pylint:disable=protected-access
+class TestDigests(unittest.TestCase):
+    tests_dir = Path("tests/digests")
+    broken_link = tests_dir / "broken-symlink"
+    empty_digest = "d41d8cd98f00b204e9800998ecf8427e"
+
+    def test__digest_file(self):
+        hash_tested = md5()
+        with self.assertRaises(
+            AssertionError, msg="_digest_file shouldn't work with dirs"
+        ):
+            dh._digest_file(self.tests_dir, hash_tested)
+        with self.assertRaises(
+            AssertionError, msg="_digest_file shouldn't work with broken links"
+        ):
+            dh._digest_file(self.broken_link, hash_tested)
+
+        # file with content '12\n'
+        hash_expected = md5()
+        hash_expected.update(_12)
+        dh._digest_file(self.tests_dir / "12", hash_tested)
+        self.assertEqual(hash_expected.digest(), hash_tested.digest())
+        # symlink to '12\n'
+        hash_tested = md5()
+        dh._digest_file(self.tests_dir / "symlink-12", hash_tested)
+        self.assertEqual(hash_expected.digest(), hash_tested.digest())
+
+    def test__digest_directory(self):
+        hash_tested = md5()
+        with self.assertRaises(
+            AssertionError, msg="_digest_directory shouldn't work with files"
+        ):
+            dh._digest_directory(self.tests_dir / "12", hash_tested)
+        with self.assertRaises(
+            AssertionError, msg="_digest_directory shouldn't work with broken links"
+        ):
+            dh._digest_directory(self.broken_link, hash_tested)
+
+        # dir1
+        hash_expected = md5()
+        hash_expected.update(_12)
+        dh._digest_directory(self.tests_dir / "dir1", hash_tested)
+        self.assertEqual(hash_expected.digest(), hash_tested.digest())
+
+        # dir2 contains 12 and 13
+        hash_expected.update(_13)
+        hash_tested = md5()
+        dh._digest_directory(self.tests_dir / "dir2", hash_tested)
+        self.assertEqual(hash_expected.digest(), hash_tested.digest())
+
+        # dir3 is symlink to dir2
+        hash_tested = md5()
+        dh._digest_directory(self.tests_dir / "dir3", hash_tested)
+        self.assertEqual(hash_expected.digest(), hash_tested.digest())
+
+    def test_digest_path(self):
+        # test broken link does nothing
+        self.assertEqual(
+            self.empty_digest, dh.digest_path(self.broken_link).hexdigest()
+        )
+        # Test file works fine
+        hash_expected = md5()
+        hash_expected.update(_12)
+        self.assertEqual(
+            hash_expected.digest(), dh.digest_path(self.tests_dir / "12").digest()
+        )
+        # Test directory works fine
+        hash_expected = md5()
+        hash_expected.update(_12)
+        self.assertEqual(
+            hash_expected.digest(), dh.digest_path(self.tests_dir / "dir1").digest()
+        )
+        # Test existing hash is updated from symlink dir3
+        hash_tested = hash_expected.copy()
+        dh.digest_path(self.tests_dir / "dir3", hash_tested)
+        hash_expected = md5()
+        hash_expected.update(_12 + _12 + _13)
+        self.assertEqual(hash_expected.digest(), hash_tested.digest())
+        # Test the full content of the following structure
+        # tests/digests
+        # ├── 12
+        # ├── dir1
+        # │   └── 12
+        # ├── dir2
+        # │   ├── 12
+        # │   └── 13
+        # ├── dir3 -> dir2
+        # └── symlink-12 -> 12
+        hash_expected = md5()
+        hash_expected.update(_12 * 3 + (_13 + _12) * 2)
+        self.assertEqual(
+            hash_expected.digest(), dh.digest_path(self.tests_dir).digest()
+        )
+
+    def test_digest_paths(self):
+        # test paths order matters
+        hash_ordered = dh.digest_paths(
+            (self.tests_dir / d for d in ("dir1", "dir2", "dir3"))
+        )
+        hash_reversed = dh.digest_paths(
+            (self.tests_dir / d for d in ("dir3", "dir2", "dir1"))
+        )
+        hash_unordered = dh.digest_paths(
+            (self.tests_dir / d for d in ("dir3", "dir1", "dir2"))
+        )
+        self.assertNotEqual(hash_ordered.digest(), hash_unordered.digest())
+        self.assertNotEqual(hash_ordered.digest(), hash_reversed.digest())
+        self.assertNotEqual(hash_unordered.digest(), hash_reversed.digest())
+
+    def test_digest_consistent_paths(self):
+        # test paths order does not matter
+        hash_ordered = dh.digest_consistent_paths(
+            (self.tests_dir / d for d in ("dir1", "dir2", "dir3"))
+        )
+        hash_reversed = dh.digest_consistent_paths(
+            (self.tests_dir / d for d in ("dir3", "dir2", "dir1"))
+        )
+        self.assertEqual(hash_ordered.digest(), hash_reversed.digest())
+
+    @classmethod
+    def setUpClass(cls):
+        # create a broken symlink
+        (TestDigests.broken_link).symlink_to("non-existent-link")
+
+    @classmethod
+    def tearDownClass(cls):
+        (TestDigests.broken_link).unlink()
diff --git a/tests/ci/tests/digests/12 b/tests/ci/tests/digests/12
new file mode 100644
index 00000000000..48082f72f08
--- /dev/null
+++ b/tests/ci/tests/digests/12
@@ -0,0 +1 @@
+12
diff --git a/tests/ci/tests/digests/dir1/12 b/tests/ci/tests/digests/dir1/12
new file mode 100644
index 00000000000..48082f72f08
--- /dev/null
+++ b/tests/ci/tests/digests/dir1/12
@@ -0,0 +1 @@
+12
diff --git a/tests/ci/tests/digests/dir2/12 b/tests/ci/tests/digests/dir2/12
new file mode 100644
index 00000000000..48082f72f08
--- /dev/null
+++ b/tests/ci/tests/digests/dir2/12
@@ -0,0 +1 @@
+12
diff --git a/tests/ci/tests/digests/dir2/13 b/tests/ci/tests/digests/dir2/13
new file mode 100644
index 00000000000..b1bd38b62a0
--- /dev/null
+++ b/tests/ci/tests/digests/dir2/13
@@ -0,0 +1 @@
+13
diff --git a/tests/ci/tests/digests/dir3 b/tests/ci/tests/digests/dir3
new file mode 120000
index 00000000000..1e039be9000
--- /dev/null
+++ b/tests/ci/tests/digests/dir3
@@ -0,0 +1 @@
+dir2
\ No newline at end of file
diff --git a/tests/ci/tests/digests/symlink-12 b/tests/ci/tests/digests/symlink-12
new file mode 120000
index 00000000000..3cacc0b93c9
--- /dev/null
+++ b/tests/ci/tests/digests/symlink-12
@@ -0,0 +1 @@
+12
\ No newline at end of file