Change digest API to update the single hash object

Mikhail f. Shiryaev 2023-11-03 17:41:53 +01:00
parent f7c5602da4
commit e352e7bfba


@@ -3,7 +3,7 @@
 from hashlib import md5
 from logging import getLogger
 from pathlib import Path
-from typing import TYPE_CHECKING, Iterable
+from typing import TYPE_CHECKING, Iterable, Optional
 from sys import modules
 
 if TYPE_CHECKING:
@@ -16,43 +16,42 @@ else:
 logger = getLogger(__name__)
 
 
-def _digest_file(file: Path) -> HASH:
+def _digest_file(file: Path, hash_object: HASH) -> None:
     assert file.is_file()
-    md5_hash = md5()
     with open(file, "rb") as fd:
         for chunk in iter(lambda: fd.read(4096), b""):
-            md5_hash.update(chunk)
-    return md5_hash
+            hash_object.update(chunk)
 
 
-def _digest_directory(directory: Path) -> HASH:
+def _digest_directory(directory: Path, hash_object: HASH) -> None:
     assert directory.is_dir()
-    md5_hash = md5()
     for p in sorted(directory.rglob("*")):
         if p.is_symlink() and p.is_dir():
             # The symlink directory is not listed recursively, so we process it manually
-            md5_hash.update(_digest_directory(p).digest())
+            (_digest_directory(p, hash_object))
         if p.is_file():
-            md5_hash.update(_digest_file(p).digest())
-    return md5_hash
+            (_digest_file(p, hash_object))
 
 
-def digest_path(path: Path) -> HASH:
-    """Calculates md5 hash of the path, either it's directory or file"""
+def digest_path(path: Path, hash_object: Optional[HASH] = None) -> HASH:
+    """Calculates md5 (or updates existing hash_object) hash of the path, either it's
+    directory or file"""
+    hash_object = hash_object or md5()
     if path.is_dir():
-        return _digest_directory(path)
-    if path.is_file():
-        return _digest_file(path)
-    return md5()
+        _digest_directory(path, hash_object)
+    elif path.is_file():
+        _digest_file(path, hash_object)
+    return hash_object
 
 
-def digest_paths(paths: Iterable[Path]) -> HASH:
-    """Calculates aggregated md5 hash of passed paths. The order matters"""
-    md5_hash = md5()
+def digest_paths(paths: Iterable[Path], hash_object: Optional[HASH] = None) -> HASH:
+    """Calculates aggregated md5 (or updates existing hash_object) hash of passed paths.
+    The order matters"""
+    hash_object = hash_object or md5()
     for path in paths:
         if path.exists():
-            md5_hash.update(digest_path(path).digest())
-    return md5_hash
+            digest_path(path, hash_object)
+    return hash_object
 
 
 def digest_script(path_str: str) -> HASH:
@@ -65,7 +64,7 @@ def digest_script(path_str: str) -> HASH:
             script_path = getattr(script, "__file__", "")
             if parent.absolute().as_posix() in script_path:
                 logger.debug("Updating the hash with %s", script_path)
-                md5_hash.update(_digest_file(Path(script_path)).digest())
+                _digest_file(Path(script_path), md5_hash)
     except RuntimeError:
         logger.warning("The modules size has changed, retry calculating digest")
         return digest_script(path_str)
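
A minimal usage sketch of the updated API, assuming the helpers live in a module importable as digest_helper and using made-up paths for illustration: a caller can now pass one shared hash object that every call updates in place, or omit it to get a fresh md5 as before.

    from hashlib import md5
    from pathlib import Path

    from digest_helper import digest_path, digest_paths  # assumed module name

    # One hash object accumulates the digest of everything fed into it.
    hash_object = md5()
    digest_path(Path("./docker"), hash_object)                     # updates hash_object in place
    digest_paths([Path("./tests"), Path("./utils")], hash_object)  # order of paths matters
    print(hash_object.hexdigest())

    # Without an explicit hash_object, a new md5 is created and returned.
    standalone = digest_path(Path("./docker"))
    print(standalone.hexdigest())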