Implement digest helpers for different objects

Mikhail f. Shiryaev 2023-11-03 16:07:05 +01:00
parent 7e4742f68a
commit 3e8ad14423
3 changed files with 65 additions and 12 deletions


@@ -3,13 +3,13 @@
 import logging
 import os
 import shutil
-from hashlib import md5
 from pathlib import Path
 
 import requests  # type: ignore
 
 from build_download_helper import download_build_with_progress, DownloadException
 from compress_files import decompress_fast, compress_fast
+from digest_helper import digest_path
 from env_helper import S3_DOWNLOAD, S3_BUILDS_BUCKET
 from git_helper import git_runner
 from s3_helper import S3Helper
@@ -108,7 +108,7 @@ class CargoCache:
         s3_helper: S3Helper,
     ):
         self._cargo_lock_file = Path(git_runner.cwd) / "rust" / "Cargo.lock"
-        self.lock_hash = md5(self._cargo_lock_file.read_bytes()).hexdigest()
+        self.lock_hash = digest_path(self._cargo_lock_file).hexdigest()
         self.directory = directory
         self.archive_name = f"Cargo_cache_{self.lock_hash}.tar.zst"
         self.temp_path = temp_path
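
Worth noting about the substitution above: for a regular file, digest_path streams the same md5 over the file contents that md5(file.read_bytes()) produces, so the cache key scheme is unchanged. A minimal sanity check, assuming the repository layout from the diff:

from hashlib import md5
from pathlib import Path

from digest_helper import digest_path

lock_file = Path("rust/Cargo.lock")  # the path CargoCache uses above
# For a plain file both expressions yield the same hex digest:
assert md5(lock_file.read_bytes()).hexdigest() == digest_path(lock_file).hexdigest()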

tests/ci/digest_helper.py Normal file

@@ -0,0 +1,63 @@
#!/usr/bin/env python3

from hashlib import md5
from logging import getLogger
from pathlib import Path
from typing import TYPE_CHECKING
from sys import modules

if TYPE_CHECKING:
    from hashlib import (  # pylint:disable=no-name-in-module,ungrouped-imports
        _Hash as HASH,
    )
else:
    HASH = "_Hash"

logger = getLogger(__name__)


def _digest_file(file: Path) -> HASH:
    assert file.is_file()
    md5_hash = md5()
    with open(file, "rb") as fd:
        for chunk in iter(lambda: fd.read(4096), b""):
            md5_hash.update(chunk)
    return md5_hash


def _digest_directory(directory: Path) -> HASH:
    assert directory.is_dir()
    md5_hash = md5()
    for p in sorted(directory.rglob("*")):
        if p.is_symlink() and p.is_dir():
            # Symlinked directories are not listed recursively, so process them manually
            md5_hash.update(_digest_directory(p).digest())
        if p.is_file():
            md5_hash.update(_digest_file(p).digest())
    return md5_hash


def digest_path(path: Path) -> HASH:
    """Calculates the md5 hash of the path, whether it is a directory or a file"""
    if path.is_dir():
        return _digest_directory(path)
    if path.is_file():
        return _digest_file(path)
    return md5()


def digest_script(path_str: str) -> HASH:
    """Accepts the __file__ value of the executed script and calculates the md5 hash of it and all modules loaded from the same directory"""
    path = Path(path_str)
    parent = path.parent
    md5_hash = md5()
    try:
        for script in modules.values():
            script_path = getattr(script, "__file__", "")
            if parent.absolute().as_posix() in script_path:
                logger.debug("Updating the hash with %s", script_path)
                md5_hash.update(_digest_file(Path(script_path)).digest())
    except RuntimeError:
        logger.warning("The number of loaded modules has changed, retrying the digest calculation")
        return digest_script(path_str)
    return md5_hash
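
For context, a minimal usage sketch of the new helpers from another CI script; the paths here are hypothetical examples, not part of this commit:

#!/usr/bin/env python3
from pathlib import Path

from digest_helper import digest_path, digest_script

# A file is hashed by content, a directory recursively over its sorted entries,
# and a non-existent path yields the digest of empty input.
print(digest_path(Path("rust/Cargo.lock")).hexdigest())
print(digest_path(Path("tests/ci")).hexdigest())

# Hash the running script together with all loaded modules from its directory,
# e.g. to invalidate CI caches whenever any helper module changes.
print(digest_script(__file__).hexdigest())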


@@ -1,5 +1,4 @@
 # -*- coding: utf-8 -*-
-import hashlib
 import logging
 import re
 import shutil
@@ -22,15 +21,6 @@ from env_helper import (
 from compress_files import compress_file_fast
 
 
-def _md5(fname):
-    hash_md5 = hashlib.md5()
-    with open(fname, "rb") as f:
-        for chunk in iter(lambda: f.read(4096), b""):
-            hash_md5.update(chunk)
-    logging.debug("MD5 for %s is %s", fname, hash_md5.hexdigest())
-    return hash_md5.hexdigest()
-
-
 def _flatten_list(lst):
     result = []
     for elem in lst:
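
The removed _md5 helper is superseded by digest_helper. A caller that previously used _md5(fname) could presumably be migrated like this (a sketch; the wrapper mirrors the deleted code and is not part of this commit):

import logging
from pathlib import Path

from digest_helper import digest_path

def _md5(fname):
    # Same observable behaviour as the deleted helper, delegating to digest_helper
    hexdigest = digest_path(Path(fname)).hexdigest()
    logging.debug("MD5 for %s is %s", fname, hexdigest)
    return hexdigest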