mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-20 08:40:50 +00:00
Change digest API to update the single hash object
This commit is contained in:
parent
f7c5602da4
commit
e352e7bfba
@@ -3,7 +3,7 @@
|
||||
from hashlib import md5
|
||||
from logging import getLogger
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING, Iterable
|
||||
from typing import TYPE_CHECKING, Iterable, Optional
|
||||
from sys import modules
|
||||
|
||||
if TYPE_CHECKING:
|
||||
@@ -16,43 +16,42 @@ else:
|
||||
logger = getLogger(__name__)
|
||||
|
||||
|
||||
def _digest_file(file: Path) -> HASH:
|
||||
def _digest_file(file: Path, hash_object: HASH) -> None:
|
||||
assert file.is_file()
|
||||
md5_hash = md5()
|
||||
with open(file, "rb") as fd:
|
||||
for chunk in iter(lambda: fd.read(4096), b""):
|
||||
md5_hash.update(chunk)
|
||||
return md5_hash
|
||||
hash_object.update(chunk)
|
||||
|
||||
|
||||
def _digest_directory(directory: Path) -> HASH:
|
||||
def _digest_directory(directory: Path, hash_object: HASH) -> None:
|
||||
assert directory.is_dir()
|
||||
md5_hash = md5()
|
||||
for p in sorted(directory.rglob("*")):
|
||||
if p.is_symlink() and p.is_dir():
|
||||
# The symlink directory is not listed recursively, so we process it manually
|
||||
md5_hash.update(_digest_directory(p).digest())
|
||||
(_digest_directory(p, hash_object))
|
||||
if p.is_file():
|
||||
md5_hash.update(_digest_file(p).digest())
|
||||
return md5_hash
|
||||
(_digest_file(p, hash_object))
|
||||
|
||||
|
||||
def digest_path(path: Path, hash_object: Optional["HASH"] = None) -> "HASH":
    """Calculate the md5 hash of *path*, whether it is a directory or a file.

    If *hash_object* is given it is updated in place and returned; otherwise
    a fresh md5 object is created. A path that is neither an existing
    directory nor an existing file contributes nothing to the hash.
    """
    # explicit None check: "not supplied", not "falsy"
    hash_object = md5() if hash_object is None else hash_object
    if path.is_dir():
        _digest_directory(path, hash_object)
    elif path.is_file():
        _digest_file(path, hash_object)
    return hash_object
def digest_paths(paths: Iterable[Path], hash_object: Optional["HASH"] = None) -> "HASH":
    """Calculate the aggregated md5 hash of the passed paths. The order matters.

    Non-existing paths are silently skipped. If *hash_object* is given it is
    updated in place and returned; otherwise a fresh md5 object is created.
    """
    # explicit None check: "not supplied", not "falsy"
    hash_object = md5() if hash_object is None else hash_object
    for path in paths:
        if path.exists():
            digest_path(path, hash_object)
    return hash_object
||||
def digest_script(path_str: str) -> HASH:
|
||||
@@ -65,7 +64,7 @@ def digest_script(path_str: str) -> HASH:
|
||||
script_path = getattr(script, "__file__", "")
|
||||
if parent.absolute().as_posix() in script_path:
|
||||
logger.debug("Updating the hash with %s", script_path)
|
||||
md5_hash.update(_digest_file(Path(script_path)).digest())
|
||||
_digest_file(Path(script_path), md5_hash)
|
||||
except RuntimeError:
|
||||
logger.warning("The modules size has changed, retry calculating digest")
|
||||
return digest_script(path_str)
|
||||
|
Loading…
Reference in New Issue
Block a user