mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-20 00:30:49 +00:00
Implement digest helpers for different objects
This commit is contained in:
parent
7e4742f68a
commit
3e8ad14423
@ -3,13 +3,13 @@
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
from hashlib import md5
|
||||
from pathlib import Path
|
||||
|
||||
import requests # type: ignore
|
||||
|
||||
from build_download_helper import download_build_with_progress, DownloadException
|
||||
from compress_files import decompress_fast, compress_fast
|
||||
from digest_helper import digest_path
|
||||
from env_helper import S3_DOWNLOAD, S3_BUILDS_BUCKET
|
||||
from git_helper import git_runner
|
||||
from s3_helper import S3Helper
|
||||
@ -108,7 +108,7 @@ class CargoCache:
|
||||
s3_helper: S3Helper,
|
||||
):
|
||||
self._cargo_lock_file = Path(git_runner.cwd) / "rust" / "Cargo.lock"
|
||||
self.lock_hash = md5(self._cargo_lock_file.read_bytes()).hexdigest()
|
||||
self.lock_hash = digest_path(self._cargo_lock_file).hexdigest()
|
||||
self.directory = directory
|
||||
self.archive_name = f"Cargo_cache_{self.lock_hash}.tar.zst"
|
||||
self.temp_path = temp_path
|
||||
|
63
tests/ci/digest_helper.py
Normal file
63
tests/ci/digest_helper.py
Normal file
@ -0,0 +1,63 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
from hashlib import md5
|
||||
from logging import getLogger
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
from sys import modules
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from hashlib import ( # pylint:disable=no-name-in-module,ungrouped-imports
|
||||
_Hash as HASH,
|
||||
)
|
||||
else:
|
||||
HASH = "_Hash"
|
||||
|
||||
logger = getLogger(__name__)
|
||||
|
||||
|
||||
def _digest_file(file: Path) -> HASH:
    """Return an md5 hash object fed with the contents of *file*.

    The file is read in 4 KiB chunks so arbitrarily large files can be
    hashed without loading them into memory at once.
    """
    assert file.is_file()
    digest = md5()
    with file.open("rb") as stream:
        while chunk := stream.read(4096):
            digest.update(chunk)
    return digest
|
||||
|
||||
|
||||
def _digest_directory(directory: Path) -> HASH:
    """Return an md5 hash object fed with the contents of every file under
    *directory*, visited in sorted order for a stable result.

    Only file contents participate in the digest; entry names and empty
    directories do not contribute.
    """
    assert directory.is_dir()
    digest = md5()
    for entry in sorted(directory.rglob("*")):
        # rglob does not descend into symlinked directories, so recurse by hand
        if entry.is_symlink() and entry.is_dir():
            digest.update(_digest_directory(entry).digest())
        elif entry.is_file():
            digest.update(_digest_file(entry).digest())
    return digest
|
||||
|
||||
|
||||
def digest_path(path: Path) -> HASH:
    """Return the md5 hash object for *path*, whether it is a file or a
    directory.

    A path that is neither (for example, one that does not exist) yields a
    fresh, empty md5 object.
    """
    if path.is_file():
        return _digest_file(path)
    if path.is_dir():
        return _digest_directory(path)
    return md5()
|
||||
|
||||
|
||||
def digest_script(path_str: str) -> HASH:
    """Accepts value of the __file__ executed script and calculates the md5 hash for it

    Every already-imported module whose file path contains the script's parent
    directory contributes its content to the digest, so the hash changes when
    any sibling module of the script changes.
    """
    # The match prefix is loop-invariant, so compute it once instead of on
    # every iteration
    parent = Path(path_str).parent.absolute().as_posix()
    md5_hash = md5()
    try:
        for script in modules.values():
            # __file__ may be absent (builtin modules) or set to None
            # (namespace packages); normalize both to "" so the substring
            # test below never receives None
            script_path = getattr(script, "__file__", "") or ""
            if parent in script_path:
                logger.debug("Updating the hash with %s", script_path)
                md5_hash.update(_digest_file(Path(script_path)).digest())
    except RuntimeError:
        # sys.modules changed size during iteration (an import happened
        # meanwhile); retry with a fresh snapshot
        logger.warning("The modules size has changed, retry calculating digest")
        return digest_script(path_str)
    return md5_hash
|
@ -1,5 +1,4 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import hashlib
|
||||
import logging
|
||||
import re
|
||||
import shutil
|
||||
@ -22,15 +21,6 @@ from env_helper import (
|
||||
from compress_files import compress_file_fast
|
||||
|
||||
|
||||
def _md5(fname):
    """Return the hex md5 digest of the file at *fname*, reading it in
    4 KiB chunks so large files never sit in memory whole."""
    digest = hashlib.md5()
    with open(fname, "rb") as stream:
        while True:
            piece = stream.read(4096)
            if not piece:
                break
            digest.update(piece)
    logging.debug("MD5 for %s is %s", fname, digest.hexdigest())
    return digest.hexdigest()
|
||||
|
||||
|
||||
def _flatten_list(lst):
|
||||
result = []
|
||||
for elem in lst:
|
||||
|
Loading…
Reference in New Issue
Block a user