# ClickHouse/ci/praktika/cache.py

import dataclasses
import json
from pathlib import Path

from praktika import Artifact, Job, Workflow
from praktika._environment import _Environment
from praktika.digest import Digest
from praktika.s3 import S3
from praktika.settings import Settings
from praktika.utils import Utils
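

# S3-backed cache of successful CI job runs. A cache key is built from a job's
# name plus a content digest of its inputs (praktika.digest.Digest); each
# successful run leaves a small JSON "success" record under
# {CACHE_S3_PATH}/v{CACHE_VERSION}/{normalized job name}/{digest}/ in S3, so a
# later run with the same digest can detect it.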
class Cache:
    @dataclasses.dataclass
    class CacheRecord:
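        """A small JSON marker describing one finished job run: the result
        type plus the commit sha, PR number and branch it was produced on."""
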
        class Type:
            SUCCESS = "success"

        type: str
        sha: str
        pr_number: int
        branch: str

        def dump(self, path):
            with open(path, "w", encoding="utf8") as f:
                json.dump(dataclasses.asdict(self), f)

        @classmethod
        def from_fs(cls, path):
            with open(path, "r", encoding="utf8") as f:
                return Cache.CacheRecord(**json.load(f))

        @classmethod
        def from_dict(cls, obj):
            return Cache.CacheRecord(**obj)

    def __init__(self):
        self.digest = Digest()
        self.success = {}  # type Dict[str, Any]

    @classmethod
    def push_success_record(cls, job_name, job_digest, sha):
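        """Upload a SUCCESS record for (job_name, job_digest) to the S3 cache.

        The record is written locally under TEMP_DIR with the file name
        "success", so the uploaded object ends up at the .../{digest}/success
        key that fetch_success() later looks up.
        """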
        type_ = Cache.CacheRecord.Type.SUCCESS
        record = Cache.CacheRecord(
            type=type_,
            sha=sha,
            pr_number=_Environment.get().PR_NUMBER,
            branch=_Environment.get().BRANCH,
        )
        assert (
            Settings.CACHE_S3_PATH
        ), "Setting CACHE_S3_PATH must be defined when the CI cache is enabled"
        record_path = f"{Settings.CACHE_S3_PATH}/v{Settings.CACHE_VERSION}/{Utils.normalize_string(job_name)}/{job_digest}"
        record_file = Path(Settings.TEMP_DIR) / type_
        record.dump(record_file)
        S3.copy_file_to_s3(s3_path=record_path, local_path=record_file)
        record_file.unlink()

    def fetch_success(self, job_name, job_digest):
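        """Look up a SUCCESS record for (job_name, job_digest) in the S3 cache.

        On a hit the record is downloaded into CACHE_LOCAL_PATH, the job is
        marked in self.success, and the parsed CacheRecord is returned;
        otherwise None.
        """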
        type_ = Cache.CacheRecord.Type.SUCCESS
        assert (
            Settings.CACHE_S3_PATH
        ), "Setting CACHE_S3_PATH must be defined when the CI cache is enabled"
        record_path = f"{Settings.CACHE_S3_PATH}/v{Settings.CACHE_VERSION}/{Utils.normalize_string(job_name)}/{job_digest}/{type_}"
        record_file_local_dir = (
            f"{Settings.CACHE_LOCAL_PATH}/{Utils.normalize_string(job_name)}/"
        )
        Path(record_file_local_dir).mkdir(parents=True, exist_ok=True)

        if S3.head_object(record_path):
            res = S3.copy_file_from_s3(
                s3_path=record_path, local_path=record_file_local_dir
            )
        else:
            res = None

        if res:
            print(f"Cache record found, job [{job_name}], digest [{job_digest}]")
            self.success[job_name] = True
            return Cache.CacheRecord.from_fs(Path(record_file_local_dir) / type_)

        return None
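

# Illustrative usage sketch (assumed runner-side flow, not defined in this
# file): compute a job's digest, skip the job on a cache hit, and push a fresh
# record after a successful run.
#
#   cache = Cache()
#   digest = cache.digest.calc_job_digest(job)  # job: Job.Config
#   if cache.fetch_success(job.name, digest):
#       ...  # result already cached, job can be skipped
#   else:
#       ...  # run the job, then on success:
#       Cache.push_success_record(job.name, digest, sha)  # sha: current commit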


if __name__ == "__main__":
    # test
    c = Cache()
    workflow = Workflow.Config(
        name="TEST",
        event=Workflow.Event.PULL_REQUEST,
        jobs=[
            Job.Config(
                name="JobA",
                runs_on=["some"],
                command="python -m unittest ./ci/tests/example_1/test_example_produce_artifact.py",
                provides=["greet"],
                job_requirements=Job.Requirements(
                    python_requirements_txt="./ci/requirements.txt"
                ),
                digest_config=Job.CacheDigestConfig(
                    # example: use glob to include files
                    include_paths=["./ci/tests/example_1/test_example_consume*.py"],
                ),
            ),
            Job.Config(
                name="JobB",
                runs_on=["some"],
                command="python -m unittest ./ci/tests/example_1/test_example_consume_artifact.py",
                requires=["greet"],
                job_requirements=Job.Requirements(
                    python_requirements_txt="./ci/requirements.txt"
                ),
                digest_config=Job.CacheDigestConfig(
                    # example: use dir to include files recursively
                    include_paths=["./ci/tests/example_1"],
                    # example: use glob to exclude files from digest
                    exclude_paths=[
                        "./ci/tests/example_1/test_example_consume*",
                        "./**/*.pyc",
                    ],
                ),
            ),
        ],
        artifacts=[Artifact.Config(type="s3", name="greet", path="hello")],
        enable_cache=True,
    )
    for job in workflow.jobs:
        print(c.digest.calc_job_digest(job))