diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 74b1b0d2..1c9dac49 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -21,7 +21,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: [ '3.7', '3.8', '3.9', '3.10' ]
+        python-version: ['3.8', '3.9', '3.10' ]
 
     name: Base (${{ matrix.python-version }})
 
@@ -58,7 +58,7 @@ jobs:
     strategy:
       matrix:
-        python-version: [ '3.7', '3.10' ]
+        python-version: ['3.10' ]
 
     name: Production (${{ matrix.python-version }})
 
@@ -96,7 +96,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: [ '3.7', '3.10' ]
+        python-version: ['3.10' ]
 
     name: Tests (${{ matrix.python-version }})
 
@@ -120,11 +120,6 @@ jobs:
         with:
          node-version: 18
 
-      - uses: actions/setup-node@v3
-        if: ${{ matrix.python-version == '3.7' }}
-        with:
-          node-version: 14
-
      - run: |
          wget https://github.com/wkhtmltopdf/packaging/releases/download/0.12.6-1/wkhtmltox_0.12.6-1.focal_amd64.deb;
          sudo apt install ./wkhtmltox_0.12.6-1.focal_amd64.deb;
diff --git a/README.md b/README.md
index e523672c..3a6eb130 100755
--- a/README.md
+++ b/README.md
@@ -10,7 +10,7 @@ Bench is a command-line utility that helps you to install, update, and manage mu
-
+
diff --git a/bench/app.py b/bench/app.py
index a4b3dc74..e99eca95 100755
--- a/bench/app.py
+++ b/bench/app.py
@@ -6,6 +6,7 @@ import re
 import shutil
 import subprocess
 import sys
+import uuid
 import tarfile
 import typing
 from collections import OrderedDict
@@ -34,6 +35,7 @@ from bench.utils import (
 	is_valid_frappe_branch,
 	log,
 	run_frappe_cmd,
+	get_file_md5,
 )
 from bench.utils.bench import build_assets, install_python_dev_dependencies
 from bench.utils.render import step
@@ -338,45 +340,51 @@ class App(AppMeta):
 	def get_app_path(self) -> Path:
 		return Path(self.bench.name) / "apps" / self.app_name
 
-	def get_app_cache_path(self, is_compressed=False) -> Path:
+	def get_app_cache_temp_path(self, is_compressed=False) -> Path:
+		cache_path = get_bench_cache_path("apps")
+		ext = "tgz" if is_compressed else "tar"
+		tarfile_name = f"{self.app_name}.{uuid.uuid4().hex}.{ext}"
+		return cache_path / tarfile_name
+
+	def get_app_cache_hashed_path(self, temp_path: Path) -> Path:
 		assert self.cache_key is not None
 
-		cache_path = get_bench_cache_path("apps")
-		tarfile_name = get_cache_filename(
-			self.app_name,
-			self.cache_key,
-			is_compressed,
-		)
-		return cache_path / tarfile_name
+		ext = temp_path.suffix[1:]
+		md5 = get_file_md5(temp_path)
+		tarfile_name = f"{self.app_name}.{self.cache_key}.md5-{md5}.{ext}"
+
+		return temp_path.with_name(tarfile_name)
 
 	def get_cached(self) -> bool:
 		if not self.cache_key:
 			return False
 
-		cache_path = self.get_app_cache_path(False)
-		mode = "r"
-
-		# Check if cache exists without gzip
-		if not cache_path.is_file():
-			cache_path = self.get_app_cache_path(True)
-			mode = "r:gz"
-
-		# Check if cache exists with gzip
-		if not cache_path.is_file():
+		if not (cache_path := validate_cache_and_get_path(self.app_name, self.cache_key)):
 			return False
 
 		app_path = self.get_app_path()
 		if app_path.is_dir():
 			shutil.rmtree(app_path)
 
-		click.secho(f"Getting {self.app_name} from cache", fg="yellow")
+		click.secho(
+			f"Bench app-cache: extracting {self.app_name} from {cache_path.as_posix()}",
+		)
+
+		mode = "r:gz" if cache_path.suffix.endswith(".tgz") else "r"
 		with tarfile.open(cache_path, mode) as tar:
 			extraction_filter = get_app_cache_extract_filter(count_threshold=150_000)
 			try:
 				tar.extractall(app_path.parent, filter=extraction_filter)
+				click.secho(
+					f"Bench app-cache: extraction succeeded for {self.app_name}",
+					fg="green",
+				)
 			except Exception:
-				message = f"Cache extraction failed for {self.app_name}, skipping cache"
-				click.secho(message, fg="yellow")
+				message = f"Bench app-cache: extraction failed for {self.app_name}"
+				click.secho(
+					message,
+					fg="yellow",
+				)
 				logger.exception(message)
 				shutil.rmtree(app_path)
 				return False
@@ -392,10 +400,10 @@ class App(AppMeta):
 			return False
 
 		cwd = os.getcwd()
-		cache_path = self.get_app_cache_path(compress_artifacts)
+		cache_path = self.get_app_cache_temp_path(compress_artifacts)
 		mode = "w:gz" if compress_artifacts else "w"
 
-		message = f"Caching {self.app_name} app directory"
+		message = f"Bench app-cache: caching {self.app_name}"
 		if compress_artifacts:
 			message += " (compressed)"
 		click.secho(message)
@@ -407,9 +415,19 @@ class App(AppMeta):
 		try:
 			with tarfile.open(cache_path, mode) as tar:
 				tar.add(app_path.name)
+
+			hashed_path = self.get_app_cache_hashed_path(cache_path)
+			unlink_no_throw(hashed_path)
+
+			cache_path.rename(hashed_path)
+			click.secho(
+				f"Bench app-cache: caching succeeded for {self.app_name} as {hashed_path.as_posix()}",
+				fg="green",
+			)
+
 			success = True
-		except Exception:
-			log(f"Failed to cache {app_path}", level=3)
+		except Exception as exc:
+			log(f"Bench app-cache: caching failed for {self.app_name} {exc}", level=3)
 			success = False
 		finally:
 			os.chdir(cwd)
@@ -437,28 +455,11 @@ def can_get_cached(app_name: str, cache_key: str) -> bool:
 	checking local remote and fetching can be skipped while
 	keeping get-app command params the same.
 	"""
-	cache_path = get_bench_cache_path("apps")
-	tarfile_path = cache_path / get_cache_filename(
-		app_name,
-		cache_key,
-		True,
-	)
-	if tarfile_path.is_file():
-		return True
+	if cache_path := get_app_cache_path(app_name, cache_key):
+		return cache_path.exists()
 
-	tarfile_path = cache_path / get_cache_filename(
-		app_name,
-		cache_key,
-		False,
-	)
-
-	return tarfile_path.is_file()
-
-
-def get_cache_filename(app_name: str, cache_key: str, is_compressed=False):
-	ext = "tgz" if is_compressed else "tar"
-	return f"{app_name}-{cache_key[:10]}.{ext}"
+	return False
 
 
 def can_frappe_use_cached(app: App) -> bool:
@@ -482,7 +483,10 @@ def can_frappe_use_cached(app: App) -> bool:
 		"""
 		return sv.Version("15.12.0") not in sv.SimpleSpec(min_frappe)
 	except ValueError:
-		click.secho(f"Invalid value found for frappe version '{min_frappe}'", fg="yellow")
+		click.secho(
+			f"Bench app-cache: invalid value found for frappe version '{min_frappe}'",
+			fg="yellow",
+		)
 		# Invalid expression
 		return False
 
@@ -591,6 +595,10 @@ def remove_unused_node_modules(app_path: Path) -> None:
 			can_delete = "vite build" in build_script
 
 		if can_delete:
+			click.secho(
+				f"Bench app-cache: removing {node_modules.as_posix()}",
+				fg="yellow",
+			)
 			shutil.rmtree(node_modules)
 
 
@@ -1036,3 +1044,58 @@ def get_apps_json(path):
 
 	with open(path) as f:
 		return json.load(f)
+
+
+def is_cache_hash_valid(cache_path: Path) -> bool:
+	parts = cache_path.name.split(".")
+	if len(parts) < 2 or not parts[-2].startswith("md5-"):
+		return False
+
+	md5 = parts[-2].split("-")[1]
+	return get_file_md5(cache_path) == md5
+
+
+def unlink_no_throw(path: Path):
+	if not path.exists():
+		return
+
+	try:
+		path.unlink(True)
+	except Exception:
+		pass
+
+
+def get_app_cache_path(app_name: str, cache_key: str) -> "Optional[Path]":
+	cache_path = get_bench_cache_path("apps")
+	glob_pattern = f"{app_name}.{cache_key}.md5-*"
+
+	for app_cache_path in cache_path.glob(glob_pattern):
+		return app_cache_path
+
+	return None
+
+
+def validate_cache_and_get_path(app_name: str, cache_key: str) -> "Optional[Path]":
+	if not cache_key:
+		return
+
+	if not (cache_path := get_app_cache_path(app_name, cache_key)):
+		return
+
+	if not cache_path.is_file():
+		click.secho(
+			f"Bench app-cache: file check failed for {cache_path.as_posix()}, skipping cache",
+			fg="yellow",
+		)
+		unlink_no_throw(cache_path)
+		return
+
+	if not is_cache_hash_valid(cache_path):
+		click.secho(
+			f"Bench app-cache: hash validation failed for {cache_path.as_posix()}, skipping cache",
+			fg="yellow",
+		)
+		unlink_no_throw(cache_path)
+		return
+
+	return cache_path
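
The net effect of the `bench/app.py` changes above: a cached app is first tarred to a uuid-named temp file, then renamed to `<app_name>.<cache_key>.md5-<digest>.<tar|tgz>`, and every later lookup re-hashes the file and compares it to the digest embedded in the name. A minimal, self-contained sketch of that naming and validation round trip, using hypothetical paths and plain stdlib calls rather than the bench helpers themselves:

```python
import hashlib
import tempfile
from pathlib import Path


def file_md5(path: Path) -> str:
	# Stream the file in 64 KiB chunks so large tarballs are never read whole.
	digest = hashlib.md5()
	with open(path, "rb") as f:
		while chunk := f.read(2**16):
			digest.update(chunk)
	return digest.hexdigest()


cache_dir = Path(tempfile.mkdtemp())  # stand-in for ~/.cache/bench/apps
temp = cache_dir / "frappe.0f3b9c2a.tar"  # uuid-named temp artifact (fake content)
temp.write_bytes(b"pretend this is the tarred app directory")

# The final name embeds both the cache key and the archive's md5:
#   frappe.<cache_key>.md5-<digest>.tar
final = temp.with_name(f"frappe.deadbeef.md5-{file_md5(temp)}.tar")
temp.rename(final)

# A later `get-app --cache-key deadbeef` recomputes the md5 and compares it
# with the digest in the filename before trusting the cache.
embedded = final.name.split(".")[-2].split("-")[1]
print(embedded == file_md5(final))  # True unless the file was corrupted
```

Writing to a random temp name and renaming only once the hash is known is what keeps an interrupted `bench get-app` from leaving a plausible-looking but truncated archive under the final cache name.
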
diff --git a/bench/utils/__init__.py b/bench/utils/__init__.py
index 4b9e65b6..aee8f4e8 100644
--- a/bench/utils/__init__.py
+++ b/bench/utils/__init__.py
@@ -5,6 +5,7 @@ import os
 import re
 import subprocess
 import sys
+import hashlib
 from functools import lru_cache
 from glob import glob
 from pathlib import Path
@@ -23,6 +24,12 @@ from bench.exceptions import (
 	InvalidRemoteException,
 )
 
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+	from typing import Optional
+
+
 logger = logging.getLogger(PROJECT_NAME)
 paths_in_app = ("hooks.py", "modules.txt", "patches.txt")
 paths_in_bench = ("apps", "sites", "config", "logs", "config/pids")
@@ -52,6 +59,7 @@ def is_frappe_app(directory: str) -> bool:
 
 	return bool(is_frappe_app)
 
+
 def get_bench_cache_path(sub_dir: Optional[str]) -> Path:
 	relative_path = "~/.cache/bench"
 	if sub_dir and not sub_dir.startswith("/"):
@@ -62,6 +70,7 @@ def get_bench_cache_path(sub_dir: Optional[str]) -> Path:
 	cache_path.mkdir(parents=True, exist_ok=True)
 	return cache_path
 
+
 @lru_cache(maxsize=None)
 def is_valid_frappe_branch(frappe_path: str, frappe_branch: str):
 	"""Check if a branch exists in a repo. Throws InvalidRemoteException if branch is not found
@@ -417,7 +426,7 @@ def get_env_frappe_commands(bench_path=".") -> List:
 		return []
 
 
-def find_org(org_repo, using_cached: bool=False):
+def find_org(org_repo, using_cached: bool = False):
 	import requests
 
 	org_repo = org_repo[0]
@@ -432,10 +441,14 @@ def find_org(org_repo, using_cached: bool=False):
 	if using_cached:
 		return "", org_repo
 
-	raise InvalidRemoteException(f"{org_repo} not found under frappe or erpnext GitHub accounts")
+	raise InvalidRemoteException(
+		f"{org_repo} not found under frappe or erpnext GitHub accounts"
+	)
 
 
-def fetch_details_from_tag(_tag: str, using_cached: bool=False) -> Tuple[str, str, str]:
+def fetch_details_from_tag(
+	_tag: str, using_cached: bool = False
+) -> Tuple[str, str, str]:
 	if not _tag:
 		raise Exception("Tag is not provided")
 
@@ -578,14 +591,17 @@ def get_cmd_from_sysargv():
 
 def get_app_cache_extract_filter(
 	count_threshold: int = 10_000,
 	size_threshold: int = 1_000_000_000,
-): # -> Callable[[TarInfo, str], TarInfo | None]
+):  # -> Callable[[TarInfo, str], TarInfo | None]
 	state = dict(count=0, size=0)
 	AbsoluteLinkError = Exception
-	def data_filter(m: TarInfo, _:str) -> TarInfo:
+
+	def data_filter(m: TarInfo, _: str) -> TarInfo:
 		return m
 
-	if (sys.version_info.major == 3 and sys.version_info.minor > 7) or sys.version_info.major > 3:
+	if (
+		sys.version_info.major == 3 and sys.version_info.minor > 7
+	) or sys.version_info.major > 3:
 		from tarfile import data_filter, AbsoluteLinkError
 
 	def filter_function(member: TarInfo, dest_path: str) -> Optional[TarInfo]:
@@ -605,3 +621,18 @@ def get_app_cache_extract_filter(
 				return None
 
 	return filter_function
+
+
+def get_file_md5(p: Path) -> "str":
+	with open(p.as_posix(), "rb") as f:
+		try:
+			file_md5 = hashlib.md5(usedforsecurity=False)
+
+		# Will throw if < 3.9, can be removed once support
+		# is dropped
+		except TypeError:
+			file_md5 = hashlib.md5()
+
+		while chunk := f.read(2**16):
+			file_md5.update(chunk)
+	return file_md5.hexdigest()
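
`get_app_cache_extract_filter` (reformatted above, behaviour unchanged) is what `App.get_cached` passes to `tar.extractall(..., filter=...)`. As an illustrative sketch of the idea rather than the exact bench implementation: count members and their cumulative size, reject anything past the thresholds, and defer link/path-safety checks to the stdlib `tarfile.data_filter` where the running interpreter ships it (it was backported to 3.8+ point releases, which is why the diff gates on `minor > 7`); the bench version additionally imports `AbsoluteLinkError` so it can handle unsafe members itself.

```python
import tarfile
from tarfile import TarInfo
from typing import Optional


def make_extract_filter(count_threshold: int = 10_000, size_threshold: int = 1_000_000_000):
	state = {"count": 0, "size": 0}

	def extract_filter(member: TarInfo, dest_path: str) -> Optional[TarInfo]:
		state["count"] += 1
		state["size"] += member.size
		if state["count"] > count_threshold or state["size"] > size_threshold:
			return None  # returning None tells extractall to skip this member

		# Interpreters without the backport get no extra safety checks,
		# mirroring the no-op data_filter fallback in the diff above.
		if hasattr(tarfile, "data_filter"):
			return tarfile.data_filter(member, dest_path)
		return member

	return extract_filter


# Usage (hypothetical archive name):
# with tarfile.open("frappe.deadbeef.md5-<digest>.tar") as tar:
#     tar.extractall("apps", filter=make_extract_filter(count_threshold=150_000))
```
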
"config/pids") @@ -52,6 +59,7 @@ def is_frappe_app(directory: str) -> bool: return bool(is_frappe_app) + def get_bench_cache_path(sub_dir: Optional[str]) -> Path: relative_path = "~/.cache/bench" if sub_dir and not sub_dir.startswith("/"): @@ -62,6 +70,7 @@ def get_bench_cache_path(sub_dir: Optional[str]) -> Path: cache_path.mkdir(parents=True, exist_ok=True) return cache_path + @lru_cache(maxsize=None) def is_valid_frappe_branch(frappe_path: str, frappe_branch: str): """Check if a branch exists in a repo. Throws InvalidRemoteException if branch is not found @@ -417,7 +426,7 @@ def get_env_frappe_commands(bench_path=".") -> List: return [] -def find_org(org_repo, using_cached: bool=False): +def find_org(org_repo, using_cached: bool = False): import requests org_repo = org_repo[0] @@ -432,10 +441,14 @@ def find_org(org_repo, using_cached: bool=False): if using_cached: return "", org_repo - raise InvalidRemoteException(f"{org_repo} not found under frappe or erpnext GitHub accounts") + raise InvalidRemoteException( + f"{org_repo} not found under frappe or erpnext GitHub accounts" + ) -def fetch_details_from_tag(_tag: str, using_cached: bool=False) -> Tuple[str, str, str]: +def fetch_details_from_tag( + _tag: str, using_cached: bool = False +) -> Tuple[str, str, str]: if not _tag: raise Exception("Tag is not provided") @@ -578,14 +591,17 @@ def get_cmd_from_sysargv(): def get_app_cache_extract_filter( count_threshold: int = 10_000, size_threshold: int = 1_000_000_000, -): # -> Callable[[TarInfo, str], TarInfo | None] +): # -> Callable[[TarInfo, str], TarInfo | None] state = dict(count=0, size=0) AbsoluteLinkError = Exception - def data_filter(m: TarInfo, _:str) -> TarInfo: + + def data_filter(m: TarInfo, _: str) -> TarInfo: return m - if (sys.version_info.major == 3 and sys.version_info.minor > 7) or sys.version_info.major > 3: + if ( + sys.version_info.major == 3 and sys.version_info.minor > 7 + ) or sys.version_info.major > 3: from tarfile import data_filter, AbsoluteLinkError def filter_function(member: TarInfo, dest_path: str) -> Optional[TarInfo]: @@ -605,3 +621,18 @@ def get_app_cache_extract_filter( return None return filter_function + + +def get_file_md5(p: Path) -> "str": + with open(p.as_posix(), "rb") as f: + try: + file_md5 = hashlib.md5(usedforsecurity=False) + + # Will throw if < 3.9, can be removed once support + # is dropped + except TypeError: + file_md5 = hashlib.md5() + + while chunk := f.read(2**16): + file_md5.update(chunk) + return file_md5.hexdigest() diff --git a/bench/utils/bench.py b/bench/utils/bench.py index 9eba296a..acc0247f 100644 --- a/bench/utils/bench.py +++ b/bench/utils/bench.py @@ -688,7 +688,7 @@ def cache_list() -> None: created = datetime.fromtimestamp(stat.st_ctime) accessed = datetime.fromtimestamp(stat.st_atime) - app = item.name.split("-")[0] + app = item.name.split(".")[0] tot_items += 1 tot_size += stat.st_size compressed = item.suffix == ".tgz" @@ -696,7 +696,7 @@ def cache_list() -> None: if not printed_header: click.echo( f"{'APP':15} " - f"{'FILE':25} " + f"{'FILE':90} " f"{'SIZE':>13} " f"{'COMPRESSED'} " f"{'CREATED':19} " @@ -706,7 +706,7 @@ def cache_list() -> None: click.echo( f"{app:15} " - f"{item.name:25} " + f"{item.name:90} " f"{size_mb:10.3f} MB " f"{str(compressed):10} " f"{created:%Y-%m-%d %H:%M:%S} " diff --git a/pyproject.toml b/pyproject.toml index 01db07b0..2b8421f3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,7 @@ name = "frappe-bench" description = "CLI to manage Multi-tenant deployments for Frappe apps" readme 
= "README.md" license = "GPL-3.0-only" -requires-python = ">=3.7" +requires-python = ">=3.8" authors = [ { name = "Frappe Technologies Pvt Ltd", email = "developers@frappe.io" }, ]