diff --git a/.gitignore b/.gitignore index 35826d3c..8d81a51f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,9 @@ # MAC OS .DS_Store +# VS Code +.vscode/ + # Vim Gitignore ## Swap [._]*.s[a-v][a-z] diff --git a/bench/app.py b/bench/app.py index f07251e4..cf437898 100755 --- a/bench/app.py +++ b/bench/app.py @@ -6,10 +6,12 @@ import re import shutil import subprocess import sys +import tarfile import typing from collections import OrderedDict from datetime import date from functools import lru_cache +from pathlib import Path from urllib.parse import urlparse # imports - third party imports @@ -22,7 +24,9 @@ from bench.exceptions import NotInBenchDirectoryError from bench.utils import ( UNSET_ARG, fetch_details_from_tag, + get_app_cache_extract_filter, get_available_folder_name, + get_bench_cache_path, is_bench_directory, is_git_url, is_valid_frappe_branch, @@ -166,6 +170,7 @@ class App(AppMeta): branch: str = None, bench: "Bench" = None, soft_link: bool = False, + cache_key = None, *args, **kwargs, ): @@ -173,6 +178,7 @@ class App(AppMeta): self.soft_link = soft_link self.required_by = None self.local_resolution = [] + self.cache_key = cache_key super().__init__(name, branch, *args, **kwargs) @step(title="Fetching App {repo}", success="App {repo} Fetched") @@ -227,6 +233,7 @@ class App(AppMeta): resolved=False, restart_bench=True, ignore_resolution=False, + using_cached=False ): import bench.cli from bench.utils.app import get_app_name @@ -247,6 +254,7 @@ class App(AppMeta): skip_assets=skip_assets, restart_bench=restart_bench, resolution=self.local_resolution, + using_cached=using_cached, ) @step(title="Cloning and installing {repo}", success="App {repo} Installed") @@ -283,6 +291,139 @@ class App(AppMeta): branch=self.tag, required=self.local_resolution, ) + + + """ + Get App Cache + + Since get-app affects only the `apps`, `env`, and `sites` + bench sub directories. If we assume deterministic builds + when get-app is called, the `apps/app_name` sub dir can be + cached. + + In subsequent builds this would save time by not having to: + - clone repository + - install frontend dependencies + - building frontend assets + as all of this is contained in the `apps/app_name` sub dir. + + Code that updates the `env` and `sites` subdirs still need + to be run. + """ + + def get_app_path(self) -> Path: + return Path(self.bench.name) / "apps" / self.app_name + + def get_app_cache_path(self, is_compressed=False) -> Path: + assert self.cache_key is not None + + cache_path = get_bench_cache_path("apps") + ext = "tgz" if is_compressed else "tar" + tarfile_name = f"{self.app_name}-{self.cache_key[:10]}.{ext}" + return cache_path / tarfile_name + + def get_cached(self) -> bool: + if not self.cache_key: + return False + + cache_path = self.get_app_cache_path() + mode = "r" + + # Check if cache exists without gzip + if not cache_path.is_file(): + cache_path = self.get_app_cache_path(True) + mode = "r:gz" + + # Check if cache exists with gzip + if not cache_path.is_file(): + return False + + app_path = self.get_app_path() + if app_path.is_dir(): + shutil.rmtree(app_path) + + click.secho(f"Getting {self.app_name} from cache", fg="yellow") + with tarfile.open(cache_path, mode) as tar: + try: + tar.extractall(app_path.parent, filter=get_app_cache_extract_filter()) + except Exception: + logger.exception(f"Cache extraction failed for {self.app_name}") + shutil.rmtree(app_path) + return False + + return True + + def set_cache(self, compress_artifacts=False) -> bool: + if not self.cache_key: + return False + + app_path = self.get_app_path() + if not app_path.is_dir(): + return False + + cwd = os.getcwd() + cache_path = self.get_app_cache_path(compress_artifacts) + mode = "w:gz" if compress_artifacts else "w" + + message = f"Caching {self.app_name} app directory" + if compress_artifacts: + message += " (compressed)" + click.secho(message) + + self.prune_app_directory() + + success = False + os.chdir(app_path.parent) + try: + with tarfile.open(cache_path, mode) as tar: + tar.add(app_path.name) + success = True + except Exception: + log(f"Failed to cache {app_path}", level=3) + success = False + finally: + os.chdir(cwd) + return success + + def prune_app_directory(self): + app_path = self.get_app_path() + remove_unused_node_modules(app_path) + + +def remove_unused_node_modules(app_path: Path) -> None: + """ + Erring a bit the side of caution; since there is no explicit way + to check if node_modules are utilized, this function checks if Vite + is being used to build the frontend code. + + Since most popular Frappe apps use Vite to build their frontends, + this method should suffice. + + Note: root package.json is ignored cause those usually belong to + apps that do not have a build step and so their node_modules are + utilized during runtime. + """ + + for p in app_path.iterdir(): + if not p.is_dir(): + continue + + package_json = p / "package.json" + if not package_json.is_file(): + continue + + node_modules = p / "node_modules" + if not node_modules.is_dir(): + continue + + can_delete = False + with package_json.open("r", encoding="utf-8") as f: + package_json = json.loads(f.read()) + build_script = package_json.get("scripts", {}).get("build", "") + can_delete = "vite build" in build_script + + if can_delete: + shutil.rmtree(node_modules) def make_resolution_plan(app: App, bench: "Bench"): @@ -346,6 +487,8 @@ def get_app( soft_link=False, init_bench=False, resolve_deps=False, + cache_key=None, + compress_artifacts=False, ): """bench get-app clones a Frappe App from remote (GitHub or any other git server), and installs it on the current bench. This also resolves dependencies based on the @@ -360,14 +503,14 @@ def get_app( from bench.utils.app import check_existing_dir bench = Bench(bench_path) - app = App(git_url, branch=branch, bench=bench, soft_link=soft_link) + app = App(git_url, branch=branch, bench=bench, soft_link=soft_link, cache_key=cache_key) git_url = app.url repo_name = app.repo branch = app.tag bench_setup = False restart_bench = not init_bench frappe_path, frappe_branch = None, None - + if resolve_deps: resolution = make_resolution_plan(app, bench) click.secho("Following apps will be installed", fg="bright_blue") @@ -417,6 +560,10 @@ def get_app( verbose=verbose, ) return + + if app.get_cached(): + app.install(verbose=verbose, skip_assets=skip_assets, restart_bench=restart_bench, using_cached=True) + return dir_already_exists, cloned_path = check_existing_dir(bench_path, repo_name) to_clone = not dir_already_exists @@ -442,6 +589,9 @@ def get_app( or click.confirm("Do you want to reinstall the existing application?") ): app.install(verbose=verbose, skip_assets=skip_assets, restart_bench=restart_bench) + + app.set_cache(compress_artifacts) + def install_resolved_deps( @@ -452,7 +602,6 @@ def install_resolved_deps( verbose=False, ): from bench.utils.app import check_existing_dir - if "frappe" in resolution: # Terminal dependency del resolution["frappe"] @@ -550,6 +699,7 @@ def install_app( restart_bench=True, skip_assets=False, resolution=UNSET_ARG, + using_cached=False, ): import bench.cli as bench_cli from bench.bench import Bench @@ -577,14 +727,14 @@ def install_app( if conf.get("developer_mode"): install_python_dev_dependencies(apps=app, bench_path=bench_path, verbose=verbose) - if os.path.exists(os.path.join(app_path, "package.json")): + if not using_cached and os.path.exists(os.path.join(app_path, "package.json")): yarn_install = "yarn install --verbose" if verbose else "yarn install" bench.run(yarn_install, cwd=app_path) bench.apps.sync(app_name=app, required=resolution, branch=tag, app_dir=app_path) if not skip_assets: - build_assets(bench_path=bench_path, app=app) + build_assets(bench_path=bench_path, app=app, using_cached=using_cached) if restart_bench: # Avoiding exceptions here as production might not be set-up @@ -621,9 +771,9 @@ Cannot proceed with update: You have local changes in app "{app}" that are not c Here are your choices: 1. Merge the {app} app manually with "git pull" / "git pull --rebase" and fix conflicts. -1. Temporarily remove your changes with "git stash" or discard them completely +2. Temporarily remove your changes with "git stash" or discard them completely with "bench update --reset" or for individual repositries "git reset --hard" -2. If your changes are helpful for others, send in a pull request via GitHub and +3. If your changes are helpful for others, send in a pull request via GitHub and wait for them to be merged in the core.""" ) sys.exit(1) diff --git a/bench/commands/__init__.py b/bench/commands/__init__.py index 5ef14212..1daf6146 100755 --- a/bench/commands/__init__.py +++ b/bench/commands/__init__.py @@ -72,6 +72,7 @@ bench_command.add_command(switch_to_develop) from bench.commands.utils import ( + app_cache_helper, backup_all_sites, bench_src, disable_production, @@ -108,6 +109,7 @@ bench_command.add_command(disable_production) bench_command.add_command(bench_src) bench_command.add_command(find_benches) bench_command.add_command(migrate_env) +bench_command.add_command(app_cache_helper) from bench.commands.setup import setup diff --git a/bench/commands/install.py b/bench/commands/install.py index 31ad59b6..a0f1fd41 100644 --- a/bench/commands/install.py +++ b/bench/commands/install.py @@ -77,7 +77,7 @@ def install_nginx(user=None): setup_sudoers(user) -@click.command("virtualbox", help="Installs supervisor") +@click.command("virtualbox", help="Installs virtualbox") def install_virtualbox(): run_playbook("vm_build.yml", tag="virtualbox") diff --git a/bench/commands/make.py b/bench/commands/make.py index 7369e9c8..846e7b49 100755 --- a/bench/commands/make.py +++ b/bench/commands/make.py @@ -151,6 +151,18 @@ def drop(path): default=False, help="Resolve dependencies before installing app", ) +@click.option( + "--cache-key", + type=str, + default=None, + help="Caches get-app artifacts if provided (only first 10 chars is used)", +) +@click.option( + "--compress-artifacts", + is_flag=True, + default=False, + help="Whether to gzip get-app artifacts that are to be cached", +) def get_app( git_url, branch, @@ -160,6 +172,8 @@ def get_app( soft_link=False, init_bench=False, resolve_deps=False, + cache_key=None, + compress_artifacts=False, ): "clone an app from the internet and set it up in your bench" from bench.app import get_app @@ -172,6 +186,8 @@ def get_app( soft_link=soft_link, init_bench=init_bench, resolve_deps=resolve_deps, + cache_key=cache_key, + compress_artifacts=compress_artifacts, ) diff --git a/bench/commands/setup.py b/bench/commands/setup.py index 9b13c269..e291f86a 100755 --- a/bench/commands/setup.py +++ b/bench/commands/setup.py @@ -73,7 +73,9 @@ def setup_supervisor(user=None, yes=False, skip_redis=False, skip_supervisord=Fa generate_supervisor_config, ) - which("supervisorctl", raise_err=True) + if which("supervisorctl") is None: + click.secho("Please install `supervisor` to proceed", fg="red") + sys.exit(1) if not skip_supervisord and "Permission denied" in get_cmd_output( "supervisorctl status" diff --git a/bench/commands/utils.py b/bench/commands/utils.py index 9882e8f0..0a7d97c5 100644 --- a/bench/commands/utils.py +++ b/bench/commands/utils.py @@ -176,3 +176,21 @@ def migrate_env(python, backup=True): from bench.utils.bench import migrate_env migrate_env(python=python, backup=backup) + + +@click.command("app-cache", help="View or remove items belonging to bench get-app cache") +@click.option("--clear", is_flag=True, default=False, help="Remove all items") +@click.option( + "--remove-app", + default="", + help="Removes all items that match provided app name", +) +@click.option( + "--remove-key", + default="", + help="Removes all items that matches provided cache key", +) +def app_cache_helper(clear=False, remove_app="", remove_key=""): + from bench.utils.bench import cache_helper + + cache_helper(clear, remove_app, remove_key) diff --git a/bench/config/supervisor.py b/bench/config/supervisor.py index 1055d3ba..0eb6a848 100644 --- a/bench/config/supervisor.py +++ b/bench/config/supervisor.py @@ -59,6 +59,7 @@ def generate_supervisor_config(bench_path, user=None, yes=False, skip_redis=Fals "skip_redis": skip_redis, "workers": config.get("workers", {}), "multi_queue_consumption": can_enable_multi_queue_consumption(bench_path), + "supervisor_startretries": 10, } ) diff --git a/bench/config/templates/nginx.conf b/bench/config/templates/nginx.conf index 5cba5782..cd6a6701 100644 --- a/bench/config/templates/nginx.conf +++ b/bench/config/templates/nginx.conf @@ -58,6 +58,7 @@ server { location /assets { try_files $uri =404; + add_header Cache-Control "max-age=31536000"; } location ~ ^/protected/(.*) { diff --git a/bench/config/templates/supervisor.conf b/bench/config/templates/supervisor.conf index 57fd8574..9303edb7 100644 --- a/bench/config/templates/supervisor.conf +++ b/bench/config/templates/supervisor.conf @@ -14,6 +14,7 @@ stopwaitsecs=40 killasgroup=true user={{ user }} directory={{ sites_dir }} +startretries={{ supervisor_startretries }} [program:{{ bench_name }}-frappe-schedule] command={{ bench_cmd }} schedule @@ -24,6 +25,7 @@ stdout_logfile={{ bench_dir }}/logs/schedule.log stderr_logfile={{ bench_dir }}/logs/schedule.error.log user={{ user }} directory={{ bench_dir }} +startretries={{ supervisor_startretries }} {% if not multi_queue_consumption %} [program:{{ bench_name }}-frappe-default-worker] @@ -39,6 +41,7 @@ directory={{ bench_dir }} killasgroup=true numprocs={{ background_workers }} process_name=%(program_name)s-%(process_num)d +startretries={{ supervisor_startretries }} {% endif %} [program:{{ bench_name }}-frappe-short-worker] @@ -54,6 +57,7 @@ directory={{ bench_dir }} killasgroup=true numprocs={{ background_workers }} process_name=%(program_name)s-%(process_num)d +startretries={{ supervisor_startretries }} [program:{{ bench_name }}-frappe-long-worker] command={{ bench_cmd }} worker --queue long{{',default,short' if multi_queue_consumption else ''}} @@ -68,6 +72,7 @@ directory={{ bench_dir }} killasgroup=true numprocs={{ background_workers }} process_name=%(program_name)s-%(process_num)d +startretries={{ supervisor_startretries }} {% for worker_name, worker_details in workers.items() %} [program:{{ bench_name }}-frappe-{{ worker_name }}-worker] @@ -83,6 +88,7 @@ directory={{ bench_dir }} killasgroup=true numprocs={{ worker_details["background_workers"] or background_workers }} process_name=%(program_name)s-%(process_num)d +startretries={{ supervisor_startretries }} {% endfor %} @@ -96,6 +102,7 @@ stdout_logfile={{ bench_dir }}/logs/redis-cache.log stderr_logfile={{ bench_dir }}/logs/redis-cache.error.log user={{ user }} directory={{ sites_dir }} +startretries={{ supervisor_startretries }} [program:{{ bench_name }}-redis-queue] command={{ redis_server }} {{ redis_queue_config }} @@ -106,6 +113,7 @@ stdout_logfile={{ bench_dir }}/logs/redis-queue.log stderr_logfile={{ bench_dir }}/logs/redis-queue.error.log user={{ user }} directory={{ sites_dir }} +startretries={{ supervisor_startretries }} {% endif %} {% if node %} @@ -118,6 +126,7 @@ stdout_logfile={{ bench_dir }}/logs/node-socketio.log stderr_logfile={{ bench_dir }}/logs/node-socketio.error.log user={{ user }} directory={{ bench_dir }} +startretries={{ supervisor_startretries }} {% endif %} [group:{{ bench_name }}-web] diff --git a/bench/utils/__init__.py b/bench/utils/__init__.py index 3fe17ad2..3fc2a7bb 100644 --- a/bench/utils/__init__.py +++ b/bench/utils/__init__.py @@ -7,8 +7,10 @@ import subprocess import sys from functools import lru_cache from glob import glob +from pathlib import Path from shlex import split -from typing import List, Tuple +from tarfile import TarInfo +from typing import List, Optional, Tuple # imports - third party imports import click @@ -50,6 +52,15 @@ def is_frappe_app(directory: str) -> bool: return bool(is_frappe_app) +def get_bench_cache_path(sub_dir: Optional[str]) -> Path: + relative_path = "~/.cache/bench" + if sub_dir and not sub_dir.startswith("/"): + relative_path += f"/{sub_dir}" + + cache_path = os.path.expanduser(relative_path) + cache_path = Path(cache_path) + cache_path.mkdir(parents=True, exist_ok=True) + return cache_path @lru_cache(maxsize=None) def is_valid_frappe_branch(frappe_path: str, frappe_branch: str): @@ -559,3 +570,35 @@ def get_cmd_from_sysargv(): break return cmd_from_ctx + + +def get_app_cache_extract_filter( + count_threshold: int = 10_000, + size_threshold: int = 1_000_000_000, +): # -> Callable[[TarInfo, str], TarInfo | None] + state = dict(count=0, size=0) + + AbsoluteLinkError = Exception + def data_filter(m: TarInfo, _:str) -> TarInfo: + return m + + if (sys.version_info.major == 3 and sys.version_info.minor > 7) or sys.version_info.major > 3: + from tarfile import data_filter, AbsoluteLinkError + + def filter_function(member: TarInfo, dest_path: str) -> Optional[TarInfo]: + state["count"] += 1 + state["size"] += member.size + + if state["count"] > count_threshold: + raise RuntimeError(f"Number of entries exceeds threshold ({state['count']})") + + if state["size"] > size_threshold: + raise RuntimeError(f"Extracted size exceeds threshold ({state['size']})") + + try: + return data_filter(member, dest_path) + except AbsoluteLinkError: + # Links created by `frappe` after extraction + return None + + return filter_function diff --git a/bench/utils/bench.py b/bench/utils/bench.py index b825200d..c0b9a430 100644 --- a/bench/utils/bench.py +++ b/bench/utils/bench.py @@ -4,11 +4,13 @@ import json import logging import os import re +import shutil import subprocess import sys from functools import lru_cache from glob import glob from json.decoder import JSONDecodeError +from pathlib import Path # imports - third party imports import click @@ -16,7 +18,8 @@ import click # imports - module imports import bench from bench.exceptions import PatchError, ValidationError -from bench.utils import exec_cmd, get_bench_name, get_cmd_output, log, which +from bench.utils import (exec_cmd, get_bench_cache_path, get_bench_name, + get_cmd_output, log, which) logger = logging.getLogger(bench.PROJECT_NAME) @@ -350,11 +353,16 @@ def restart_process_manager(bench_path=".", web_workers=False): exec_cmd(f"overmind restart {worker}", cwd=bench_path) -def build_assets(bench_path=".", app=None): +def build_assets(bench_path=".", app=None, using_cached=False): command = "bench build" if app: command += f" --app {app}" - exec_cmd(command, cwd=bench_path, env={"BENCH_DEVELOPER": "1"}) + + env = {"BENCH_DEVELOPER": "1"} + if using_cached: + env["USING_CACHED"] = "1" + + exec_cmd(command, cwd=bench_path, env=env) def handle_version_upgrade(version_upgrade, bench_path, force, reset, conf): @@ -635,3 +643,115 @@ To switch to your required branch, run the following commands: bench switch-to-b ) sys.exit(1) + + +def cache_helper(clear=False, remove_app="", remove_key="") -> None: + can_remove = bool(remove_key or remove_app) + if not clear and not can_remove: + cache_list() + elif can_remove: + cache_remove(remove_app, remove_key) + elif clear: + cache_clear() + else: + pass # unreachable + + +def cache_list() -> None: + from datetime import datetime + + tot_size = 0 + tot_items = 0 + + printed_header = False + for item in get_bench_cache_path("apps").iterdir(): + if item.suffix not in [".tar", ".tgz"]: + continue + + stat = item.stat() + size_mb = stat.st_size / 1_000_000 + created = datetime.fromtimestamp(stat.st_ctime) + accessed = datetime.fromtimestamp(stat.st_atime) + + app = item.name.split("-")[0] + tot_items += 1 + tot_size += stat.st_size + compressed = item.suffix == ".tgz" + + if not printed_header: + click.echo( + f"{'APP':15} " + f"{'FILE':25} " + f"{'SIZE':>13} " + f"{'COMPRESSED'} " + f"{'CREATED':19} " + f"{'ACCESSED':19} " + ) + printed_header = True + + click.echo( + f"{app:15} " + f"{item.name:25} " + f"{size_mb:10.3f} MB " + f"{str(compressed):10} " + f"{created:%Y-%m-%d %H:%M:%S} " + f"{accessed:%Y-%m-%d %H:%M:%S} " + ) + + if tot_items: + click.echo(f"Total size {tot_size / 1_000_000:.3f} MB belonging to {tot_items} items") + else: + click.echo("No cached items") + + +def cache_remove(app: str = "", key: str = "") -> None: + rem_items = 0 + rem_size = 0 + for item in get_bench_cache_path("apps").iterdir(): + if not should_remove_item(item, app, key): + continue + + rem_items += 1 + rem_size += item.stat().st_size + item.unlink(True) + click.echo(f"Removed {item.name}") + + if rem_items: + click.echo(f"Cleared {rem_size / 1_000_000:.3f} MB belonging to {rem_items} items") + else: + click.echo("No items removed") + + +def should_remove_item(item: Path, app: str, key: str) -> bool: + if item.suffix not in [".tar", ".tgz"]: + return False + + name = item.name + if app and key and name.startswith(f"{app}-{key[:10]}."): + return True + + if app and name.startswith(f"{app}-"): + return True + + if key and f"-{key[:10]}." in name: + return True + + return False + + +def cache_clear() -> None: + cache_path = get_bench_cache_path("apps") + tot_items = len(os.listdir(cache_path)) + if not tot_items: + click.echo("No cached items") + return + + tot_size = get_dir_size(cache_path) + shutil.rmtree(cache_path) + + if tot_items: + click.echo(f"Cleared {tot_size / 1_000_000:.3f} MB belonging to {tot_items} items") + + +def get_dir_size(p: Path) -> int: + return sum(i.stat(follow_symlinks=False).st_size for i in p.iterdir()) diff --git a/pyproject.toml b/pyproject.toml index d1011ff1..af7d0eaf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,7 +22,7 @@ dependencies = [ "Click>=7.0", "GitPython~=3.1.30", "honcho", - "Jinja2~=3.0.3", + "Jinja2~=3.1.3", "python-crontab~=2.6.0", "requests", "semantic-version~=2.8.2",