2
0
mirror of https://github.com/frappe/bench.git synced 2024-06-19 08:22:20 +00:00

Merge pull request #1519 from frappe/get-app-cache

feat: cache get-app artifacts by commit_hash
This commit is contained in:
Alan 2024-01-23 16:20:25 +05:30 committed by GitHub
commit 36c3cf4415
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 323 additions and 10 deletions

3
.gitignore vendored
View File

@ -1,6 +1,9 @@
# MAC OS
.DS_Store
# VS Code
.vscode/
# Vim Gitignore
## Swap
[._]*.s[a-v][a-z]

View File

@ -6,10 +6,12 @@ import re
import shutil
import subprocess
import sys
import tarfile
import typing
from collections import OrderedDict
from datetime import date
from functools import lru_cache
from pathlib import Path
from urllib.parse import urlparse
# imports - third party imports
@ -23,6 +25,7 @@ from bench.utils import (
UNSET_ARG,
fetch_details_from_tag,
get_available_folder_name,
get_bench_cache_path,
is_bench_directory,
is_git_url,
is_valid_frappe_branch,
@ -166,6 +169,7 @@ class App(AppMeta):
branch: str = None,
bench: "Bench" = None,
soft_link: bool = False,
cache_key = None,
*args,
**kwargs,
):
@ -173,6 +177,7 @@ class App(AppMeta):
self.soft_link = soft_link
self.required_by = None
self.local_resolution = []
self.cache_key = cache_key
super().__init__(name, branch, *args, **kwargs)
@step(title="Fetching App {repo}", success="App {repo} Fetched")
@ -227,6 +232,7 @@ class App(AppMeta):
resolved=False,
restart_bench=True,
ignore_resolution=False,
using_cached=False
):
import bench.cli
from bench.utils.app import get_app_name
@ -247,6 +253,7 @@ class App(AppMeta):
skip_assets=skip_assets,
restart_bench=restart_bench,
resolution=self.local_resolution,
using_cached=using_cached,
)
@step(title="Cloning and installing {repo}", success="App {repo} Installed")
@ -283,6 +290,134 @@ class App(AppMeta):
branch=self.tag,
required=self.local_resolution,
)
"""
Get App Cache
get-app affects only the `apps`, `env`, and `sites`
bench sub directories. If we assume deterministic builds
when get-app is called, the `apps/app_name` sub dir can be
cached.
In subsequent builds this would save time by not having to:
- clone the repository
- install frontend dependencies
- build frontend assets
as all of this is contained in the `apps/app_name` sub dir.
Code that updates the `env` and `sites` subdirs still needs
to be run.
"""
def get_app_path(self) -> Path:
    """Return the app's directory inside the bench: `<bench.name>/apps/<app_name>`."""
    bench_root = Path(self.bench.name)
    return bench_root.joinpath("apps", self.app_name)
def get_app_cache_path(self, is_compressed=False) -> Path:
    """
    Return the path of this app's cache archive.

    The file lives in the bench "apps" cache directory and is named
    `<app_name>-<first 10 chars of cache_key>.<ext>`, where `<ext>` is
    `tgz` when `is_compressed` is truthy and `tar` otherwise.

    Callers must make sure `cache_key` is set before calling this.
    """
    assert self.cache_key is not None

    apps_cache_dir = get_bench_cache_path("apps")
    if is_compressed:
        suffix = "tgz"
    else:
        suffix = "tar"

    # Only a 10 character prefix of the key ends up in the file name
    short_key = self.cache_key[:10]
    return apps_cache_dir / f"{self.app_name}-{short_key}.{suffix}"
def get_cached(self) -> bool:
    """
    Restore the app directory from the get-app cache, if possible.

    Looks for an uncompressed (`.tar`) archive first and falls back to a
    gzipped (`.tgz`) one. On a hit, any existing app directory is replaced
    by the archive's contents.

    Returns True if the app directory was extracted from the cache. Returns
    False if no `cache_key` is set, no archive exists, or the archive could
    not be read; in the last case a partially extracted app directory is
    removed so the caller can fall back to a regular clone.
    """
    if not self.cache_key:
        return False

    cache_path = self.get_app_cache_path()
    mode = "r"

    # Check if cache exists without gzip
    if not cache_path.is_file():
        cache_path = self.get_app_cache_path(True)
        mode = "r:gz"

    # Check if cache exists with gzip
    if not cache_path.is_file():
        return False

    app_path = self.get_app_path()
    click.secho(f"Getting {self.app_name} from cache", fg="yellow")

    # Open the archive before touching the existing app directory, so an
    # unreadable cache file never costs us a working checkout.
    # Broad except (as in set_cache): decompression failures surface as
    # several unrelated exception types and the cache is best-effort.
    try:
        tar = tarfile.open(cache_path, mode)
    except Exception as e:
        click.secho(f"Could not read cache file {cache_path}: {e}", fg="yellow")
        return False

    with tar:
        if app_path.is_dir():
            shutil.rmtree(app_path)

        try:
            # NOTE(review): no extraction filter is passed; the archive is
            # assumed to be one written by set_cache - confirm if the cache
            # directory can ever hold files from another source.
            tar.extractall(app_path.parent)
        except Exception as e:
            # Don't leave a half extracted app behind
            shutil.rmtree(app_path, ignore_errors=True)
            click.secho(
                f"Could not extract {self.app_name} from cache: {e}", fg="yellow"
            )
            return False

    return True
def set_cache(self, compress_artifacts=False) -> bool:
    """
    Archive the app directory into the get-app cache.

    The app directory is pruned first (see `prune_app_directory`) and then
    written to the path given by `get_app_cache_path`, gzipped when
    `compress_artifacts` is truthy. Archive members are stored relative to
    the app directory's parent, i.e. rooted at `<app_name>/`.

    Returns True if the archive was written. Returns False if no
    `cache_key` is set, the app directory does not exist, or writing the
    archive failed; a partially written archive is removed so that it is
    never picked up by a later `get_cached`.
    """
    if not self.cache_key:
        return False

    app_path = self.get_app_path()
    if not app_path.is_dir():
        return False

    cache_path = self.get_app_cache_path(compress_artifacts)
    mode = "w:gz" if compress_artifacts else "w"

    message = f"Caching {self.app_name} app directory"
    if compress_artifacts:
        message += " (compressed)"
    click.secho(message)

    self.prune_app_directory()

    try:
        with tarfile.open(cache_path, mode) as tar:
            # arcname keeps member names rooted at the app folder without
            # having to change the process-wide working directory
            tar.add(app_path, arcname=app_path.name)
    except Exception:
        log(f"Failed to cache {app_path}", level=3)
        try:
            cache_path.unlink(missing_ok=True)
        except OSError:
            # Caching is best-effort; nothing more to do here
            pass
        return False

    return True
def prune_app_directory(self):
    """Strip files from the app directory that need not be cached."""
    # Currently the only pruning step is dropping unused node_modules
    remove_unused_node_modules(self.get_app_path())
def remove_unused_node_modules(app_path: Path) -> None:
    """
    Delete `node_modules` from sub directories whose frontend is built by Vite.

    Erring a bit on the side of caution; since there is no explicit way
    to check if node_modules are utilized, this function checks if Vite
    is being used to build the frontend code.

    Since most popular Frappe apps use Vite to build their frontends,
    this method should suffice.

    Only direct sub directories of `app_path` are inspected. A sub
    directory whose package.json cannot be read or parsed, or whose
    `scripts.build` entry is missing or not a string, is left untouched.

    Note: root package.json is ignored cause those usually belong to
    apps that do not have a build step and so their node_modules are
    utilized during runtime.
    """
    for p in app_path.iterdir():
        if not p.is_dir():
            continue

        package_json = p / "package.json"
        node_modules = p / "node_modules"
        if not package_json.is_file() or not node_modules.is_dir():
            continue

        try:
            with package_json.open("r", encoding="utf-8") as f:
                manifest = json.load(f)
        except (OSError, ValueError):
            # Unreadable or malformed package.json (JSONDecodeError and
            # UnicodeDecodeError are ValueErrors): can't tell whether
            # node_modules is unused, so keep it.
            continue

        scripts = manifest.get("scripts") if isinstance(manifest, dict) else None
        build_script = scripts.get("build") if isinstance(scripts, dict) else None
        if isinstance(build_script, str) and "vite build" in build_script:
            shutil.rmtree(node_modules)
def make_resolution_plan(app: App, bench: "Bench"):
@ -346,6 +481,8 @@ def get_app(
soft_link=False,
init_bench=False,
resolve_deps=False,
cache_key=None,
compress_artifacts=False,
):
"""bench get-app clones a Frappe App from remote (GitHub or any other git server),
and installs it on the current bench. This also resolves dependencies based on the
@ -360,14 +497,14 @@ def get_app(
from bench.utils.app import check_existing_dir
bench = Bench(bench_path)
app = App(git_url, branch=branch, bench=bench, soft_link=soft_link)
app = App(git_url, branch=branch, bench=bench, soft_link=soft_link, cache_key=cache_key)
git_url = app.url
repo_name = app.repo
branch = app.tag
bench_setup = False
restart_bench = not init_bench
frappe_path, frappe_branch = None, None
if resolve_deps:
resolution = make_resolution_plan(app, bench)
click.secho("Following apps will be installed", fg="bright_blue")
@ -417,6 +554,10 @@ def get_app(
verbose=verbose,
)
return
if app.get_cached():
app.install(verbose=verbose, skip_assets=skip_assets, restart_bench=restart_bench, using_cached=True)
return
dir_already_exists, cloned_path = check_existing_dir(bench_path, repo_name)
to_clone = not dir_already_exists
@ -442,6 +583,9 @@ def get_app(
or click.confirm("Do you want to reinstall the existing application?")
):
app.install(verbose=verbose, skip_assets=skip_assets, restart_bench=restart_bench)
app.set_cache(compress_artifacts)
def install_resolved_deps(
@ -452,7 +596,6 @@ def install_resolved_deps(
verbose=False,
):
from bench.utils.app import check_existing_dir
if "frappe" in resolution:
# Terminal dependency
del resolution["frappe"]
@ -550,6 +693,7 @@ def install_app(
restart_bench=True,
skip_assets=False,
resolution=UNSET_ARG,
using_cached=False,
):
import bench.cli as bench_cli
from bench.bench import Bench
@ -577,14 +721,14 @@ def install_app(
if conf.get("developer_mode"):
install_python_dev_dependencies(apps=app, bench_path=bench_path, verbose=verbose)
if os.path.exists(os.path.join(app_path, "package.json")):
if not using_cached and os.path.exists(os.path.join(app_path, "package.json")):
yarn_install = "yarn install --verbose" if verbose else "yarn install"
bench.run(yarn_install, cwd=app_path)
bench.apps.sync(app_name=app, required=resolution, branch=tag, app_dir=app_path)
if not skip_assets:
build_assets(bench_path=bench_path, app=app)
build_assets(bench_path=bench_path, app=app, using_cached=using_cached)
if restart_bench:
# Avoiding exceptions here as production might not be set-up

View File

@ -72,6 +72,7 @@ bench_command.add_command(switch_to_develop)
from bench.commands.utils import (
app_cache_helper,
backup_all_sites,
bench_src,
disable_production,
@ -108,6 +109,7 @@ bench_command.add_command(disable_production)
bench_command.add_command(bench_src)
bench_command.add_command(find_benches)
bench_command.add_command(migrate_env)
bench_command.add_command(app_cache_helper)
from bench.commands.setup import setup

View File

@ -151,6 +151,18 @@ def drop(path):
default=False,
help="Resolve dependencies before installing app",
)
@click.option(
"--cache-key",
type=str,
default=None,
help="Caches get-app artifacts if provided (only first 10 chars is used)",
)
@click.option(
"--compress-artifacts",
is_flag=True,
default=False,
help="Whether to gzip get-app artifacts that are to be cached",
)
def get_app(
git_url,
branch,
@ -160,6 +172,8 @@ def get_app(
soft_link=False,
init_bench=False,
resolve_deps=False,
cache_key=None,
compress_artifacts=False,
):
"clone an app from the internet and set it up in your bench"
from bench.app import get_app
@ -172,6 +186,8 @@ def get_app(
soft_link=soft_link,
init_bench=init_bench,
resolve_deps=resolve_deps,
cache_key=cache_key,
compress_artifacts=compress_artifacts,
)

View File

@ -176,3 +176,21 @@ def migrate_env(python, backup=True):
from bench.utils.bench import migrate_env
migrate_env(python=python, backup=backup)
@click.command("app-cache", help="View or remove items belonging to bench get-app cache")
@click.option("--clear", is_flag=True, default=False, help="Remove all items")
@click.option(
    "--remove-app",
    default="",
    help="Removes all items that match provided app name",
)
@click.option(
    "--remove-key",
    default="",
    help="Removes all items that matches provided cache key",
)
def app_cache_helper(clear=False, remove_app="", remove_key=""):
    # Thin CLI wrapper: the listing/removal logic lives in
    # bench.utils.bench.cache_helper, imported lazily at call time.
    import bench.utils.bench as bench_utils

    bench_utils.cache_helper(clear=clear, remove_app=remove_app, remove_key=remove_key)

View File

@ -7,8 +7,9 @@ import subprocess
import sys
from functools import lru_cache
from glob import glob
from pathlib import Path
from shlex import split
from typing import List, Tuple
from typing import List, Optional, Tuple
# imports - third party imports
import click
@ -50,6 +51,15 @@ def is_frappe_app(directory: str) -> bool:
return bool(is_frappe_app)
def get_bench_cache_path(sub_dir: Optional[str]) -> Path:
    """
    Return the bench cache directory, creating it if it does not exist.

    The base directory is `~/.cache/bench`. When `sub_dir` is a non-empty
    string that does not start with "/", it is appended to the base;
    otherwise the base directory itself is returned.
    """
    segments = ["~/.cache/bench"]
    if sub_dir and not sub_dir.startswith("/"):
        segments.append(sub_dir)

    target = Path(os.path.expanduser("/".join(segments)))
    target.mkdir(parents=True, exist_ok=True)
    return target
@lru_cache(maxsize=None)
def is_valid_frappe_branch(frappe_path: str, frappe_branch: str):

View File

@ -4,11 +4,13 @@ import json
import logging
import os
import re
import shutil
import subprocess
import sys
from functools import lru_cache
from glob import glob
from json.decoder import JSONDecodeError
from pathlib import Path
# imports - third party imports
import click
@ -16,7 +18,8 @@ import click
# imports - module imports
import bench
from bench.exceptions import PatchError, ValidationError
from bench.utils import exec_cmd, get_bench_name, get_cmd_output, log, which
from bench.utils import (exec_cmd, get_bench_cache_path, get_bench_name,
get_cmd_output, log, which)
logger = logging.getLogger(bench.PROJECT_NAME)
@ -350,11 +353,16 @@ def restart_process_manager(bench_path=".", web_workers=False):
exec_cmd(f"overmind restart {worker}", cwd=bench_path)
def build_assets(bench_path=".", app=None):
def build_assets(bench_path=".", app=None, using_cached=False):
command = "bench build"
if app:
command += f" --app {app}"
exec_cmd(command, cwd=bench_path, env={"BENCH_DEVELOPER": "1"})
env = {"BENCH_DEVELOPER": "1"}
if using_cached:
env["USING_CACHED"] = "1"
exec_cmd(command, cwd=bench_path, env=env)
def handle_version_upgrade(version_upgrade, bench_path, force, reset, conf):
@ -635,3 +643,115 @@ To switch to your required branch, run the following commands: bench switch-to-b
)
sys.exit(1)
def cache_helper(clear=False, remove_app="", remove_key="") -> None:
    """
    Dispatch the `bench app-cache` sub-actions.

    Removal by app and/or key takes precedence over `clear`; with no
    options at all, the cached items are listed.
    """
    if remove_key or remove_app:
        cache_remove(remove_app, remove_key)
        return

    if clear:
        cache_clear()
        return

    cache_list()
def cache_list() -> None:
    """
    Print a table of the `.tar` / `.tgz` archives in the apps cache.

    One row is printed per archive, followed by a total line, or
    "No cached items" when there are none. The CREATED and ACCESSED columns
    show the file's `st_ctime` and `st_atime` respectively.
    """
    from datetime import datetime

    total_bytes = 0
    count = 0

    for entry in get_bench_cache_path("apps").iterdir():
        if entry.suffix not in [".tar", ".tgz"]:
            continue

        info = entry.stat()
        megabytes = info.st_size / 1_000_000
        ctime = datetime.fromtimestamp(info.st_ctime)
        atime = datetime.fromtimestamp(info.st_atime)
        # Everything before the first "-" in the file name
        app_name = entry.name.partition("-")[0]
        is_gzipped = entry.suffix == ".tgz"

        # The header is printed lazily, right before the first row
        if count == 0:
            click.echo(
                f"{'APP':15} "
                f"{'FILE':25} "
                f"{'SIZE':>13} "
                f"{'COMPRESSED'} "
                f"{'CREATED':19} "
                f"{'ACCESSED':19} "
            )

        count += 1
        total_bytes += info.st_size

        click.echo(
            f"{app_name:15} "
            f"{entry.name:25} "
            f"{megabytes:10.3f} MB "
            f"{str(is_gzipped):10} "
            f"{ctime:%Y-%m-%d %H:%M:%S} "
            f"{atime:%Y-%m-%d %H:%M:%S} "
        )

    if not count:
        click.echo("No cached items")
        return

    click.echo(f"Total size {total_bytes / 1_000_000:.3f} MB belonging to {count} items")
def cache_remove(app: str = "", key: str = "") -> None:
    """
    Delete cached archives selected by `should_remove_item` for `app` / `key`.

    Prints one line per removed file and a summary with the freed size,
    or "No items removed" if nothing matched.
    """
    removed_count = 0
    removed_bytes = 0

    for entry in get_bench_cache_path("apps").iterdir():
        if should_remove_item(entry, app, key):
            removed_count += 1
            # Size has to be read before the file is gone
            removed_bytes += entry.stat().st_size
            entry.unlink(missing_ok=True)
            click.echo(f"Removed {entry.name}")

    if not removed_count:
        click.echo("No items removed")
        return

    click.echo(
        f"Cleared {removed_bytes / 1_000_000:.3f} MB belonging to {removed_count} items"
    )
def should_remove_item(item: Path, app: str, key: str) -> bool:
    """
    Decide whether a cache file matches the given app name and/or cache key.

    Cache files are named `<app>-<first 10 chars of key>.<tar|tgz>`.

    - Only `.tar` and `.tgz` files are ever candidates.
    - With both `app` and `key` given, the file must match both.
    - With only `app` given, every archive of that app matches.
    - With only `key` given, every archive with that key matches.
    - With neither given, nothing matches.
    """
    if item.suffix not in [".tar", ".tgz"]:
        return False

    name = item.name

    if app and key:
        # Both filters given: narrow the selection instead of falling
        # through to the broader single-filter checks, which would delete
        # archives that match only one of the two.
        return name.startswith(f"{app}-{key[:10]}.")

    if app:
        return name.startswith(f"{app}-")

    if key:
        return f"-{key[:10]}." in name

    return False
def cache_clear() -> None:
    """
    Remove the whole apps cache directory and report what was freed.

    Prints "No cached items" and leaves the directory in place when it is
    already empty.
    """
    apps_cache = get_bench_cache_path("apps")
    entry_count = len(os.listdir(apps_cache))
    if entry_count == 0:
        click.echo("No cached items")
        return

    # Measure before deleting; afterwards there is nothing left to stat
    freed_bytes = get_dir_size(apps_cache)
    shutil.rmtree(apps_cache)
    click.echo(f"Cleared {freed_bytes / 1_000_000:.3f} MB belonging to {entry_count} items")
def get_dir_size(p: Path) -> int:
    """
    Return the summed size in bytes of the direct children of directory `p`.

    Not recursive: a sub directory contributes only its own entry size.
    Symlinks are not followed, so a link counts with its own size rather
    than its target's.
    """
    # lstat() is the same as stat(follow_symlinks=False), but also works on
    # Python versions older than 3.10 where Path.stat() takes no arguments.
    return sum(child.lstat().st_size for child in p.iterdir())

View File

@ -22,7 +22,7 @@ dependencies = [
"Click>=7.0",
"GitPython~=3.1.30",
"honcho",
"Jinja2~=3.0.3",
"Jinja2~=3.1.3",
"python-crontab~=2.6.0",
"requests",
"semantic-version~=2.8.2",