From e8ea98552c973f9ac126e728d465de3723a9701f Mon Sep 17 00:00:00 2001 From: 18alantom <2.alan.tom@gmail.com> Date: Tue, 23 Jan 2024 17:20:16 +0530 Subject: [PATCH 1/4] fix: add safety filter for untarring --- bench/app.py | 7 ++++++- bench/utils/__init__.py | 28 +++++++++++++++++++++++++++- 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/bench/app.py b/bench/app.py index 322afa54..54de1758 100755 --- a/bench/app.py +++ b/bench/app.py @@ -24,6 +24,7 @@ from bench.exceptions import NotInBenchDirectoryError from bench.utils import ( UNSET_ARG, fetch_details_from_tag, + get_app_cache_extract_filter, get_available_folder_name, get_bench_cache_path, is_bench_directory, @@ -343,7 +344,11 @@ class App(AppMeta): click.secho(f"Getting {self.app_name} from cache", fg="yellow") with tarfile.open(cache_path, mode) as tar: - tar.extractall(app_path.parent) + try: + tar.extractall(app_path.parent, filter=get_app_cache_extract_filter()) + except: + shutil.rmtree(app_path) + return False return True diff --git a/bench/utils/__init__.py b/bench/utils/__init__.py index bd07ec4b..8c5b0a71 100644 --- a/bench/utils/__init__.py +++ b/bench/utils/__init__.py @@ -9,7 +9,8 @@ from functools import lru_cache from glob import glob from pathlib import Path from shlex import split -from typing import List, Optional, Tuple +from tarfile import data_filter, AbsoluteLinkError, TarInfo +from typing import Callable, List, Optional, Tuple # imports - third party imports import click @@ -569,3 +570,28 @@ def get_cmd_from_sysargv(): break return cmd_from_ctx + + +def get_app_cache_extract_filter( + count_threshold: int = 10_000, + size_threshold: int = 1_000_000_000, +) -> Callable[[TarInfo, str], TarInfo | None]: + state = dict(count=0, size=0) + + def filter_function(member: TarInfo, dest_path: str) -> Optional[TarInfo]: + state["count"] += 1 + state["size"] += member.size + + if state["count"] > count_threshold: + raise Exception(f"Number of entries exceeds threshold ({state['count']})") + + if state["size"] > size_threshold: + raise Exception(f"Extracted size exceeds threshold ({state['size']})") + + try: + return data_filter(member, dest_path) + except AbsoluteLinkError: + # Links created by `frappe` after extraction + return None + + return filter_function From 23bd717d7b9ebc6995833c9064425e6a4cb20096 Mon Sep 17 00:00:00 2001 From: 18alantom <2.alan.tom@gmail.com> Date: Tue, 23 Jan 2024 17:26:08 +0530 Subject: [PATCH 2/4] feat: comment out unsupported typing --- bench/utils/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bench/utils/__init__.py b/bench/utils/__init__.py index 8c5b0a71..87d66441 100644 --- a/bench/utils/__init__.py +++ b/bench/utils/__init__.py @@ -10,7 +10,7 @@ from glob import glob from pathlib import Path from shlex import split from tarfile import data_filter, AbsoluteLinkError, TarInfo -from typing import Callable, List, Optional, Tuple +from typing import List, Optional, Tuple # imports - third party imports import click @@ -575,7 +575,7 @@ def get_cmd_from_sysargv(): def get_app_cache_extract_filter( count_threshold: int = 10_000, size_threshold: int = 1_000_000_000, -) -> Callable[[TarInfo, str], TarInfo | None]: +): # -> Callable[[TarInfo, str], TarInfo | None] state = dict(count=0, size=0) def filter_function(member: TarInfo, dest_path: str) -> Optional[TarInfo]: From 80f2e70af68a8f089b558b1a814c39ecd0b24389 Mon Sep 17 00:00:00 2001 From: 18alantom <2.alan.tom@gmail.com> Date: Wed, 24 Jan 2024 11:58:03 +0530 Subject: [PATCH 3/4] fix: version check before data_filter import - better error handling if untar fails --- bench/app.py | 3 ++- bench/utils/__init__.py | 12 +++++++++--- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/bench/app.py b/bench/app.py index 54de1758..cf437898 100755 --- a/bench/app.py +++ b/bench/app.py @@ -346,7 +346,8 @@ class App(AppMeta): with tarfile.open(cache_path, mode) as tar: try: tar.extractall(app_path.parent, filter=get_app_cache_extract_filter()) - except: + except Exception: + logger.exception(f"Cache extraction failed for {self.app_name}") shutil.rmtree(app_path) return False diff --git a/bench/utils/__init__.py b/bench/utils/__init__.py index 87d66441..ab76ce29 100644 --- a/bench/utils/__init__.py +++ b/bench/utils/__init__.py @@ -9,7 +9,7 @@ from functools import lru_cache from glob import glob from pathlib import Path from shlex import split -from tarfile import data_filter, AbsoluteLinkError, TarInfo +from tarfile import AbsoluteLinkError, TarInfo from typing import List, Optional, Tuple # imports - third party imports @@ -578,15 +578,21 @@ def get_app_cache_extract_filter( ): # -> Callable[[TarInfo, str], TarInfo | None] state = dict(count=0, size=0) + if sys.version_info.major <=2 or sys.version_info.minor <=8: + def data_filter(m, p): + return m + else: + from tarfile import data_filter + def filter_function(member: TarInfo, dest_path: str) -> Optional[TarInfo]: state["count"] += 1 state["size"] += member.size if state["count"] > count_threshold: - raise Exception(f"Number of entries exceeds threshold ({state['count']})") + raise RuntimeError(f"Number of entries exceeds threshold ({state['count']})") if state["size"] > size_threshold: - raise Exception(f"Extracted size exceeds threshold ({state['size']})") + raise RuntimeError(f"Extracted size exceeds threshold ({state['size']})") try: return data_filter(member, dest_path) From 3502c776c0c265f767d1c159e332b0e23aa9ed29 Mon Sep 17 00:00:00 2001 From: 18alantom <2.alan.tom@gmail.com> Date: Wed, 24 Jan 2024 12:11:04 +0530 Subject: [PATCH 4/4] fix: version check before AbsoluteLinkError --- bench/utils/__init__.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/bench/utils/__init__.py b/bench/utils/__init__.py index ab76ce29..3fc2a7bb 100644 --- a/bench/utils/__init__.py +++ b/bench/utils/__init__.py @@ -9,7 +9,7 @@ from functools import lru_cache from glob import glob from pathlib import Path from shlex import split -from tarfile import AbsoluteLinkError, TarInfo +from tarfile import TarInfo from typing import List, Optional, Tuple # imports - third party imports @@ -578,11 +578,12 @@ def get_app_cache_extract_filter( ): # -> Callable[[TarInfo, str], TarInfo | None] state = dict(count=0, size=0) - if sys.version_info.major <=2 or sys.version_info.minor <=8: - def data_filter(m, p): - return m - else: - from tarfile import data_filter + AbsoluteLinkError = Exception + def data_filter(m: TarInfo, _:str) -> TarInfo: + return m + + if (sys.version_info.major == 3 and sys.version_info.minor > 7) or sys.version_info.major > 3: + from tarfile import data_filter, AbsoluteLinkError def filter_function(member: TarInfo, dest_path: str) -> Optional[TarInfo]: state["count"] += 1