From b57838f36615b659930a4f38cfd14375097a8ad1 Mon Sep 17 00:00:00 2001 From: Ankush Menat Date: Tue, 22 Nov 2022 12:16:54 +0530 Subject: [PATCH] feat: add `max_requests` to gunicorn args As gunicorn is a long-running process, potentially running for days without a restart, the workers might start accumulating garbage that's never cleaned up, and memory usage spikes after some use. This largely happens because of third-party module imports like pandas, openpyxl, numpy etc. All of these are required only for a few requests and can be easily re-loaded when required. `max_requests` restarts a worker after it has processed the configured number of requests. How to use? - If you have more than 1 gunicorn worker then this is automatically enabled. You can tweak the max_requests parameter with the `gunicorn_max_requests` key in common_site_config - If you just have 1 gunicorn worker (not recommended) then this is not automatically enabled, as restarting the only worker can cause spikes in response times whenever a restart is triggered. 
--- bench/config/common_site_config.py | 14 ++++++++++++++ bench/config/supervisor.py | 11 +++++++---- bench/config/systemd.py | 11 +++++++---- bench/config/templates/supervisor.conf | 2 +- .../systemd/frappe-bench-frappe-web.service | 2 +- 5 files changed, 30 insertions(+), 10 deletions(-) diff --git a/bench/config/common_site_config.py b/bench/config/common_site_config.py index d278dedb..41242aad 100644 --- a/bench/config/common_site_config.py +++ b/bench/config/common_site_config.py @@ -15,6 +15,7 @@ default_config = { "live_reload": True, } +DEFAULT_MAX_REQUESTS = 5000 def setup_config(bench_path): make_pid_folder(bench_path) @@ -61,6 +62,19 @@ def get_gunicorn_workers(): return {"gunicorn_workers": multiprocessing.cpu_count() * 2 + 1} +def compute_max_requests_jitter(max_requests: int) -> int: + return int(max_requests * 0.1) + +def get_default_max_requests(worker_count: int): + """Get max requests and jitter config based on number of available workers.""" + + if worker_count <= 1: + # If there's only one worker then random restart can cause spikes in response times and + # can be annoying. Hence not enabled by default. 
+ return 0 + return DEFAULT_MAX_REQUESTS + + def update_config_for_frappe(config, bench_path): ports = make_ports(bench_path) diff --git a/bench/config/supervisor.py b/bench/config/supervisor.py index d38da668..58015b81 100644 --- a/bench/config/supervisor.py +++ b/bench/config/supervisor.py @@ -8,7 +8,7 @@ import bench from bench.app import use_rq from bench.utils import get_bench_name, which from bench.bench import Bench -from bench.config.common_site_config import update_config, get_gunicorn_workers +from bench.config.common_site_config import update_config, get_gunicorn_workers, get_default_max_requests, compute_max_requests_jitter # imports - third party imports import click @@ -26,6 +26,9 @@ def generate_supervisor_config(bench_path, user=None, yes=False, skip_redis=Fals template = bench.config.env().get_template("supervisor.conf") bench_dir = os.path.abspath(bench_path) + web_worker_count = config.get("gunicorn_workers", get_gunicorn_workers()["gunicorn_workers"]) + max_requests = config.get("gunicorn_max_requests", get_default_max_requests(web_worker_count)) + config = template.render( **{ "bench_dir": bench_dir, @@ -39,9 +42,9 @@ def generate_supervisor_config(bench_path, user=None, yes=False, skip_redis=Fals "redis_socketio_config": os.path.join(bench_dir, "config", "redis_socketio.conf"), "redis_queue_config": os.path.join(bench_dir, "config", "redis_queue.conf"), "webserver_port": config.get("webserver_port", 8000), - "gunicorn_workers": config.get( - "gunicorn_workers", get_gunicorn_workers()["gunicorn_workers"] - ), + "gunicorn_workers": web_worker_count, + "gunicorn_max_requests": max_requests, + "gunicorn_max_requests_jitter": compute_max_requests_jitter(max_requests), "bench_name": get_bench_name(bench_path), "background_workers": config.get("background_workers") or 1, "bench_cmd": which("bench"), diff --git a/bench/config/systemd.py b/bench/config/systemd.py index d30edfc9..a677391c 100644 --- a/bench/config/systemd.py +++ 
b/bench/config/systemd.py @@ -9,7 +9,7 @@ import click import bench from bench.app import use_rq from bench.bench import Bench -from bench.config.common_site_config import get_gunicorn_workers, update_config +from bench.config.common_site_config import get_gunicorn_workers, update_config, get_default_max_requests, compute_max_requests_jitter from bench.utils import exec_cmd, which, get_bench_name @@ -61,6 +61,9 @@ def generate_systemd_config( get_bench_name(bench_path) + "-frappe-long-worker@" + str(i + 1) + ".service" ) + web_worker_count = config.get("gunicorn_workers", get_gunicorn_workers()["gunicorn_workers"]) + max_requests = config.get("gunicorn_max_requests", get_default_max_requests(web_worker_count)) + bench_info = { "bench_dir": bench_dir, "sites_dir": os.path.join(bench_dir, "sites"), @@ -73,9 +76,9 @@ def generate_systemd_config( "redis_socketio_config": os.path.join(bench_dir, "config", "redis_socketio.conf"), "redis_queue_config": os.path.join(bench_dir, "config", "redis_queue.conf"), "webserver_port": config.get("webserver_port", 8000), - "gunicorn_workers": config.get( - "gunicorn_workers", get_gunicorn_workers()["gunicorn_workers"] - ), + "gunicorn_workers": web_worker_count, + "gunicorn_max_requests": max_requests, + "gunicorn_max_requests_jitter": compute_max_requests_jitter(max_requests), "bench_name": get_bench_name(bench_path), "worker_target_wants": " ".join(background_workers), "bench_cmd": which("bench"), diff --git a/bench/config/templates/supervisor.conf b/bench/config/templates/supervisor.conf index 085cc2cf..f29c1673 100644 --- a/bench/config/templates/supervisor.conf +++ b/bench/config/templates/supervisor.conf @@ -3,7 +3,7 @@ ; killasgroup=true --> send kill signal to child processes too [program:{{ bench_name }}-frappe-web] -command={{ bench_dir }}/env/bin/gunicorn -b 127.0.0.1:{{ webserver_port }} -w {{ gunicorn_workers }} -t {{ http_timeout }} frappe.app:application --preload +command={{ bench_dir }}/env/bin/gunicorn -b 
127.0.0.1:{{ webserver_port }} -w {{ gunicorn_workers }} --max-requests {{ gunicorn_max_requests }} --max-requests-jitter {{ gunicorn_max_requests_jitter }} -t {{ http_timeout }} frappe.app:application --preload priority=4 autostart=true autorestart=true diff --git a/bench/config/templates/systemd/frappe-bench-frappe-web.service b/bench/config/templates/systemd/frappe-bench-frappe-web.service index bb2f0e38..0621e1d3 100644 --- a/bench/config/templates/systemd/frappe-bench-frappe-web.service +++ b/bench/config/templates/systemd/frappe-bench-frappe-web.service @@ -6,7 +6,7 @@ PartOf={{ bench_name }}-web.target User={{ user }} Group={{ user }} Restart=always -ExecStart={{ bench_dir }}/env/bin/gunicorn -b 127.0.0.1:{{ webserver_port }} -w {{ gunicorn_workers }} -t {{ http_timeout }} frappe.app:application --preload +ExecStart={{ bench_dir }}/env/bin/gunicorn -b 127.0.0.1:{{ webserver_port }} -w {{ gunicorn_workers }} -t {{ http_timeout }} --max-requests {{ gunicorn_max_requests }} --max-requests-jitter {{ gunicorn_max_requests_jitter }} frappe.app:application --preload StandardOutput=file:{{ bench_dir }}/logs/web.log StandardError=file:{{ bench_dir }}/logs/web.error.log WorkingDirectory={{ sites_dir }}