From 55bc4c63fab03276931aed398b251cdd2720ceb0 Mon Sep 17 00:00:00 2001 From: Keith Grootboom Date: Thu, 21 Jul 2022 15:15:08 +0200 Subject: [PATCH] fix: change kubectl wait to look at deployments not pods When waiting for pods, it's possible that the deployment may be complete but, because other pods may have been Evicted or Killed, the wait condition never completes. --- CHANGELOG.md | 1 + tutor/commands/k8s.py | 14 +++++++------- tutor/commands/upgrade/k8s.py | 6 +++--- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7b16ced..22cd274 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ Every user-facing change should have an entry in this changelog. Please respect - [Fix] `tutor dev quickstart` would fail under certain versions of docker-compose due to a bug in the logic that handled volume mounting. (by @kdmccormick) - [Bugfix] The `tutor k8s start` command will succeed even when `k8s-override` and `kustomization-patches-strategic-merge` are not specified. (by @edazzocaisser) +- [Fix] `kubectl wait` checks deployments instead of pods as it could hang indefinitely if there are extra pods in a broken state. 
(by @keithgg) ## v14.0.3 (2022-07-09) diff --git a/tutor/commands/k8s.py b/tutor/commands/k8s.py index ea82ec8..0a855dc 100644 --- a/tutor/commands/k8s.py +++ b/tutor/commands/k8s.py @@ -332,10 +332,10 @@ def delete(context: K8sContext, yes: bool) -> None: def init(context: K8sContext, limit: Optional[str]) -> None: config = tutor_config.load(context.root) runner = context.job_runner(config) - wait_for_pod_ready(config, "caddy") + wait_for_deployment_ready(config, "caddy") for name in ["elasticsearch", "mysql", "mongodb"]: if tutor_config.is_service_activated(config, name): - wait_for_pod_ready(config, name) + wait_for_deployment_ready(config, name) jobs.initialise(runner, limit_to=limit) @@ -458,7 +458,7 @@ def logs( @click.pass_obj def wait(context: K8sContext, name: str) -> None: config = tutor_config.load(context.root) - wait_for_pod_ready(config, name) + wait_for_deployment_ready(config, name) @click.command( @@ -536,14 +536,14 @@ def kubectl_exec(config: Config, service: str, command: List[str]) -> int: ) -def wait_for_pod_ready(config: Config, service: str) -> None: - fmt.echo_info(f"Waiting for a {service} pod to be ready...") +def wait_for_deployment_ready(config: Config, service: str) -> None: + fmt.echo_info(f"Waiting for a {service} deployment to be ready...") utils.kubectl( "wait", *resource_selector(config, f"app.kubernetes.io/name={service}"), - "--for=condition=ContainersReady", + "--for=condition=Available=True", "--timeout=600s", - "pod", + "deployment", ) diff --git a/tutor/commands/upgrade/k8s.py b/tutor/commands/upgrade/k8s.py index 0cce122..af970a0 100644 --- a/tutor/commands/upgrade/k8s.py +++ b/tutor/commands/upgrade/k8s.py @@ -120,7 +120,7 @@ def upgrade_from_maple(context: Context, config: Config) -> None: "--selector", "app.kubernetes.io/name=mysql", ) - k8s.wait_for_pod_ready(config, "mysql") + k8s.wait_for_deployment_ready(config, "mysql") # lms upgrade k8s.kubectl_apply( @@ -128,7 +128,7 @@ def upgrade_from_maple(context: Context, 
config: Config) -> None: "--selector", "app.kubernetes.io/name=lms", ) - k8s.wait_for_pod_ready(config, "lms") + k8s.wait_for_deployment_ready(config, "lms") # Command backpopulate_user_tours k8s.kubectl_exec( @@ -144,7 +144,7 @@ def upgrade_from_maple(context: Context, config: Config) -> None: "--selector", "app.kubernetes.io/name=cms", ) - k8s.wait_for_pod_ready(config, "cms") + k8s.wait_for_deployment_ready(config, "cms") # Command backfill_course_tabs k8s.kubectl_exec(