From 55bc4c63fab03276931aed398b251cdd2720ceb0 Mon Sep 17 00:00:00 2001 From: Keith Grootboom Date: Thu, 21 Jul 2022 15:15:08 +0200 Subject: [PATCH 1/3] fix: change kubectl wait to look at deployments not pods When waiting for pods, it's possible that the deployment may be complete but, because other pods may have been Evicted or Killed, the wait wait condition completes. --- CHANGELOG.md | 1 + tutor/commands/k8s.py | 14 +++++++------- tutor/commands/upgrade/k8s.py | 6 +++--- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7b16ced..22cd274 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ Every user-facing change should have an entry in this changelog. Please respect - [Fix] `tutor dev quickstart` would fail under certain versions of docker-compose due to a bug in the logic that handled volume mounting. (by @kdmccormick) - [Bugfix] The `tutor k8s start` command will succeed even when `k8s-override` and `kustomization-patches-strategic-merge` are not specified. (by @edazzocaisser) +- [Fix] `kubectl wait` checks deployments instead of pods as it could hang indefinitely if there are extra pods in a broken state. (by @keithgg) ## v14.0.3 (2022-07-09) diff --git a/tutor/commands/k8s.py b/tutor/commands/k8s.py index ea82ec8..0a855dc 100644 --- a/tutor/commands/k8s.py +++ b/tutor/commands/k8s.py @@ -332,10 +332,10 @@ def delete(context: K8sContext, yes: bool) -> None: def init(context: K8sContext, limit: Optional[str]) -> None: config = tutor_config.load(context.root) runner = context.job_runner(config) - wait_for_pod_ready(config, "caddy") + wait_for_deployment_ready(config, "caddy") for name in ["elasticsearch", "mysql", "mongodb"]: if tutor_config.is_service_activated(config, name): - wait_for_pod_ready(config, name) + wait_for_deployment_ready(config, name) jobs.initialise(runner, limit_to=limit) @@ -458,7 +458,7 @@ def logs( @click.pass_obj def wait(context: K8sContext, name: str) -> None: config = tutor_config.load(context.root) - wait_for_pod_ready(config, name) + wait_for_deployment_ready(config, name) @click.command( @@ -536,14 +536,14 @@ def kubectl_exec(config: Config, service: str, command: List[str]) -> int: ) -def wait_for_pod_ready(config: Config, service: str) -> None: - fmt.echo_info(f"Waiting for a {service} pod to be ready...") +def wait_for_deployment_ready(config: Config, service: str) -> None: + fmt.echo_info(f"Waiting for a {service} deployment to be ready...") utils.kubectl( "wait", *resource_selector(config, f"app.kubernetes.io/name={service}"), - "--for=condition=ContainersReady", + "--for=condition=Available=True", "--timeout=600s", - "pod", + "deployment", ) diff --git a/tutor/commands/upgrade/k8s.py b/tutor/commands/upgrade/k8s.py index 0cce122..af970a0 100644 --- a/tutor/commands/upgrade/k8s.py +++ b/tutor/commands/upgrade/k8s.py @@ -120,7 +120,7 @@ def upgrade_from_maple(context: Context, config: Config) -> None: "--selector", "app.kubernetes.io/name=mysql", ) - k8s.wait_for_pod_ready(config, "mysql") + k8s.wait_for_deployment_ready(config, "mysql") # lms upgrade k8s.kubectl_apply( @@ -128,7 +128,7 @@ def upgrade_from_maple(context: Context, config: Config) -> None: "--selector", "app.kubernetes.io/name=lms", ) - k8s.wait_for_pod_ready(config, "lms") + k8s.wait_for_deployment_ready(config, "lms") # Command backpopulate_user_tours k8s.kubectl_exec( @@ -144,7 +144,7 @@ def upgrade_from_maple(context: Context, config: Config) -> None: "--selector", "app.kubernetes.io/name=cms", ) - k8s.wait_for_pod_ready(config, "cms") + k8s.wait_for_deployment_ready(config, "cms") # Command backfill_course_tabs k8s.kubectl_exec( From 6a3138152f46de1f4a9faf087a11875e9211e288 Mon Sep 17 00:00:00 2001 From: Kyle McCormick Date: Wed, 20 Jul 2022 17:20:43 -0400 Subject: [PATCH 2/3] fix: accurately log shell commands when running jobs Whenever Tutor executes a shell command, it logs out said command in order to aid in end user understanding/debugging. In some cases (notably, when running jobs in containers) the logged command was not accurately quoted. The command was run correctly, because it was passed in pieces to ``subprocess.Popen``, which correctly joins the pieces together into a valid POSIX shell command; however, the logged version of the command was constructed by simply joining the pieces with spaces. This usually works, but breaks down when running complex shell commands with nested quoting. This commit changes the logging to use ``shlex.join``, which joins command pieces together in a POSIX-compliant way, presumably the same way as ``subprocess.Popen``. Example: tutor local importdemocourse runs the shell command: docker-compose -f /home/kyle/.local/share/tutor/env/local/docker-compose.yml -f /home/kyle/.local/share/tutor/env/local/docker-compose.prod.yml -f /home/kyle/.local/share/tutor/env/local/docker-compose.tmp.yml --project-name tutor_local -f /home/kyle/.local/share/tutor/env/local/docker-compose.jobs.yml -f /home/kyle/.local/share/tutor/env/local/docker-compose.jobs.tmp.yml run --rm cms-job sh -e -c 'echo "Loading settings $DJANGO_SE... (several more script lines) ...eindex_course --all --setup' but the logged shell command was: docker-compose -f /home/kyle/.local/share/tutor/env/local/docker-compose.yml -f /home/kyle/.local/share/tutor/env/local/docker-compose.prod.yml -f /home/kyle/.local/share/tutor/env/local/docker-compose.tmp.yml --project-name tutor_local -f /home/kyle/.local/share/tutor/env/local/docker-compose.jobs.yml -f /home/kyle/.local/share/tutor/env/local/docker-compose.jobs.tmp.yml run --rm cms-job sh -e -c echo "Loading settings $DJANGO_SE... (several more script lines) ...eindex_course --all --setup which will not run if copied and pasted back into the user's terminal, as the importdemocourse shell script is unquoted. --- CHANGELOG.md | 1 + tests/test_utils.py | 22 +++++++++++++++++++--- tutor/utils.py | 3 ++- 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 22cd274..3fbbd85 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ Every user-facing change should have an entry in this changelog. Please respect ## Unreleased +- [Bugfix] Log the shell commands that Tutor executes more accurately. (by @kdmccormick) - [Fix] `tutor dev quickstart` would fail under certain versions of docker-compose due to a bug in the logic that handled volume mounting. (by @kdmccormick) - [Bugfix] The `tutor k8s start` command will succeed even when `k8s-override` and `kustomization-patches-strategic-merge` are not specified. (by @edazzocaisser) - [Fix] `kubectl wait` checks deployments instead of pods as it could hang indefinitely if there are extra pods in a broken state. (by @keithgg) diff --git a/tests/test_utils.py b/tests/test_utils.py index 76d81da..8563ba5 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -102,7 +102,23 @@ class UtilsTests(unittest.TestCase): result = utils.execute("echo", "") self.assertEqual(0, result) - self.assertEqual("echo \n", mock_stdout.getvalue()) + self.assertEqual("echo ''\n", mock_stdout.getvalue()) + self.assertEqual(1, process.wait.call_count) + process.kill.assert_not_called() + + @patch("sys.stdout", new_callable=StringIO) + @patch("subprocess.Popen", autospec=True) + def test_execute_nested_command( + self, mock_popen: MagicMock, mock_stdout: StringIO + ) -> None: + process = mock_popen.return_value + mock_popen.return_value.__enter__.return_value = process + process.wait.return_value = 0 + process.communicate.return_value = ("output", "error") + + result = utils.execute("bash", "-c", "echo -n hi") + self.assertEqual(0, result) + self.assertEqual("bash -c 'echo -n hi'\n", mock_stdout.getvalue()) self.assertEqual(1, process.wait.call_count) process.kill.assert_not_called() @@ -117,7 +133,7 @@ class UtilsTests(unittest.TestCase): process.communicate.return_value = ("output", "error") self.assertRaises(exceptions.TutorError, utils.execute, "echo", "") - self.assertEqual("echo \n", mock_stdout.getvalue()) + self.assertEqual("echo ''\n", mock_stdout.getvalue()) self.assertEqual(1, process.wait.call_count) process.kill.assert_not_called() @@ -131,7 +147,7 @@ class UtilsTests(unittest.TestCase): process.wait.side_effect = ZeroDivisionError("Exception occurred.") self.assertRaises(ZeroDivisionError, utils.execute, "echo", "") - self.assertEqual("echo \n", mock_stdout.getvalue()) + self.assertEqual("echo ''\n", mock_stdout.getvalue()) self.assertEqual(2, process.wait.call_count) process.kill.assert_called_once() diff --git a/tutor/utils.py b/tutor/utils.py index 18a7472..9b53970 100644 --- a/tutor/utils.py +++ b/tutor/utils.py @@ -3,6 +3,7 @@ from functools import lru_cache import json import os import random +import shlex import shutil import string import struct @@ -201,7 +202,7 @@ def is_a_tty() -> bool: def execute(*command: str) -> int: - click.echo(fmt.command(" ".join(command))) + click.echo(fmt.command(shlex.join(command))) with subprocess.Popen(command) as p: try: result = p.wait(timeout=None) From 8345b7ab938190954036a23d42c617d1da18516b Mon Sep 17 00:00:00 2001 From: Kyle McCormick Date: Mon, 25 Jul 2022 11:34:13 -0400 Subject: [PATCH 3/3] temp: shim shlex.join for Python 3.7 compatability --- tutor/utils.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/tutor/utils.py b/tutor/utils.py index 9b53970..fcbd29d 100644 --- a/tutor/utils.py +++ b/tutor/utils.py @@ -202,7 +202,7 @@ def is_a_tty() -> bool: def execute(*command: str) -> int: - click.echo(fmt.command(shlex.join(command))) + click.echo(fmt.command(_shlex_join(*command))) with subprocess.Popen(command) as p: try: result = p.wait(timeout=None) @@ -221,6 +221,19 @@ def execute(*command: str) -> int: return result +def _shlex_join(*split_command: str) -> str: + """ + Return a shell-escaped string from *split_command. + + TODO: REMOVE THIS FUNCTION AFTER 2023-06-27. + This function is a shim for the ``shlex.join`` standard library function, + which becomes available in Python 3.8 The end-of-life date for Python 3.7 + is in Jan 2023 (https://endoflife.date/python). After that point, it + would be good to delete this function and just use Py3.8's ``shlex.join``. + """ + return " ".join(shlex.quote(arg) for arg in split_command) + + def check_output(*command: str) -> bytes: literal_command = " ".join(command) click.echo(fmt.command(literal_command))