From 55bc4c63fab03276931aed398b251cdd2720ceb0 Mon Sep 17 00:00:00 2001
From: Keith Grootboom <keith@opencraft.com>
Date: Thu, 21 Jul 2022 15:15:08 +0200
Subject: [PATCH 1/3] fix: change kubectl wait to look at deployments not pods

When waiting for pods, it's possible that the deployment may be
complete but, because other pods may have been Evicted or Killed, the
wait wait condition completes.
---
 CHANGELOG.md                  |  1 +
 tutor/commands/k8s.py         | 14 +++++++-------
 tutor/commands/upgrade/k8s.py |  6 +++---
 3 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7b16ced..22cd274 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -20,6 +20,7 @@ Every user-facing change should have an entry in this changelog. Please respect
 
 - [Fix] `tutor dev quickstart` would fail under certain versions of docker-compose due to a bug in the logic that handled volume mounting. (by @kdmccormick)
 - [Bugfix] The `tutor k8s start` command will succeed even when `k8s-override` and `kustomization-patches-strategic-merge` are not specified. (by @edazzocaisser)
+- [Fix] `kubectl wait` checks deployments instead of pods as it could hang indefinitely if there are extra pods in a broken state. (by @keithgg)
 
 ## v14.0.3 (2022-07-09)
 
diff --git a/tutor/commands/k8s.py b/tutor/commands/k8s.py
index ea82ec8..0a855dc 100644
--- a/tutor/commands/k8s.py
+++ b/tutor/commands/k8s.py
@@ -332,10 +332,10 @@ def delete(context: K8sContext, yes: bool) -> None:
 def init(context: K8sContext, limit: Optional[str]) -> None:
     config = tutor_config.load(context.root)
     runner = context.job_runner(config)
-    wait_for_pod_ready(config, "caddy")
+    wait_for_deployment_ready(config, "caddy")
     for name in ["elasticsearch", "mysql", "mongodb"]:
         if tutor_config.is_service_activated(config, name):
-            wait_for_pod_ready(config, name)
+            wait_for_deployment_ready(config, name)
     jobs.initialise(runner, limit_to=limit)
 
 
@@ -458,7 +458,7 @@ def logs(
 @click.pass_obj
 def wait(context: K8sContext, name: str) -> None:
     config = tutor_config.load(context.root)
-    wait_for_pod_ready(config, name)
+    wait_for_deployment_ready(config, name)
 
 
 @click.command(
@@ -536,14 +536,14 @@ def kubectl_exec(config: Config, service: str, command: List[str]) -> int:
     )
 
 
-def wait_for_pod_ready(config: Config, service: str) -> None:
-    fmt.echo_info(f"Waiting for a {service} pod to be ready...")
+def wait_for_deployment_ready(config: Config, service: str) -> None:
+    fmt.echo_info(f"Waiting for a {service} deployment to be ready...")
     utils.kubectl(
         "wait",
         *resource_selector(config, f"app.kubernetes.io/name={service}"),
-        "--for=condition=ContainersReady",
+        "--for=condition=Available=True",
         "--timeout=600s",
-        "pod",
+        "deployment",
     )
 
 
diff --git a/tutor/commands/upgrade/k8s.py b/tutor/commands/upgrade/k8s.py
index 0cce122..af970a0 100644
--- a/tutor/commands/upgrade/k8s.py
+++ b/tutor/commands/upgrade/k8s.py
@@ -120,7 +120,7 @@ def upgrade_from_maple(context: Context, config: Config) -> None:
         "--selector",
         "app.kubernetes.io/name=mysql",
     )
-    k8s.wait_for_pod_ready(config, "mysql")
+    k8s.wait_for_deployment_ready(config, "mysql")
 
     # lms upgrade
     k8s.kubectl_apply(
@@ -128,7 +128,7 @@ def upgrade_from_maple(context: Context, config: Config) -> None:
         "--selector",
         "app.kubernetes.io/name=lms",
     )
-    k8s.wait_for_pod_ready(config, "lms")
+    k8s.wait_for_deployment_ready(config, "lms")
 
     # Command backpopulate_user_tours
     k8s.kubectl_exec(
@@ -144,7 +144,7 @@ def upgrade_from_maple(context: Context, config: Config) -> None:
         "--selector",
         "app.kubernetes.io/name=cms",
     )
-    k8s.wait_for_pod_ready(config, "cms")
+    k8s.wait_for_deployment_ready(config, "cms")
 
     # Command backfill_course_tabs
     k8s.kubectl_exec(

From 6a3138152f46de1f4a9faf087a11875e9211e288 Mon Sep 17 00:00:00 2001
From: Kyle McCormick <kdmc@pm.me>
Date: Wed, 20 Jul 2022 17:20:43 -0400
Subject: [PATCH 2/3] fix: accurately log shell commands when running jobs

Whenever Tutor executes a shell command, it logs out said
command in order to aid in end user understanding/debugging.

In some cases (notably, when running jobs in containers)
the logged command was not accurately quoted. The command
was run correctly, because it was passed in pieces to
``subprocess.Popen``, which correctly joins the pieces together
into a valid POSIX shell command; however, the logged version
of the command was constructed by simply joining the pieces
with spaces. This usually works, but breaks down when running
complex shell commands with nested quoting.

This commit changes the logging to use ``shlex.join``, which
joins command pieces together in a POSIX-compliant way,
presumably the same way as ``subprocess.Popen``.

Example:

    tutor local importdemocourse

runs the shell command:

    docker-compose -f /home/kyle/.local/share/tutor/env/local/docker-compose.yml -f /home/kyle/.local/share/tutor/env/local/docker-compose.prod.yml -f /home/kyle/.local/share/tutor/env/local/docker-compose.tmp.yml --project-name tutor_local -f /home/kyle/.local/share/tutor/env/local/docker-compose.jobs.yml -f /home/kyle/.local/share/tutor/env/local/docker-compose.jobs.tmp.yml run --rm cms-job sh -e -c 'echo "Loading settings $DJANGO_SE... (several more script lines) ...eindex_course --all --setup'

but the logged shell command was:

    docker-compose -f /home/kyle/.local/share/tutor/env/local/docker-compose.yml -f /home/kyle/.local/share/tutor/env/local/docker-compose.prod.yml -f /home/kyle/.local/share/tutor/env/local/docker-compose.tmp.yml --project-name tutor_local -f /home/kyle/.local/share/tutor/env/local/docker-compose.jobs.yml -f /home/kyle/.local/share/tutor/env/local/docker-compose.jobs.tmp.yml run --rm cms-job sh -e -c echo "Loading settings $DJANGO_SE... (several more script lines) ...eindex_course --all --setup

which will not run if copied and pasted back into the
user's terminal, as the importdemocourse shell script is unquoted.
---
 CHANGELOG.md        |  1 +
 tests/test_utils.py | 22 +++++++++++++++++++---
 tutor/utils.py      |  3 ++-
 3 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 22cd274..3fbbd85 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -18,6 +18,7 @@ Every user-facing change should have an entry in this changelog. Please respect
 
 ## Unreleased
 
+- [Bugfix] Log the shell commands that Tutor executes more accurately. (by @kdmccormick)
 - [Fix] `tutor dev quickstart` would fail under certain versions of docker-compose due to a bug in the logic that handled volume mounting. (by @kdmccormick)
 - [Bugfix] The `tutor k8s start` command will succeed even when `k8s-override` and `kustomization-patches-strategic-merge` are not specified. (by @edazzocaisser)
 - [Fix] `kubectl wait` checks deployments instead of pods as it could hang indefinitely if there are extra pods in a broken state. (by @keithgg)
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 76d81da..8563ba5 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -102,7 +102,23 @@ class UtilsTests(unittest.TestCase):
 
         result = utils.execute("echo", "")
         self.assertEqual(0, result)
-        self.assertEqual("echo \n", mock_stdout.getvalue())
+        self.assertEqual("echo ''\n", mock_stdout.getvalue())
+        self.assertEqual(1, process.wait.call_count)
+        process.kill.assert_not_called()
+
+    @patch("sys.stdout", new_callable=StringIO)
+    @patch("subprocess.Popen", autospec=True)
+    def test_execute_nested_command(
+        self, mock_popen: MagicMock, mock_stdout: StringIO
+    ) -> None:
+        process = mock_popen.return_value
+        mock_popen.return_value.__enter__.return_value = process
+        process.wait.return_value = 0
+        process.communicate.return_value = ("output", "error")
+
+        result = utils.execute("bash", "-c", "echo -n hi")
+        self.assertEqual(0, result)
+        self.assertEqual("bash -c 'echo -n hi'\n", mock_stdout.getvalue())
         self.assertEqual(1, process.wait.call_count)
         process.kill.assert_not_called()
 
@@ -117,7 +133,7 @@ class UtilsTests(unittest.TestCase):
         process.communicate.return_value = ("output", "error")
 
         self.assertRaises(exceptions.TutorError, utils.execute, "echo", "")
-        self.assertEqual("echo \n", mock_stdout.getvalue())
+        self.assertEqual("echo ''\n", mock_stdout.getvalue())
         self.assertEqual(1, process.wait.call_count)
         process.kill.assert_not_called()
 
@@ -131,7 +147,7 @@ class UtilsTests(unittest.TestCase):
         process.wait.side_effect = ZeroDivisionError("Exception occurred.")
 
         self.assertRaises(ZeroDivisionError, utils.execute, "echo", "")
-        self.assertEqual("echo \n", mock_stdout.getvalue())
+        self.assertEqual("echo ''\n", mock_stdout.getvalue())
         self.assertEqual(2, process.wait.call_count)
         process.kill.assert_called_once()
 
diff --git a/tutor/utils.py b/tutor/utils.py
index 18a7472..9b53970 100644
--- a/tutor/utils.py
+++ b/tutor/utils.py
@@ -3,6 +3,7 @@ from functools import lru_cache
 import json
 import os
 import random
+import shlex
 import shutil
 import string
 import struct
@@ -201,7 +202,7 @@ def is_a_tty() -> bool:
 
 
 def execute(*command: str) -> int:
-    click.echo(fmt.command(" ".join(command)))
+    click.echo(fmt.command(shlex.join(command)))
     with subprocess.Popen(command) as p:
         try:
             result = p.wait(timeout=None)

From 8345b7ab938190954036a23d42c617d1da18516b Mon Sep 17 00:00:00 2001
From: Kyle McCormick <kdmc@pm.me>
Date: Mon, 25 Jul 2022 11:34:13 -0400
Subject: [PATCH 3/3] temp: shim shlex.join for Python 3.7 compatability

---
 tutor/utils.py | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/tutor/utils.py b/tutor/utils.py
index 9b53970..fcbd29d 100644
--- a/tutor/utils.py
+++ b/tutor/utils.py
@@ -202,7 +202,7 @@ def is_a_tty() -> bool:
 
 
 def execute(*command: str) -> int:
-    click.echo(fmt.command(shlex.join(command)))
+    click.echo(fmt.command(_shlex_join(*command)))
     with subprocess.Popen(command) as p:
         try:
             result = p.wait(timeout=None)
@@ -221,6 +221,19 @@ def execute(*command: str) -> int:
     return result
 
 
+def _shlex_join(*split_command: str) -> str:
+    """
+    Return a shell-escaped string from *split_command.
+
+    TODO: REMOVE THIS FUNCTION AFTER 2023-06-27.
+      This function is a shim for the ``shlex.join`` standard library function,
+      which becomes available in Python 3.8  The end-of-life date for Python 3.7
+      is in Jan 2023 (https://endoflife.date/python). After that point, it
+      would be good to delete this function and just use Py3.8's ``shlex.join``.
+    """
+    return " ".join(shlex.quote(arg) for arg in split_command)
+
+
 def check_output(*command: str) -> bytes:
     literal_command = " ".join(command)
     click.echo(fmt.command(literal_command))