# tutor/tutor/commands/k8s.py

from datetime import datetime
from time import sleep
from typing import Any, List, Optional, Type
import click
from .. import config as tutor_config
from .. import env as tutor_env
from .. import exceptions, fmt, jobs, serialize, utils
from ..types import Config, get_typed
from .config import save as config_save_command
from .context import Context


class K8sClients:
    """Lazily-instantiated singleton wrapper around the Kubernetes API clients."""

    _instance = None

    def __init__(self) -> None:
        # Loading the kubernetes module here to avoid import overhead
        from kubernetes import client, config  # pylint: disable=import-outside-toplevel

        config.load_kube_config()
        self._batch_api = None
        self._core_api = None
        self._client = client

    @classmethod
    def instance(cls: Type["K8sClients"]) -> "K8sClients":
        if cls._instance is None:
            cls._instance = cls()
        return cls._instance

    @property
    def batch_api(self):  # type: ignore
        if self._batch_api is None:
            self._batch_api = self._client.BatchV1Api()
        return self._batch_api

    @property
    def core_api(self):  # type: ignore
        if self._core_api is None:
            self._core_api = self._client.CoreV1Api()
        return self._core_api
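
# Usage sketch: clients are built on first access and reused afterwards. The
# "openedx" namespace below is an illustrative value, not a module default.
#
#   clients = K8sClients.instance()
#   pods = clients.core_api.list_namespaced_pod("openedx")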


class K8sJobRunner(jobs.BaseJobRunner):
    def load_job(self, name: str) -> Any:
        all_jobs = self.render("k8s", "jobs.yml")
        for job in serialize.load_all(all_jobs):
            job_name = job["metadata"]["name"]
            if not isinstance(job_name, str):
                raise exceptions.TutorError(
                    "Invalid job name: '{}'. Expected str.".format(job_name)
                )
            if job_name == name:
                return job
        raise exceptions.TutorError("Could not find job '{}'".format(name))

    def active_job_names(self) -> List[str]:
        """
        Return a list of active job names
        Docs:
        https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.18/#list-job-v1-batch
        """
        api = K8sClients.instance().batch_api
        return [
            job.metadata.name
            for job in api.list_namespaced_job(k8s_namespace(self.config)).items
            if job.status.active
        ]

    def run_job(self, service: str, command: str) -> int:
        job_name = "{}-job".format(service)
        job = self.load_job(job_name)
        # Give the job a unique name to deduplicate jobs and make them easier to
        # find later. Logs of older jobs will remain available for some time.
        job_name += "-" + datetime.now().strftime("%Y%m%d%H%M%S")

        # Wait until all other jobs are completed
        while True:
            active_jobs = self.active_job_names()
            if not active_jobs:
                break
            fmt.echo_info(
                "Waiting for active jobs to terminate: {}".format(" ".join(active_jobs))
            )
            sleep(5)

        # Configure job
        job["metadata"]["name"] = job_name
        job["metadata"].setdefault("labels", {})
        job["metadata"]["labels"]["app.kubernetes.io/name"] = job_name
        # Define k8s entrypoint/args
        shell_command = ["sh", "-e", "-c"]
        if job["spec"]["template"]["spec"]["containers"][0].get("command") == []:
            # In some cases, we need to bypass the container entrypoint.
            # Unfortunately, as far as we know, there is no way to do so in K8s
            # manifests. So we mark some jobs with "command: []". For these jobs,
            # the entrypoint becomes "sh -e -c". We do not do this for every job,
            # because most entrypoints are actually useful.
            job["spec"]["template"]["spec"]["containers"][0]["command"] = shell_command
            container_args = [command]
        else:
            container_args = shell_command + [command]
        job["spec"]["template"]["spec"]["containers"][0]["args"] = container_args
        job["spec"]["backoffLimit"] = 1
        job["spec"]["ttlSecondsAfterFinished"] = 3600
        # Save the patched job to the "jobs.yml" file
        with open(tutor_env.pathjoin(self.root, "k8s", "jobs.yml"), "w") as job_file:
            serialize.dump(job, job_file)
        # We cannot use the k8s API to create the job: configMap and volume names need
        # to be found with the right suffixes.
        utils.kubectl(
            "apply",
            "--kustomize",
            tutor_env.pathjoin(self.root),
            "--selector",
            "app.kubernetes.io/name={}".format(job_name),
        )
        message = (
            "Job {job_name} is running. To view the logs from this job, run:\n\n"
            """    kubectl logs --namespace={namespace} --follow $(kubectl get --namespace={namespace} pods """
            """--selector=job-name={job_name} -o=jsonpath="{{.items[0].metadata.name}}")\n\n"""
            "Waiting for job completion..."
        ).format(job_name=job_name, namespace=k8s_namespace(self.config))
        fmt.echo_info(message)

        # Wait for completion
        field_selector = "metadata.name={}".format(job_name)
        while True:
            namespaced_jobs = K8sClients.instance().batch_api.list_namespaced_job(
                k8s_namespace(self.config), field_selector=field_selector
            )
            if not namespaced_jobs.items:
                # Job is not visible yet: wait before polling again to avoid
                # hammering the API server.
                sleep(5)
                continue
            job = namespaced_jobs.items[0]
            if not job.status.active:
                if job.status.succeeded:
                    fmt.echo_info("Job {} successful.".format(job_name))
                    break
                if job.status.failed:
                    raise exceptions.TutorError(
                        "Job {} failed. View the job logs to debug this issue.".format(
                            job_name
                        )
                    )
            sleep(5)
        return 0
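
# Illustrative usage, assuming a rendered "lms-job" exists in k8s/jobs.yml
# (the command string is an example only):
#
#   runner = K8sJobRunner(root, config)
#   runner.run_job("lms", "./manage.py lms migrate")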


@click.group(help="Run Open edX on Kubernetes")
def k8s() -> None:
    pass
@click.command(help="Configure and run Open edX from scratch")
@click.option("-I", "--non-interactive", is_flag=True, help="Run non-interactively")
@click.pass_context
def quickstart(context: click.Context, non_interactive: bool) -> None:
click.echo(fmt.title("Interactive platform configuration"))
context.invoke(
config_save_command,
interactive=(not non_interactive),
set_vars=[],
unset_vars=[],
)
config = tutor_config.load(context.obj.root)
if not config["ENABLE_WEB_PROXY"]:
fmt.echo_alert(
"Potentially invalid configuration: ENABLE_WEB_PROXY=false\n"
"This setting might have been defined because you previously set WEB_PROXY=true. This is no longer"
" necessary in order to get Tutor to work on Kubernetes. In Tutor v11+ a Caddy-based load balancer is"
" provided out of the box to handle SSL/TLS certificate generation at runtime. If you disable this"
" service, you will have to configure an Ingress resource and a certificate manager yourself to redirect"
" traffic to the caddy service. See the Kubernetes section in the Tutor documentation for more"
" information."
)
click.echo(fmt.title("Updating the current environment"))
tutor_env.save(context.obj.root, config)
click.echo(fmt.title("Starting the platform"))
context.invoke(start)
click.echo(fmt.title("Database creation and migrations"))
context.invoke(init, limit=None)
fmt.echo_info(
"""Your Open edX platform is ready and can be accessed at the following urls:
{http}://{lms_host}
{http}://{cms_host}
""".format(
http="https" if config["ENABLE_HTTPS"] else "http",
lms_host=config["LMS_HOST"],
cms_host=config["CMS_HOST"],
)
)


@click.command(
    short_help="Run all configured Open edX resources",
    help=(
        "Run all configured Open edX resources. You may limit this command to "
        "some resources by passing name arguments."
    ),
)
@click.argument("names", metavar="name", nargs=-1)
@click.pass_obj
def start(context: Context, names: List[str]) -> None:
    config = tutor_config.load(context.root)
    # Create the namespace, if necessary. Note that this step should not be run
    # for some users, in particular those who do not have permission to edit the
    # namespace.
    try:
        utils.kubectl("get", "namespaces", k8s_namespace(config))
        fmt.echo_info("Namespace already exists: skipping creation.")
    except exceptions.TutorError:
        fmt.echo_info("Namespace does not exist: now creating it...")
        utils.kubectl(
            "apply",
            "--kustomize",
            tutor_env.pathjoin(context.root),
            "--wait",
            "--selector",
            "app.kubernetes.io/component=namespace",
        )
    names = names or ["all"]
    for name in names:
        if name == "all":
            # Create volumes
            utils.kubectl(
                "apply",
                "--kustomize",
                tutor_env.pathjoin(context.root),
                "--wait",
                "--selector",
                "app.kubernetes.io/component=volume",
            )
            # Create everything else, except jobs
            utils.kubectl(
                "apply",
                "--kustomize",
                tutor_env.pathjoin(context.root),
                "--selector",
                "app.kubernetes.io/component notin (job,volume,namespace)",
            )
        else:
            utils.kubectl(
                "apply",
                "--kustomize",
                tutor_env.pathjoin(context.root),
                "--selector",
                "app.kubernetes.io/name={}".format(name),
            )
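
# For reference, the "all" branch above roughly boils down to (assuming the
# default environment location returned by "tutor config printroot"):
#
#   kubectl apply --kustomize "$(tutor config printroot)/env" \
#       --selector 'app.kubernetes.io/component notin (job,volume,namespace)'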


@click.command(
    short_help="Stop a running platform",
    help=(
        "Stop a running platform by deleting all resources, except for volumes. "
        "You may limit this command to some resources by passing name arguments."
    ),
)
@click.argument("names", metavar="name", nargs=-1)
@click.pass_obj
def stop(context: Context, names: List[str]) -> None:
    config = tutor_config.load(context.root)
    names = names or ["all"]
    resource_types = "deployments,services,configmaps,jobs"
    not_lb_selector = "app.kubernetes.io/component!=loadbalancer"
    for name in names:
        if name == "all":
            utils.kubectl(
                "delete",
                *resource_selector(config, not_lb_selector),
                resource_types,
            )
        else:
            utils.kubectl(
                "delete",
                *resource_selector(
                    config,
                    not_lb_selector,
                    "app.kubernetes.io/name={}".format(name),
                ),
                resource_types,
            )
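
# For reference, "tutor k8s stop" roughly translates to (with <namespace> and
# <id> taken from the K8S_NAMESPACE and ID configuration values):
#
#   kubectl delete --namespace <namespace> \
#       --selector 'app.kubernetes.io/instance=openedx-<id>,app.kubernetes.io/component!=loadbalancer' \
#       deployments,services,configmaps,jobs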
@click.command(help="Reboot an existing platform")
@click.pass_context
def reboot(context: click.Context) -> None:
context.invoke(stop)
context.invoke(start)
@click.command(help="Completely delete an existing platform")
@click.option("-y", "--yes", is_flag=True, help="Do not ask for confirmation")
@click.pass_obj
def delete(context: Context, yes: bool) -> None:
if not yes:
click.confirm(
"Are you sure you want to delete the platform? All data will be removed.",
abort=True,
)
utils.kubectl(
"delete",
"-k",
tutor_env.pathjoin(context.root),
"--ignore-not-found=true",
"--wait",
)
@click.command(help="Initialise all applications")
@click.option("-l", "--limit", help="Limit initialisation to this service or plugin")
@click.pass_obj
def init(context: Context, limit: Optional[str]) -> None:
config = tutor_config.load(context.root)
runner = K8sJobRunner(context.root, config)
wait_for_pod_ready(config, "caddy")
for name in ["elasticsearch", "mysql", "mongodb"]:
if tutor_config.is_service_activated(config, name):
wait_for_pod_ready(config, name)
jobs.initialise(runner, limit_to=limit)
@click.command(help="Scale the number of replicas of a given deployment")
@click.argument("deployment")
@click.argument("replicas", type=int)
@click.pass_obj
def scale(context: Context, deployment: str, replicas: int) -> None:
config = tutor_config.load(context.root)
utils.kubectl(
"scale",
# Note that we don't use the full resource selector because selectors
# are not compatible with the deployment/<name> argument.
*resource_namespace_selector(
config,
),
"--replicas={}".format(replicas),
"deployment/{}".format(deployment),
)
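
# For reference, "tutor k8s scale lms 2" roughly translates to:
#
#   kubectl scale --namespace <namespace> --replicas=2 deployment/lms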
@click.command(help="Create an Open edX user and interactively set their password")
@click.option("--superuser", is_flag=True, help="Make superuser")
@click.option("--staff", is_flag=True, help="Make staff user")
@click.option(
"-p",
"--password",
help="Specify password from the command line. If undefined, you will be prompted to input a password",
)
@click.argument("name")
@click.argument("email")
@click.pass_obj
def createuser(
context: Context, superuser: str, staff: bool, password: str, name: str, email: str
) -> None:
config = tutor_config.load(context.root)
command = jobs.create_user_command(superuser, staff, name, email, password=password)
# This needs to be interactive in case the user needs to type a password
kubectl_exec(config, "lms", command, attach=True)
@click.command(help="Import the demo course")
@click.pass_obj
def importdemocourse(context: Context) -> None:
fmt.echo_info("Importing demo course")
config = tutor_config.load(context.root)
runner = K8sJobRunner(context.root, config)
jobs.import_demo_course(runner)


@click.command(
    help="Assign a theme to the LMS and the CMS. To reset to the default theme, use 'default' as the theme name."
)
@click.option(
    "-d",
    "--domain",
    "domains",
    multiple=True,
    help=(
        "Limit the theme to these domain names. By default, the theme is "
        "applied to the LMS and the CMS, both in development and production mode"
    ),
)
@click.argument("theme_name")
@click.pass_obj
def settheme(context: Context, domains: List[str], theme_name: str) -> None:
    config = tutor_config.load(context.root)
    runner = K8sJobRunner(context.root, config)
    domains = domains or jobs.get_all_openedx_domains(config)
    jobs.set_theme(theme_name, domains, runner)
@click.command(name="exec", help="Execute a command in a pod of the given application")
@click.argument("service")
@click.argument("command")
@click.pass_obj
def exec_command(context: Context, service: str, command: str) -> None:
config = tutor_config.load(context.root)
kubectl_exec(config, service, command, attach=True)
@click.command(help="View output from containers")
@click.option("-c", "--container", help="Print the logs of this specific container")
@click.option("-f", "--follow", is_flag=True, help="Follow log output")
@click.option("--tail", type=int, help="Number of lines to show from each container")
@click.argument("service")
@click.pass_obj
def logs(
context: Context, container: str, follow: bool, tail: bool, service: str
) -> None:
config = tutor_config.load(context.root)
command = ["logs"]
selectors = ["app.kubernetes.io/name=" + service] if service else []
command += resource_selector(config, *selectors)
if container:
command += ["-c", container]
if follow:
command += ["--follow"]
if tail is not None:
command += ["--tail", str(tail)]
utils.kubectl(*command)
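
# For reference, "tutor k8s logs --follow lms" roughly translates to:
#
#   kubectl logs --namespace <namespace> \
#       --selector 'app.kubernetes.io/instance=openedx-<id>,app.kubernetes.io/name=lms' \
#       --follow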
@click.command(help="Wait for a pod to become ready")
@click.argument("name")
@click.pass_obj
def wait(context: Context, name: str) -> None:
config = tutor_config.load(context.root)
wait_for_pod_ready(config, name)
@click.command(help="Upgrade from a previous Open edX named release")
@click.option(
"--from",
"from_version",
default="koa",
type=click.Choice(["ironwood", "juniper", "koa", "lilac"]),
)
@click.pass_obj
def upgrade(context: Context, from_version: str) -> None:
config = tutor_config.load(context.root)
running_version = from_version
if running_version == "ironwood":
upgrade_from_ironwood(config)
running_version = "juniper"
if running_version == "juniper":
upgrade_from_juniper(config)
running_version = "koa"
if running_version == "koa":
upgrade_from_koa(config)
running_version = "lilac"
if running_version == "lilac":
# Nothing to do here
running_version = "maple"


def upgrade_from_ironwood(config: Config) -> None:
    if not config["RUN_MONGODB"]:
        fmt.echo_info(
            "You are not running MongoDB (RUN_MONGODB=false). It is your "
            "responsibility to upgrade your MongoDB instance to v3.6. There is "
            "nothing left to do to upgrade from Ironwood."
        )
        return
    message = """Automatic release upgrade is unsupported in Kubernetes. To upgrade from Ironwood, you should upgrade
your MongoDB cluster from v3.2 to v3.6. You should run something similar to:

    # Upgrade from v3.2 to v3.4
    tutor k8s stop
    tutor config save --set DOCKER_IMAGE_MONGODB=mongo:3.4.24
    tutor k8s start
    tutor k8s exec mongodb mongo --eval 'db.adminCommand({ setFeatureCompatibilityVersion: "3.4" })'

    # Upgrade from v3.4 to v3.6
    tutor k8s stop
    tutor config save --set DOCKER_IMAGE_MONGODB=mongo:3.6.18
    tutor k8s start
    tutor k8s exec mongodb mongo --eval 'db.adminCommand({ setFeatureCompatibilityVersion: "3.6" })'

    tutor config save --unset DOCKER_IMAGE_MONGODB"""
    fmt.echo_info(message)


def upgrade_from_juniper(config: Config) -> None:
    if not config["RUN_MYSQL"]:
        fmt.echo_info(
            "You are not running MySQL (RUN_MYSQL=false). It is your "
            "responsibility to upgrade your MySQL instance to v5.7. There is "
            "nothing left to do to upgrade from Juniper."
        )
        return
    message = """Automatic release upgrade is unsupported in Kubernetes. To upgrade from Juniper, you should upgrade
your MySQL database from v5.6 to v5.7. You should run something similar to:

    tutor k8s start
    tutor k8s exec mysql bash -e -c "mysql_upgrade \
        -u $(tutor config printvalue MYSQL_ROOT_USERNAME) \
        --password='$(tutor config printvalue MYSQL_ROOT_PASSWORD)'"
"""
    fmt.echo_info(message)


def upgrade_from_koa(config: Config) -> None:
    if not config["RUN_MONGODB"]:
        fmt.echo_info(
            "You are not running MongoDB (RUN_MONGODB=false). It is your "
            "responsibility to upgrade your MongoDB instance to v4.0. There is "
            "nothing left to do to upgrade to Lilac from Koa."
        )
        return
    message = """Automatic release upgrade is unsupported in Kubernetes. To upgrade from Koa to Lilac, you should upgrade
your MongoDB cluster from v3.6 to v4.0. You should run something similar to:

    tutor k8s stop
    tutor config save --set DOCKER_IMAGE_MONGODB=mongo:4.0.25
    tutor k8s start
    tutor k8s exec mongodb mongo --eval 'db.adminCommand({ setFeatureCompatibilityVersion: "4.0" })'
    tutor config save --unset DOCKER_IMAGE_MONGODB
"""
    fmt.echo_info(message)


def kubectl_exec(
    config: Config, service: str, command: str, attach: bool = False
) -> int:
    selector = "app.kubernetes.io/name={}".format(service)
    pods = K8sClients.instance().core_api.list_namespaced_pod(
        namespace=k8s_namespace(config), label_selector=selector
    )
    if not pods.items:
        raise exceptions.TutorError(
            "Could not find an active pod for the {} service".format(service)
        )
    pod_name = pods.items[0].metadata.name
    # Run command
    attach_opts = ["-i", "-t"] if attach else []
    return utils.kubectl(
        "exec",
        *attach_opts,
        "--namespace",
        k8s_namespace(config),
        pod_name,
        "--",
        "sh",
        "-e",
        "-c",
        command,
    )
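
# For reference, kubectl_exec(config, "lms", "echo ok", attach=True) roughly
# translates to (with <pod> resolved through the core API above):
#
#   kubectl exec -i -t --namespace <namespace> <pod> -- sh -e -c "echo ok"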


def wait_for_pod_ready(config: Config, service: str) -> None:
    fmt.echo_info("Waiting for a {} pod to be ready...".format(service))
    utils.kubectl(
        "wait",
        *resource_selector(config, "app.kubernetes.io/name={}".format(service)),
        "--for=condition=ContainersReady",
        "--timeout=600s",
        "pod",
    )


def resource_selector(config: Config, *selectors: str) -> List[str]:
    """
    Convenience utility for filtering the resources that belong to this project.
    """
    selector = ",".join(
        ["app.kubernetes.io/instance=openedx-" + get_typed(config, "ID", str)]
        + list(selectors)
    )
    return resource_namespace_selector(config) + ["--selector=" + selector]
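
# Example: with ID="abc" and K8S_NAMESPACE="openedx", resource_selector(config)
# returns:
#
#   ["--namespace", "openedx", "--selector=app.kubernetes.io/instance=openedx-abc"]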


def resource_namespace_selector(config: Config) -> List[str]:
    """
    Convenience utility for filtering the resources that belong to this project namespace.
    """
    return ["--namespace", k8s_namespace(config)]


def k8s_namespace(config: Config) -> str:
    return get_typed(config, "K8S_NAMESPACE", str)

k8s.add_command(quickstart)
k8s.add_command(start)
k8s.add_command(stop)
k8s.add_command(reboot)
k8s.add_command(delete)
k8s.add_command(init)
k8s.add_command(scale)
k8s.add_command(createuser)
k8s.add_command(importdemocourse)
k8s.add_command(settheme)
k8s.add_command(exec_command)
k8s.add_command(logs)
k8s.add_command(wait)
k8s.add_command(upgrade)