from datetime import datetime from time import sleep from typing import Any, List, Optional, Type import click from tutor import config as tutor_config from tutor import env as tutor_env from tutor import exceptions, fmt, hooks from tutor import interactive as interactive_config from tutor import serialize, utils from tutor.commands import jobs from tutor.commands.config import save as config_save_command from tutor.commands.context import BaseTaskContext from tutor.commands.upgrade import OPENEDX_RELEASE_NAMES from tutor.commands.upgrade.k8s import upgrade_from from tutor.tasks import BaseTaskRunner from tutor.types import Config, get_typed class K8sClients: _instance = None def __init__(self) -> None: # Loading the kubernetes module here to avoid import overhead # pylint: disable=import-outside-toplevel from kubernetes import client, config config.load_kube_config() self._batch_api = None self._core_api = None self._client = client @classmethod def instance(cls: Type["K8sClients"]) -> "K8sClients": if cls._instance is None: cls._instance = cls() return cls._instance @property def batch_api(self): # type: ignore if self._batch_api is None: self._batch_api = self._client.BatchV1Api() return self._batch_api @property def core_api(self): # type: ignore if self._core_api is None: self._core_api = self._client.CoreV1Api() return self._core_api class K8sTaskRunner(BaseTaskRunner): """ Run tasks (bash commands) in Kubernetes-managed services. Note: a single Tutor "task" correspond to a Kubernetes "job": https://kubernetes.io/docs/concepts/workloads/controllers/job/ A Tutor "job" is composed of multiple Tutor tasks run in different services. In Kubernetes, each task that is expected to run in a "myservice" container will trigger the "myservice-job" Kubernetes job. This job definition must be present in the "k8s/jobs.yml" template. """ def run_task(self, service: str, command: str) -> int: job_name = f"{service}-job" job = self.load_job(job_name) # Create a unique job name to make it deduplicate jobs and make it easier to # find later. Logs of older jobs will remain available for some time. job_name += "-" + datetime.now().strftime("%Y%m%d%H%M%S") # Wait until all other jobs are completed while True: active_jobs = self.active_job_names() if not active_jobs: break fmt.echo_info( f"Waiting for active jobs to terminate: {' '.join(active_jobs)}" ) sleep(5) # Configure job job["metadata"]["name"] = job_name job["metadata"].setdefault("labels", {}) job["metadata"]["labels"]["app.kubernetes.io/name"] = job_name # Define k8s entrypoint/args shell_command = ["sh", "-e", "-c"] if job["spec"]["template"]["spec"]["containers"][0].get("command") == []: # In some cases, we need to bypass the container entrypoint. # Unfortunately, AFAIK, there is no way to do so in K8s manifests. So we mark # some jobs with "command: []". For these jobs, the entrypoint becomes "sh -e -c". # We do not do this for every job, because some (most) entrypoints are actually useful. job["spec"]["template"]["spec"]["containers"][0]["command"] = shell_command container_args = [command] else: container_args = shell_command + [command] job["spec"]["template"]["spec"]["containers"][0]["args"] = container_args job["spec"]["backoffLimit"] = 1 job["spec"]["ttlSecondsAfterFinished"] = 3600 # Save patched job to "jobs.yml" file with open( tutor_env.pathjoin(self.root, "k8s", "jobs.yml"), "w", encoding="utf-8" ) as job_file: serialize.dump(job, job_file) # We cannot use the k8s API to create the job: configMap and volume names need # to be found with the right suffixes. kubectl_apply( self.root, "--selector", f"app.kubernetes.io/name={job_name}", ) message = ( "Job {job_name} is running. To view the logs from this job, run:\n\n" """ kubectl logs --namespace={namespace} --follow $(kubectl get --namespace={namespace} pods """ """--selector=job-name={job_name} -o=jsonpath="{{.items[0].metadata.name}}")\n\n""" "Waiting for job completion..." ).format(job_name=job_name, namespace=k8s_namespace(self.config)) fmt.echo_info(message) # Wait for completion field_selector = f"metadata.name={job_name}" while True: namespaced_jobs = K8sClients.instance().batch_api.list_namespaced_job( k8s_namespace(self.config), field_selector=field_selector ) if not namespaced_jobs.items: continue job = namespaced_jobs.items[0] if not job.status.active: if job.status.succeeded: fmt.echo_info(f"Job {job_name} successful.") break if job.status.failed: raise exceptions.TutorError( f"Job {job_name} failed. View the job logs to debug this issue." ) sleep(5) return 0 def load_job(self, name: str) -> Any: """ Find a given job definition in the rendered k8s/jobs.yml template. """ all_jobs = self.render("k8s", "jobs.yml") for job in serialize.load_all(all_jobs): job_name = job["metadata"]["name"] if not isinstance(job_name, str): raise exceptions.TutorError( f"Invalid job name: '{job_name}'. Expected str." ) if job_name == name: return job raise exceptions.TutorError(f"Could not find job '{name}'") def active_job_names(self) -> List[str]: """ Return a list of active job names Docs: https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.18/#list-job-v1-batch This is necessary to make sure that we don't run the same job multiple times at the same time. """ api = K8sClients.instance().batch_api return [ job.metadata.name for job in api.list_namespaced_job(k8s_namespace(self.config)).items if job.status.active ] class K8sContext(BaseTaskContext): def job_runner(self, config: Config) -> K8sTaskRunner: return K8sTaskRunner(self.root, config) @click.group(help="Run Open edX on Kubernetes") @click.pass_context def k8s(context: click.Context) -> None: context.obj = K8sContext(context.obj.root) @click.command(help="Configure and run Open edX from scratch") @click.option("-I", "--non-interactive", is_flag=True, help="Run non-interactively") @click.pass_context def launch(context: click.Context, non_interactive: bool) -> None: run_upgrade_from_release = tutor_env.should_upgrade_from_release(context.obj.root) if run_upgrade_from_release is not None: click.echo(fmt.title("Upgrading from an older release")) context.invoke( upgrade, from_release=tutor_env.get_env_release(context.obj.root), ) click.echo(fmt.title("Interactive platform configuration")) config = tutor_config.load_minimal(context.obj.root) if not non_interactive: interactive_config.ask_questions(config, run_for_prod=True) tutor_config.save_config_file(context.obj.root, config) config = tutor_config.load_full(context.obj.root) tutor_env.save(context.obj.root, config) if run_upgrade_from_release and not non_interactive: question = f"""Your platform is being upgraded from {run_upgrade_from_release.capitalize()}. If you run custom Docker images, you must rebuild and push them to your private repository now by running the following commands in a different shell: tutor images build all # add your custom images here tutor images push all Press enter when you are ready to continue""" click.confirm( fmt.question(question), default=True, abort=True, prompt_suffix=" " ) click.echo(fmt.title("Starting the platform")) context.invoke(start) click.echo(fmt.title("Database creation and migrations")) context.invoke(init, limit=None) config = tutor_config.load(context.obj.root) fmt.echo_info( """Your Open edX platform is ready and can be accessed at the following urls: {http}://{lms_host} {http}://{cms_host} """.format( http="https" if config["ENABLE_HTTPS"] else "http", lms_host=config["LMS_HOST"], cms_host=config["CMS_HOST"], ) ) @click.command( short_help="Run all configured Open edX resources", help=( "Run all configured Open edX resources. You may limit this command to " "some resources by passing name arguments." ), ) @click.argument("names", metavar="name", nargs=-1) @click.pass_obj def start(context: K8sContext, names: List[str]) -> None: config = tutor_config.load(context.root) # Create namespace, if necessary # Note that this step should not be run for some users, in particular those # who do not have permission to edit the namespace. try: utils.kubectl("get", "namespaces", k8s_namespace(config)) fmt.echo_info("Namespace already exists: skipping creation.") except exceptions.TutorError: fmt.echo_info("Namespace does not exist: now creating it...") kubectl_apply( context.root, "--wait", "--selector", "app.kubernetes.io/component=namespace", ) names = names or ["all"] for name in names: if name == "all": # Create volumes kubectl_apply( context.root, "--wait", "--selector", "app.kubernetes.io/component=volume", ) # Create everything else except jobs kubectl_apply( context.root, "--selector", "app.kubernetes.io/component notin (job,volume,namespace)", ) else: kubectl_apply( context.root, "--selector", f"app.kubernetes.io/name={name}", ) @click.command( short_help="Stop a running platform", help=( "Stop a running platform by deleting all resources, except for volumes. " "You may limit this command to some resources by passing name arguments." ), ) @click.argument("names", metavar="name", nargs=-1) @click.pass_obj def stop(context: K8sContext, names: List[str]) -> None: config = tutor_config.load(context.root) names = names or ["all"] for name in names: if name == "all": delete_resources(config) else: delete_resources(config, name=name) def delete_resources( config: Config, resources: Optional[List[str]] = None, name: Optional[str] = None ) -> None: """ Delete resources by type and name. The load balancer is never deleted. """ resources = resources or ["deployments", "services", "configmaps", "jobs"] not_lb_selector = "app.kubernetes.io/component!=loadbalancer" name_selector = [f"app.kubernetes.io/name={name}"] if name else [] utils.kubectl( "delete", *resource_selector(config, not_lb_selector, *name_selector), ",".join(resources), ) @click.command(help="Reboot an existing platform") @click.pass_context def reboot(context: click.Context) -> None: context.invoke(stop) context.invoke(start) @click.command(help="Completely delete an existing platform") @click.option("-y", "--yes", is_flag=True, help="Do not ask for confirmation") @click.pass_obj def delete(context: K8sContext, yes: bool) -> None: if not yes: click.confirm( "Are you sure you want to delete the platform? All data will be removed.", abort=True, ) utils.kubectl( "delete", "-k", tutor_env.pathjoin(context.root), "--ignore-not-found=true", "--wait", ) @jobs.do_group @click.pass_obj def do(context: K8sContext) -> None: """ Run a custom job in the right container(s). We make sure that some essential containers (databases, proxy) are up before we launch the jobs. """ @hooks.Actions.DO_JOB.add() def _start_base_deployments(_job_name: str, *_args: Any, **_kwargs: Any) -> None: """ We add this logic to an action callback because we do not want to trigger it whenever we run `tutor k8s do --help`. """ config = tutor_config.load(context.root) wait_for_deployment_ready(config, "caddy") for name in ["elasticsearch", "mysql", "mongodb"]: if tutor_config.is_service_activated(config, name): wait_for_deployment_ready(config, name) @click.command(help="Initialise all applications") @click.option("-l", "--limit", help="Limit initialisation to this service or plugin") @click.pass_context def init(context: click.Context, limit: Optional[str]) -> None: context.invoke(do.commands["init"], limit=limit) @click.command(help="Scale the number of replicas of a given deployment") @click.argument("deployment") @click.argument("replicas", type=int) @click.pass_obj def scale(context: K8sContext, deployment: str, replicas: int) -> None: config = tutor_config.load(context.root) utils.kubectl( "scale", # Note that we don't use the full resource selector because selectors # are not compatible with the deployment/ argument. *resource_namespace_selector( config, ), f"--replicas={replicas}", f"deployment/{deployment}", ) @click.command( name="exec", help="Execute a command in a pod of the given application", context_settings={"ignore_unknown_options": True}, ) @click.argument("service") @click.argument("args", nargs=-1, required=True) @click.pass_obj def exec_command(context: K8sContext, service: str, args: List[str]) -> None: config = tutor_config.load(context.root) kubectl_exec(config, service, args) @click.command(help="View output from containers") @click.option("-c", "--container", help="Print the logs of this specific container") @click.option("-f", "--follow", is_flag=True, help="Follow log output") @click.option("--tail", type=int, help="Number of lines to show from each container") @click.argument("service") @click.pass_obj def logs( context: K8sContext, container: str, follow: bool, tail: bool, service: str ) -> None: config = tutor_config.load(context.root) command = ["logs"] selectors = ["app.kubernetes.io/name=" + service] if service else [] command += resource_selector(config, *selectors) if container: command += ["-c", container] if follow: command += ["--follow"] if tail is not None: command += ["--tail", str(tail)] utils.kubectl(*command) @click.command(help="Wait for a pod to become ready") @click.argument("name") @click.pass_obj def wait(context: K8sContext, name: str) -> None: config = tutor_config.load(context.root) wait_for_deployment_ready(config, name) @click.command( short_help="Perform release-specific upgrade tasks", help="Perform release-specific upgrade tasks. To perform a full upgrade remember to run `launch`.", ) @click.option( "--from", "from_release", type=click.Choice(OPENEDX_RELEASE_NAMES), ) @click.pass_context def upgrade(context: click.Context, from_release: Optional[str]) -> None: if from_release is None: from_release = tutor_env.get_env_release(context.obj.root) if from_release is None: fmt.echo_info("Your environment is already up-to-date") else: fmt.echo_alert( "This command only performs a partial upgrade of your Open edX platform. " "To perform a full upgrade, you should run `tutor k8s launch`." ) upgrade_from(context, from_release) # We update the environment to update the version context.invoke(config_save_command) @click.command( short_help="Direct interface to `kubectl apply`.", help=( "Direct interface to `kubnectl-apply`. This is a wrapper around `kubectl apply`. A;; options and" " arguments passed to this command will be forwarded as-is to `kubectl apply`." ), context_settings={"ignore_unknown_options": True}, name="apply", ) @click.argument("args", nargs=-1) @click.pass_obj def apply_command(context: K8sContext, args: List[str]) -> None: kubectl_apply(context.root, *args) def kubectl_apply(root: str, *args: str) -> None: utils.kubectl("apply", "--kustomize", tutor_env.pathjoin(root), *args) @click.command(help="Print status information for all k8s resources") @click.pass_obj def status(context: K8sContext) -> int: config = tutor_config.load(context.root) return utils.kubectl("get", "all", *resource_namespace_selector(config)) def kubectl_exec(config: Config, service: str, command: List[str]) -> int: selector = f"app.kubernetes.io/name={service}" pods = K8sClients.instance().core_api.list_namespaced_pod( namespace=k8s_namespace(config), label_selector=selector ) if not pods.items: raise exceptions.TutorError( f"Could not find an active pod for the {service} service" ) pod_name = pods.items[0].metadata.name # Run command return utils.kubectl( "exec", "--stdin", "--tty", "--namespace", k8s_namespace(config), pod_name, "--", *command, ) def wait_for_deployment_ready(config: Config, service: str) -> None: fmt.echo_info(f"Waiting for a {service} deployment to be ready...") utils.kubectl( "wait", *resource_selector(config, f"app.kubernetes.io/name={service}"), "--for=condition=Available=True", "--timeout=600s", "deployment", ) def resource_selector(config: Config, *selectors: str) -> List[str]: """ Convenient utility to filter the resources that belong to this project. """ selector = ",".join( ["app.kubernetes.io/instance=openedx-" + get_typed(config, "ID", str)] + list(selectors) ) return resource_namespace_selector(config) + ["--selector=" + selector] def resource_namespace_selector(config: Config) -> List[str]: """ Convenient utility to filter the resources that belong to this project namespace. """ return ["--namespace", k8s_namespace(config)] def k8s_namespace(config: Config) -> str: return get_typed(config, "K8S_NAMESPACE", str) k8s.add_command(launch) k8s.add_command(start) k8s.add_command(stop) k8s.add_command(reboot) k8s.add_command(delete) k8s.add_command(init) k8s.add_command(scale) k8s.add_command(exec_command) k8s.add_command(logs) k8s.add_command(wait) k8s.add_command(upgrade) k8s.add_command(apply_command) k8s.add_command(status) @hooks.Actions.PLUGINS_LOADED.add() def _add_k8s_do_commands() -> None: jobs.add_job_commands(do) k8s.add_command(do)