# load the config object (satisfies linters)
c = get_config()  # noqa

import glob
import os
import re
import sys

from jupyterhub.utils import url_path_join
from kubernetes_asyncio import client
from tornado.httpclient import AsyncHTTPClient

# Make sure that modules placed in the same directory as the jupyterhub config
# are added to the pythonpath
configuration_directory = os.path.dirname(os.path.realpath(__file__))
sys.path.insert(0, configuration_directory)

from z2jh import (
    get_config,
    get_name,
    get_name_env,
    get_secret_value,
    set_config_if_not_none,
)


def camelCaseify(s):
    """convert snake_case to camelCase

    For the common case where some_value is set from someValue
    so we don't have to specify the name twice.
    """
    return re.sub(r"_([a-z])", lambda m: m.group(1).upper(), s)
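
# For illustration, a couple of example conversions:
#
#     camelCaseify("node_selector")    -> "nodeSelector"
#     camelCaseify("extra_pod_config") -> "extraPodConfig"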

# Configure JupyterHub to use the curl backend for making HTTP requests,
# rather than the pure-python implementation, which is too slow to make the
# large number of requests to the proxy API at the rate required.
AsyncHTTPClient.configure("tornado.curl_httpclient.CurlAsyncHTTPClient")

c.JupyterHub.spawner_class = "kubespawner.KubeSpawner"

# Connect to a proxy running in a different pod. Note that *_SERVICE_*
# environment variables are set by Kubernetes for Services.
c.ConfigurableHTTPProxy.api_url = (
    f'http://{get_name("proxy-api")}:{get_name_env("proxy-api", "_SERVICE_PORT")}'
)
c.ConfigurableHTTPProxy.should_start = False

# Do not shut down user pods when hub is restarted
c.JupyterHub.cleanup_servers = False

# Check that the proxy has routes appropriately set up
c.JupyterHub.last_activity_interval = 60

# Don't wait at all before redirecting a spawning user to the progress page
c.JupyterHub.tornado_settings = {
    "slow_spawn_timeout": 0,
}

# configure the hub db connection
db_type = get_config("hub.db.type")
if db_type == "sqlite-pvc":
    c.JupyterHub.db_url = "sqlite:///jupyterhub.sqlite"
elif db_type == "sqlite-memory":
    c.JupyterHub.db_url = "sqlite://"
else:
    set_config_if_not_none(c.JupyterHub, "db_url", "hub.db.url")
db_password = get_secret_value("hub.db.password", None)
if db_password is not None:
    if db_type == "mysql":
        os.environ["MYSQL_PWD"] = db_password
    elif db_type == "postgres":
        os.environ["PGPASSWORD"] = db_password
    else:
        print(f"Warning: hub.db.password is ignored for hub.db.type={db_type}")

# c.JupyterHub configuration from Helm chart's configmap
for trait, cfg_key in (
    ("concurrent_spawn_limit", None),
    ("active_server_limit", None),
    ("base_url", None),
    ("allow_named_servers", None),
    ("named_server_limit_per_user", None),
    ("authenticate_prometheus", None),
    ("redirect_to_server", None),
    ("shutdown_on_logout", None),
    ("template_paths", None),
    ("template_vars", None),
):
    if cfg_key is None:
        cfg_key = camelCaseify(trait)
    set_config_if_not_none(c.JupyterHub, trait, "hub." + cfg_key)
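
# A cfg_key of None means the Helm value key is derived from the trait name,
# e.g. the trait concurrent_spawn_limit is read from hub.concurrentSpawnLimit.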

# hub_bind_url configures what the JupyterHub process within the hub pod's
# container should listen to.
hub_container_port = 8081
c.JupyterHub.hub_bind_url = f"http://:{hub_container_port}"

# hub_connect_url is the URL for connecting to the hub for use by external
# JupyterHub services such as the proxy. Note that *_SERVICE_* environment
# variables are set by Kubernetes for Services.
c.JupyterHub.hub_connect_url = (
    f'http://{get_name("hub")}:{get_name_env("hub", "_SERVICE_PORT")}'
)

# implement common labels
# This mimics the jupyterhub.commonLabels helper, but declares managed-by to
# kubespawner instead of helm.
#
# The labels app and release are old labels scheduled for deletion in z2jh 5,
# but retained for now to avoid a breaking change in z2jh 4 that would force
# user server restarts. Restarts would be required because NetworkPolicy
# resources must select old/new pods with labels that are present on both, and
# we want these resources to keep functioning for old and new user server pods
# during an upgrade.
#
common_labels = c.KubeSpawner.common_labels = {}
common_labels["app.kubernetes.io/name"] = common_labels["app"] = get_config(
    "nameOverride",
    default=get_config("Chart.Name", "jupyterhub"),
)
release = get_config("Release.Name")
if release:
    common_labels["app.kubernetes.io/instance"] = common_labels["release"] = release
chart_name = get_config("Chart.Name")
chart_version = get_config("Chart.Version")
if chart_name and chart_version:
    common_labels["helm.sh/chart"] = common_labels["chart"] = (
        f"{chart_name}-{chart_version.replace('+', '_')}"
    )
common_labels["app.kubernetes.io/managed-by"] = "kubespawner"

c.KubeSpawner.namespace = os.environ.get("POD_NAMESPACE", "default")

# Max number of consecutive failures before the Hub restarts itself
set_config_if_not_none(
    c.Spawner,
    "consecutive_failure_limit",
    "hub.consecutiveFailureLimit",
)

for trait, cfg_key in (
    ("pod_name_template", None),
    ("start_timeout", None),
    ("image_pull_policy", "image.pullPolicy"),
    # ("image_pull_secrets", "image.pullSecrets"),  # Managed manually below
    ("events_enabled", "events"),
    ("extra_labels", None),
    ("extra_annotations", None),
    # ("allow_privilege_escalation", None),  # Managed manually below
    ("uid", None),
    ("fs_gid", None),
    ("service_account", "serviceAccountName"),
    ("storage_extra_labels", "storage.extraLabels"),
    # ("tolerations", "extraTolerations"),  # Managed manually below
    ("node_selector", None),
    ("node_affinity_required", "extraNodeAffinity.required"),
    ("node_affinity_preferred", "extraNodeAffinity.preferred"),
    ("pod_affinity_required", "extraPodAffinity.required"),
    ("pod_affinity_preferred", "extraPodAffinity.preferred"),
    ("pod_anti_affinity_required", "extraPodAntiAffinity.required"),
    ("pod_anti_affinity_preferred", "extraPodAntiAffinity.preferred"),
    ("lifecycle_hooks", None),
    ("init_containers", None),
    ("extra_containers", None),
    ("mem_limit", "memory.limit"),
    ("mem_guarantee", "memory.guarantee"),
    ("cpu_limit", "cpu.limit"),
    ("cpu_guarantee", "cpu.guarantee"),
    ("extra_resource_limits", "extraResource.limits"),
    ("extra_resource_guarantees", "extraResource.guarantees"),
    ("environment", "extraEnv"),
    ("profile_list", None),
    ("extra_pod_config", None),
):
    if cfg_key is None:
        cfg_key = camelCaseify(trait)
    set_config_if_not_none(c.KubeSpawner, trait, "singleuser." + cfg_key)
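
# An explicit cfg_key overrides the derived name, e.g. the trait environment
# is read from singleuser.extraEnv rather than singleuser.environment.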

image = get_config("singleuser.image.name")
if image:
    tag = get_config("singleuser.image.tag")
    if tag:
        image = f"{image}:{tag}"

    c.KubeSpawner.image = image

# allow_privilege_escalation defaults to False in KubeSpawner 2+. Since it's a
# property where None, False, and True all are valid values that users of the
# Helm chart may want to set, we can't use the set_config_if_not_none helper
# function, as someone may want to override the default False value to None.
#
c.KubeSpawner.allow_privilege_escalation = get_config(
    "singleuser.allowPrivilegeEscalation"
)

# Combine imagePullSecret.create (single), imagePullSecrets (list), and
# singleuser.image.pullSecrets (list).
image_pull_secrets = []
if get_config("imagePullSecret.automaticReferenceInjection") and get_config(
    "imagePullSecret.create"
):
    image_pull_secrets.append(get_name("image-pull-secret"))
if get_config("imagePullSecrets"):
    image_pull_secrets.extend(get_config("imagePullSecrets"))
if get_config("singleuser.image.pullSecrets"):
    image_pull_secrets.extend(get_config("singleuser.image.pullSecrets"))
if image_pull_secrets:
    c.KubeSpawner.image_pull_secrets = image_pull_secrets

# scheduling:
if get_config("scheduling.userScheduler.enabled"):
    c.KubeSpawner.scheduler_name = get_name("user-scheduler")
if get_config("scheduling.podPriority.enabled"):
    c.KubeSpawner.priority_class_name = get_name("priority")

# add node-purpose affinity
match_node_purpose = get_config("scheduling.userPods.nodeAffinity.matchNodePurpose")
if match_node_purpose:
    node_selector = dict(
        matchExpressions=[
            dict(
                key="hub.jupyter.org/node-purpose",
                operator="In",
                values=["user"],
            )
        ],
    )
    if match_node_purpose == "prefer":
        c.KubeSpawner.node_affinity_preferred.append(
            dict(
                weight=100,
                preference=node_selector,
            ),
        )
    elif match_node_purpose == "require":
        c.KubeSpawner.node_affinity_required.append(node_selector)
    elif match_node_purpose == "ignore":
        pass
    else:
        raise ValueError(
            f"Unrecognized value for matchNodePurpose: {match_node_purpose}"
        )

# Combine the common tolerations for user pods with singleuser tolerations
scheduling_user_pods_tolerations = get_config("scheduling.userPods.tolerations", [])
singleuser_extra_tolerations = get_config("singleuser.extraTolerations", [])
tolerations = scheduling_user_pods_tolerations + singleuser_extra_tolerations
if tolerations:
    c.KubeSpawner.tolerations = tolerations

# Configure dynamically provisioned pvc
storage_type = get_config("singleuser.storage.type")
if storage_type == "dynamic":
    pvc_name_template = get_config("singleuser.storage.dynamic.pvcNameTemplate")
    if pvc_name_template:
        c.KubeSpawner.pvc_name_template = pvc_name_template
    volume_name_template = get_config("singleuser.storage.dynamic.volumeNameTemplate")
    c.KubeSpawner.storage_pvc_ensure = True
    set_config_if_not_none(
        c.KubeSpawner, "storage_class", "singleuser.storage.dynamic.storageClass"
    )
    set_config_if_not_none(
        c.KubeSpawner,
        "storage_access_modes",
        "singleuser.storage.dynamic.storageAccessModes",
    )
    set_config_if_not_none(
        c.KubeSpawner, "storage_capacity", "singleuser.storage.capacity"
    )

    # Add volumes to singleuser pods
    c.KubeSpawner.volumes = [
        {
            "name": volume_name_template,
            "persistentVolumeClaim": {"claimName": "{pvc_name}"},
        }
    ]
    c.KubeSpawner.volume_mounts = [
        {
            "mountPath": get_config("singleuser.storage.homeMountPath"),
            "name": volume_name_template,
            "subPath": get_config("singleuser.storage.dynamic.subPath"),
        }
    ]
elif storage_type == "static":
    pvc_claim_name = get_config("singleuser.storage.static.pvcName")
    c.KubeSpawner.volumes = [
        {"name": "home", "persistentVolumeClaim": {"claimName": pvc_claim_name}}
    ]

    c.KubeSpawner.volume_mounts = [
        {
            "mountPath": get_config("singleuser.storage.homeMountPath"),
            "name": "home",
            "subPath": get_config("singleuser.storage.static.subPath"),
        }
    ]

# Inject singleuser.extraFiles as volumes and volumeMounts with data loaded from
# the dedicated k8s Secret prepared to hold the extraFiles actual content.
extra_files = get_config("singleuser.extraFiles", {})
if extra_files:
    volume = {
        "name": "files",
    }
    items = []
    for file_key, file_details in extra_files.items():
        # Each item is a mapping of a key in the k8s Secret to a path in this
        # abstract volume. The goal is only to enable us to set the mode /
        # permissions, so we don't change the mapping.
        item = {
            "key": file_key,
            "path": file_key,
        }
        if "mode" in file_details:
            item["mode"] = file_details["mode"]
        items.append(item)
    volume["secret"] = {
        "secretName": get_name("singleuser"),
        "items": items,
    }
    c.KubeSpawner.volumes.append(volume)

    volume_mounts = []
    for file_key, file_details in extra_files.items():
        volume_mounts.append(
            {
                "mountPath": file_details["mountPath"],
                "subPath": file_key,
                "name": "files",
            }
        )
    c.KubeSpawner.volume_mounts.extend(volume_mounts)

# Inject extraVolumes / extraVolumeMounts
c.KubeSpawner.volumes.extend(get_config("singleuser.storage.extraVolumes", []))
c.KubeSpawner.volume_mounts.extend(
    get_config("singleuser.storage.extraVolumeMounts", [])
)
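
# For illustration, a hypothetical Helm value such as
#
#     singleuser:
#       extraFiles:
#         my-config:
#           mountPath: /etc/my-config.yaml
#           mode: 0o440
#
# results in a Secret item keyed "my-config" mounted at that path via subPath,
# with the given mode applied.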

c.JupyterHub.services = []
c.JupyterHub.load_roles = []

# jupyterhub-idle-culler's permissions are scoped to what it needs only, see
# https://github.com/jupyterhub/jupyterhub-idle-culler#permissions.
#
if get_config("cull.enabled", False):
    jupyterhub_idle_culler_role = {
        "name": "jupyterhub-idle-culler",
        "scopes": [
            "list:users",
            "read:users:activity",
            "read:servers",
            "delete:servers",
            # "admin:users", # dynamically added if --cull-users is passed
        ],
        # assign the role to a jupyterhub service, so it gains these permissions
        "services": ["jupyterhub-idle-culler"],
    }

    cull_cmd = ["python3", "-m", "jupyterhub_idle_culler"]
    base_url = c.JupyterHub.get("base_url", "/")
    cull_cmd.append("--url=http://localhost:8081" + url_path_join(base_url, "hub/api"))

    cull_timeout = get_config("cull.timeout")
    if cull_timeout:
        cull_cmd.append(f"--timeout={cull_timeout}")

    cull_every = get_config("cull.every")
    if cull_every:
        cull_cmd.append(f"--cull-every={cull_every}")

    cull_concurrency = get_config("cull.concurrency")
    if cull_concurrency:
        cull_cmd.append(f"--concurrency={cull_concurrency}")

    if get_config("cull.users"):
        cull_cmd.append("--cull-users")
        jupyterhub_idle_culler_role["scopes"].append("admin:users")

    if not get_config("cull.adminUsers"):
        cull_cmd.append("--cull-admin-users=false")

    if get_config("cull.removeNamedServers"):
        cull_cmd.append("--remove-named-servers")

    cull_max_age = get_config("cull.maxAge")
    if cull_max_age:
        cull_cmd.append(f"--max-age={cull_max_age}")

    c.JupyterHub.services.append(
        {
            "name": "jupyterhub-idle-culler",
            "command": cull_cmd,
        }
    )
    c.JupyterHub.load_roles.append(jupyterhub_idle_culler_role)
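
# For illustration, with hypothetical values cull.timeout=3600, cull.every=600,
# cull.adminUsers=true, and the remaining cull options unset, cull_cmd becomes:
#
#     ["python3", "-m", "jupyterhub_idle_culler",
#      "--url=http://localhost:8081/hub/api",
#      "--timeout=3600", "--cull-every=600"]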

for key, service in get_config("hub.services", {}).items():
    # c.JupyterHub.services is a list of dicts, but
    # hub.services is a dict of dicts to make the config mergable
    service.setdefault("name", key)

    # As the api_token could be exposed in hub.existingSecret, we need to read
    # it from there or fall back to the chart managed k8s Secret's value.
    service.pop("apiToken", None)
    service["api_token"] = get_secret_value(f"hub.services.{key}.apiToken")

    c.JupyterHub.services.append(service)

for key, role in get_config("hub.loadRoles", {}).items():
    # c.JupyterHub.load_roles is a list of dicts, but
    # hub.loadRoles is a dict of dicts to make the config mergable
    role.setdefault("name", key)

    c.JupyterHub.load_roles.append(role)
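
# For illustration, a hypothetical Helm value hub.services.my-service with a
# url field becomes {"name": "my-service", "url": ..., "api_token": <secret>}
# in c.JupyterHub.services.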

# respect explicit null command (distinct from unspecified)
# this avoids relying on KubeSpawner.cmd's default being None
_unspecified = object()
specified_cmd = get_config("singleuser.cmd", _unspecified)
if specified_cmd is not _unspecified:
    c.Spawner.cmd = specified_cmd

set_config_if_not_none(c.Spawner, "default_url", "singleuser.defaultUrl")

cloud_metadata = get_config("singleuser.cloudMetadata")

if cloud_metadata.get("blockWithIptables") == True:
    # Use iptables to block access to cloud metadata by default
    network_tools_image_name = get_config("singleuser.networkTools.image.name")
    network_tools_image_tag = get_config("singleuser.networkTools.image.tag")
    network_tools_resources = get_config("singleuser.networkTools.resources")
    ip = cloud_metadata["ip"]
    ip_block_container = client.V1Container(
        name="block-cloud-metadata",
        image=f"{network_tools_image_name}:{network_tools_image_tag}",
        command=[
            "iptables",
            "--append",
            "OUTPUT",
            "--protocol",
            "tcp",
            "--destination",
            ip,
            "--destination-port",
            "80",
            "--jump",
            "DROP",
        ],
        security_context=client.V1SecurityContext(
            privileged=True,
            run_as_user=0,
            capabilities=client.V1Capabilities(add=["NET_ADMIN"]),
        ),
        resources=network_tools_resources,
    )

    c.KubeSpawner.init_containers.append(ip_block_container)
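
# The init container above runs the equivalent of (singleuser.cloudMetadata.ip
# is typically the link-local metadata address 169.254.169.254):
#
#     iptables --append OUTPUT --protocol tcp --destination <ip> \
#         --destination-port 80 --jump DROP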

if get_config("debug.enabled", False):
    c.JupyterHub.log_level = "DEBUG"
    c.Spawner.debug = True

# load potentially seeded secrets
#
# NOTE: ConfigurableHTTPProxy.auth_token is set through an environment variable
# that is set using the chart managed secret.
c.JupyterHub.cookie_secret = get_secret_value("hub.config.JupyterHub.cookie_secret")
# NOTE: CryptKeeper.keys should be a list of strings, but we have encoded it as
# a single string joined with ; in the k8s Secret.
#
c.CryptKeeper.keys = get_secret_value("hub.config.CryptKeeper.keys").split(";")

# load hub.config values, except potentially seeded secrets already loaded
for app, cfg in get_config("hub.config", {}).items():
    if app == "JupyterHub":
        cfg.pop("proxy_auth_token", None)
        cfg.pop("cookie_secret", None)
        cfg.pop("services", None)
    elif app == "ConfigurableHTTPProxy":
        cfg.pop("auth_token", None)
    elif app == "CryptKeeper":
        cfg.pop("keys", None)
    c[app].update(cfg)

# load /usr/local/etc/jupyterhub/jupyterhub_config.d config files
config_dir = "/usr/local/etc/jupyterhub/jupyterhub_config.d"
if os.path.isdir(config_dir):
    for file_path in sorted(glob.glob(f"{config_dir}/*.py")):
        file_name = os.path.basename(file_path)
        print(f"Loading {config_dir} config: {file_name}")
        with open(file_path) as f:
            file_content = f.read()
        # compiling makes debugging easier: https://stackoverflow.com/a/437857
        exec(compile(source=file_content, filename=file_name, mode="exec"))

# execute hub.extraConfig entries
for key, config_py in sorted(get_config("hub.extraConfig", {}).items()):
    print(f"Loading extra config: {key}")
    exec(config_py)
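
# For illustration, a hypothetical Helm value such as
#
#     hub:
#       extraConfig:
#         00-my-config: |
#           c.JupyterHub.admin_access = True
#
# is executed here, in sorted key order.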