k8s-manifests/jupyter/base/charts/jupyterhub/files/hub/jupyterhub_config.py

507 lines
18 KiB
Python
Raw Normal View History

# load the config object (satisfies linters)
c = get_config() # noqa
import glob
import os
import re
import sys
from jupyterhub.utils import url_path_join
from kubernetes_asyncio import client
from tornado.httpclient import AsyncHTTPClient
# Make sure that modules placed in the same directory as the jupyterhub config are added to the pythonpath
configuration_directory = os.path.dirname(os.path.realpath(__file__))
sys.path.insert(0, configuration_directory)
from z2jh import (
get_config,
get_name,
get_name_env,
get_secret_value,
set_config_if_not_none,
)
def camelCaseify(s):
"""convert snake_case to camelCase
For the common case where some_value is set from someValue
so we don't have to specify the name twice.
"""
return re.sub(r"_([a-z])", lambda m: m.group(1).upper(), s)
# Configure JupyterHub to use the curl backend for making HTTP requests,
# rather than the pure-python implementations. The default one starts
# being too slow to make a large number of requests to the proxy API
# at the rate required.
AsyncHTTPClient.configure("tornado.curl_httpclient.CurlAsyncHTTPClient")
c.JupyterHub.spawner_class = "kubespawner.KubeSpawner"
# Connect to a proxy running in a different pod. Note that *_SERVICE_*
# environment variables are set by Kubernetes for Services
c.ConfigurableHTTPProxy.api_url = (
f'http://{get_name("proxy-api")}:{get_name_env("proxy-api", "_SERVICE_PORT")}'
)
c.ConfigurableHTTPProxy.should_start = False
# Do not shut down user pods when hub is restarted
c.JupyterHub.cleanup_servers = False
# Check that the proxy has routes appropriately setup
c.JupyterHub.last_activity_interval = 60
# Don't wait at all before redirecting a spawning user to the progress page
c.JupyterHub.tornado_settings = {
"slow_spawn_timeout": 0,
}
# configure the hub db connection
db_type = get_config("hub.db.type")
if db_type == "sqlite-pvc":
c.JupyterHub.db_url = "sqlite:///jupyterhub.sqlite"
elif db_type == "sqlite-memory":
c.JupyterHub.db_url = "sqlite://"
else:
set_config_if_not_none(c.JupyterHub, "db_url", "hub.db.url")
db_password = get_secret_value("hub.db.password", None)
if db_password is not None:
if db_type == "mysql":
os.environ["MYSQL_PWD"] = db_password
elif db_type == "postgres":
os.environ["PGPASSWORD"] = db_password
else:
print(f"Warning: hub.db.password is ignored for hub.db.type={db_type}")
# c.JupyterHub configuration from Helm chart's configmap
for trait, cfg_key in (
("concurrent_spawn_limit", None),
("active_server_limit", None),
("base_url", None),
("allow_named_servers", None),
("named_server_limit_per_user", None),
("authenticate_prometheus", None),
("redirect_to_server", None),
("shutdown_on_logout", None),
("template_paths", None),
("template_vars", None),
):
if cfg_key is None:
cfg_key = camelCaseify(trait)
set_config_if_not_none(c.JupyterHub, trait, "hub." + cfg_key)
# hub_bind_url configures what the JupyterHub process within the hub pod's
# container should listen to.
hub_container_port = 8081
c.JupyterHub.hub_bind_url = f"http://:{hub_container_port}"
# hub_connect_url is the URL for connecting to the hub for use by external
# JupyterHub services such as the proxy. Note that *_SERVICE_* environment
# variables are set by Kubernetes for Services.
c.JupyterHub.hub_connect_url = (
f'http://{get_name("hub")}:{get_name_env("hub", "_SERVICE_PORT")}'
)
# implement common labels
2024-11-11 14:12:48 +00:00
# This mimics the jupyterhub.commonLabels helper, but declares managed-by to
# kubespawner instead of helm.
#
# The labels app and release are old labels enabled to be deleted in z2jh 5, but
# for now retained to avoid a breaking change in z2jh 4 that would force user
# server restarts. Restarts would be required because NetworkPolicy resources
# must select old/new pods with labels that then needs to be seen on both
# old/new pods, and we want these resources to keep functioning for old/new user
# server pods during an upgrade.
#
common_labels = c.KubeSpawner.common_labels = {}
2024-11-11 14:12:48 +00:00
common_labels["app.kubernetes.io/name"] = common_labels["app"] = get_config(
"nameOverride",
default=get_config("Chart.Name", "jupyterhub"),
)
2024-11-11 14:12:48 +00:00
release = get_config("Release.Name")
if release:
common_labels["app.kubernetes.io/instance"] = common_labels["release"] = release
chart_name = get_config("Chart.Name")
chart_version = get_config("Chart.Version")
if chart_name and chart_version:
2024-11-11 14:12:48 +00:00
common_labels["helm.sh/chart"] = common_labels["chart"] = (
f"{chart_name}-{chart_version.replace('+', '_')}"
)
2024-11-11 14:12:48 +00:00
common_labels["app.kubernetes.io/managed-by"] = "kubespawner"
c.KubeSpawner.namespace = os.environ.get("POD_NAMESPACE", "default")
# Max number of consecutive failures before the Hub restarts itself
set_config_if_not_none(
c.Spawner,
"consecutive_failure_limit",
"hub.consecutiveFailureLimit",
)
for trait, cfg_key in (
("pod_name_template", None),
("start_timeout", None),
("image_pull_policy", "image.pullPolicy"),
# ('image_pull_secrets', 'image.pullSecrets'), # Managed manually below
("events_enabled", "events"),
("extra_labels", None),
("extra_annotations", None),
# ("allow_privilege_escalation", None), # Managed manually below
("uid", None),
("fs_gid", None),
("service_account", "serviceAccountName"),
("storage_extra_labels", "storage.extraLabels"),
# ("tolerations", "extraTolerations"), # Managed manually below
("node_selector", None),
("node_affinity_required", "extraNodeAffinity.required"),
("node_affinity_preferred", "extraNodeAffinity.preferred"),
("pod_affinity_required", "extraPodAffinity.required"),
("pod_affinity_preferred", "extraPodAffinity.preferred"),
("pod_anti_affinity_required", "extraPodAntiAffinity.required"),
("pod_anti_affinity_preferred", "extraPodAntiAffinity.preferred"),
("lifecycle_hooks", None),
("init_containers", None),
("extra_containers", None),
("mem_limit", "memory.limit"),
("mem_guarantee", "memory.guarantee"),
("cpu_limit", "cpu.limit"),
("cpu_guarantee", "cpu.guarantee"),
("extra_resource_limits", "extraResource.limits"),
("extra_resource_guarantees", "extraResource.guarantees"),
("environment", "extraEnv"),
("profile_list", None),
("extra_pod_config", None),
):
if cfg_key is None:
cfg_key = camelCaseify(trait)
set_config_if_not_none(c.KubeSpawner, trait, "singleuser." + cfg_key)
image = get_config("singleuser.image.name")
if image:
tag = get_config("singleuser.image.tag")
if tag:
image = f"{image}:{tag}"
c.KubeSpawner.image = image
# allow_privilege_escalation defaults to False in KubeSpawner 2+. Since its a
# property where None, False, and True all are valid values that users of the
# Helm chart may want to set, we can't use the set_config_if_not_none helper
# function as someone may want to override the default False value to None.
#
c.KubeSpawner.allow_privilege_escalation = get_config(
"singleuser.allowPrivilegeEscalation"
)
# Combine imagePullSecret.create (single), imagePullSecrets (list), and
# singleuser.image.pullSecrets (list).
image_pull_secrets = []
if get_config("imagePullSecret.automaticReferenceInjection") and get_config(
"imagePullSecret.create"
):
image_pull_secrets.append(get_name("image-pull-secret"))
if get_config("imagePullSecrets"):
image_pull_secrets.extend(get_config("imagePullSecrets"))
if get_config("singleuser.image.pullSecrets"):
image_pull_secrets.extend(get_config("singleuser.image.pullSecrets"))
if image_pull_secrets:
c.KubeSpawner.image_pull_secrets = image_pull_secrets
# scheduling:
if get_config("scheduling.userScheduler.enabled"):
c.KubeSpawner.scheduler_name = get_name("user-scheduler")
if get_config("scheduling.podPriority.enabled"):
c.KubeSpawner.priority_class_name = get_name("priority")
# add node-purpose affinity
match_node_purpose = get_config("scheduling.userPods.nodeAffinity.matchNodePurpose")
if match_node_purpose:
node_selector = dict(
matchExpressions=[
dict(
key="hub.jupyter.org/node-purpose",
operator="In",
values=["user"],
)
],
)
if match_node_purpose == "prefer":
c.KubeSpawner.node_affinity_preferred.append(
dict(
weight=100,
preference=node_selector,
),
)
elif match_node_purpose == "require":
c.KubeSpawner.node_affinity_required.append(node_selector)
elif match_node_purpose == "ignore":
pass
else:
raise ValueError(
f"Unrecognized value for matchNodePurpose: {match_node_purpose}"
)
# Combine the common tolerations for user pods with singleuser tolerations
scheduling_user_pods_tolerations = get_config("scheduling.userPods.tolerations", [])
singleuser_extra_tolerations = get_config("singleuser.extraTolerations", [])
tolerations = scheduling_user_pods_tolerations + singleuser_extra_tolerations
if tolerations:
c.KubeSpawner.tolerations = tolerations
# Configure dynamically provisioning pvc
storage_type = get_config("singleuser.storage.type")
if storage_type == "dynamic":
pvc_name_template = get_config("singleuser.storage.dynamic.pvcNameTemplate")
2024-11-11 14:12:48 +00:00
if pvc_name_template:
c.KubeSpawner.pvc_name_template = pvc_name_template
volume_name_template = get_config("singleuser.storage.dynamic.volumeNameTemplate")
c.KubeSpawner.storage_pvc_ensure = True
set_config_if_not_none(
c.KubeSpawner, "storage_class", "singleuser.storage.dynamic.storageClass"
)
set_config_if_not_none(
c.KubeSpawner,
"storage_access_modes",
"singleuser.storage.dynamic.storageAccessModes",
)
set_config_if_not_none(
c.KubeSpawner, "storage_capacity", "singleuser.storage.capacity"
)
# Add volumes to singleuser pods
c.KubeSpawner.volumes = [
{
"name": volume_name_template,
2024-11-11 14:12:48 +00:00
"persistentVolumeClaim": {"claimName": "{pvc_name}"},
}
]
c.KubeSpawner.volume_mounts = [
{
"mountPath": get_config("singleuser.storage.homeMountPath"),
"name": volume_name_template,
2024-11-11 14:12:48 +00:00
"subPath": get_config("singleuser.storage.dynamic.subPath"),
}
]
elif storage_type == "static":
pvc_claim_name = get_config("singleuser.storage.static.pvcName")
c.KubeSpawner.volumes = [
{"name": "home", "persistentVolumeClaim": {"claimName": pvc_claim_name}}
]
c.KubeSpawner.volume_mounts = [
{
"mountPath": get_config("singleuser.storage.homeMountPath"),
"name": "home",
"subPath": get_config("singleuser.storage.static.subPath"),
}
]
# Inject singleuser.extraFiles as volumes and volumeMounts with data loaded from
# the dedicated k8s Secret prepared to hold the extraFiles actual content.
extra_files = get_config("singleuser.extraFiles", {})
if extra_files:
volume = {
"name": "files",
}
items = []
for file_key, file_details in extra_files.items():
# Each item is a mapping of a key in the k8s Secret to a path in this
# abstract volume, the goal is to enable us to set the mode /
# permissions only though so we don't change the mapping.
item = {
"key": file_key,
"path": file_key,
}
if "mode" in file_details:
item["mode"] = file_details["mode"]
items.append(item)
volume["secret"] = {
"secretName": get_name("singleuser"),
"items": items,
}
c.KubeSpawner.volumes.append(volume)
volume_mounts = []
for file_key, file_details in extra_files.items():
volume_mounts.append(
{
"mountPath": file_details["mountPath"],
"subPath": file_key,
"name": "files",
}
)
c.KubeSpawner.volume_mounts.extend(volume_mounts)
# Inject extraVolumes / extraVolumeMounts
c.KubeSpawner.volumes.extend(get_config("singleuser.storage.extraVolumes", []))
c.KubeSpawner.volume_mounts.extend(
get_config("singleuser.storage.extraVolumeMounts", [])
)
c.JupyterHub.services = []
c.JupyterHub.load_roles = []
# jupyterhub-idle-culler's permissions are scoped to what it needs only, see
# https://github.com/jupyterhub/jupyterhub-idle-culler#permissions.
#
if get_config("cull.enabled", False):
jupyterhub_idle_culler_role = {
"name": "jupyterhub-idle-culler",
"scopes": [
"list:users",
"read:users:activity",
"read:servers",
"delete:servers",
# "admin:users", # dynamically added if --cull-users is passed
],
# assign the role to a jupyterhub service, so it gains these permissions
"services": ["jupyterhub-idle-culler"],
}
cull_cmd = ["python3", "-m", "jupyterhub_idle_culler"]
base_url = c.JupyterHub.get("base_url", "/")
cull_cmd.append("--url=http://localhost:8081" + url_path_join(base_url, "hub/api"))
cull_timeout = get_config("cull.timeout")
if cull_timeout:
cull_cmd.append(f"--timeout={cull_timeout}")
cull_every = get_config("cull.every")
if cull_every:
cull_cmd.append(f"--cull-every={cull_every}")
cull_concurrency = get_config("cull.concurrency")
if cull_concurrency:
cull_cmd.append(f"--concurrency={cull_concurrency}")
if get_config("cull.users"):
cull_cmd.append("--cull-users")
jupyterhub_idle_culler_role["scopes"].append("admin:users")
if not get_config("cull.adminUsers"):
cull_cmd.append("--cull-admin-users=false")
if get_config("cull.removeNamedServers"):
cull_cmd.append("--remove-named-servers")
cull_max_age = get_config("cull.maxAge")
if cull_max_age:
cull_cmd.append(f"--max-age={cull_max_age}")
c.JupyterHub.services.append(
{
"name": "jupyterhub-idle-culler",
"command": cull_cmd,
}
)
c.JupyterHub.load_roles.append(jupyterhub_idle_culler_role)
for key, service in get_config("hub.services", {}).items():
# c.JupyterHub.services is a list of dicts, but
# hub.services is a dict of dicts to make the config mergable
service.setdefault("name", key)
# As the api_token could be exposed in hub.existingSecret, we need to read
# it it from there or fall back to the chart managed k8s Secret's value.
service.pop("apiToken", None)
service["api_token"] = get_secret_value(f"hub.services.{key}.apiToken")
c.JupyterHub.services.append(service)
for key, role in get_config("hub.loadRoles", {}).items():
# c.JupyterHub.load_roles is a list of dicts, but
# hub.loadRoles is a dict of dicts to make the config mergable
role.setdefault("name", key)
c.JupyterHub.load_roles.append(role)
# respect explicit null command (distinct from unspecified)
# this avoids relying on KubeSpawner.cmd's default being None
_unspecified = object()
specified_cmd = get_config("singleuser.cmd", _unspecified)
if specified_cmd is not _unspecified:
c.Spawner.cmd = specified_cmd
set_config_if_not_none(c.Spawner, "default_url", "singleuser.defaultUrl")
cloud_metadata = get_config("singleuser.cloudMetadata")
if cloud_metadata.get("blockWithIptables") == True:
# Use iptables to block access to cloud metadata by default
network_tools_image_name = get_config("singleuser.networkTools.image.name")
network_tools_image_tag = get_config("singleuser.networkTools.image.tag")
network_tools_resources = get_config("singleuser.networkTools.resources")
ip = cloud_metadata["ip"]
ip_block_container = client.V1Container(
name="block-cloud-metadata",
image=f"{network_tools_image_name}:{network_tools_image_tag}",
command=[
"iptables",
"--append",
"OUTPUT",
"--protocol",
"tcp",
"--destination",
ip,
"--destination-port",
"80",
"--jump",
"DROP",
],
security_context=client.V1SecurityContext(
privileged=True,
run_as_user=0,
capabilities=client.V1Capabilities(add=["NET_ADMIN"]),
),
resources=network_tools_resources,
)
c.KubeSpawner.init_containers.append(ip_block_container)
if get_config("debug.enabled", False):
c.JupyterHub.log_level = "DEBUG"
c.Spawner.debug = True
# load potentially seeded secrets
#
# NOTE: ConfigurableHTTPProxy.auth_token is set through an environment variable
# that is set using the chart managed secret.
c.JupyterHub.cookie_secret = get_secret_value("hub.config.JupyterHub.cookie_secret")
# NOTE: CryptKeeper.keys should be a list of strings, but we have encoded as a
# single string joined with ; in the k8s Secret.
#
c.CryptKeeper.keys = get_secret_value("hub.config.CryptKeeper.keys").split(";")
# load hub.config values, except potentially seeded secrets already loaded
for app, cfg in get_config("hub.config", {}).items():
if app == "JupyterHub":
cfg.pop("proxy_auth_token", None)
cfg.pop("cookie_secret", None)
cfg.pop("services", None)
elif app == "ConfigurableHTTPProxy":
cfg.pop("auth_token", None)
elif app == "CryptKeeper":
cfg.pop("keys", None)
c[app].update(cfg)
# load /usr/local/etc/jupyterhub/jupyterhub_config.d config files
config_dir = "/usr/local/etc/jupyterhub/jupyterhub_config.d"
if os.path.isdir(config_dir):
for file_path in sorted(glob.glob(f"{config_dir}/*.py")):
file_name = os.path.basename(file_path)
print(f"Loading {config_dir} config: {file_name}")
with open(file_path) as f:
file_content = f.read()
# compiling makes debugging easier: https://stackoverflow.com/a/437857
exec(compile(source=file_content, filename=file_name, mode="exec"))
# execute hub.extraConfig entries
for key, config_py in sorted(get_config("hub.extraConfig", {}).items()):
print(f"Loading extra config: {key}")
exec(config_py)