From cc94407ce6b40f61cc025281da89a72159261603 Mon Sep 17 00:00:00 2001 From: Fagani Hajizada Date: Tue, 10 Feb 2026 23:52:04 +0100 Subject: [PATCH] Add custom labels, securityContext to DaemonSets Allow users to configure pod-level and container-level security contexts globally for all DaemonSets via spec.daemonsets.podSecurityContext and spec.daemonsets.securityContext. Pod-level replaces the existing context; container-level merges into each container, preserving fields already set by component transforms. Propagate custom labels and annotations from the validator DaemonSet pod template onto CUDA and plugin workload pods, skipping protected selector labels ("app", "app.kubernetes.io/part-of"). Add commonLabels support to the NFD Helm subchart so users can inject additional labels onto all NFD resources. --- api/nvidia/v1/clusterpolicy_types.go | 6 + api/nvidia/v1/zz_generated.deepcopy.go | 10 + .../manifests/nvidia.com_clusterpolicies.yaml | 426 ++++++++++++++++++ cmd/nvidia-validator/main.go | 29 ++ cmd/nvidia-validator/main_test.go | 99 ++++ .../crd/bases/nvidia.com_clusterpolicies.yaml | 426 ++++++++++++++++++ controllers/object_controls.go | 70 ++- controllers/transforms_test.go | 146 ++++++ .../templates/_helpers.tpl | 3 + .../charts/node-feature-discovery/values.yaml | 3 + .../crds/nvidia.com_clusterpolicies.yaml | 426 ++++++++++++++++++ 11 files changed, 1642 insertions(+), 2 deletions(-) diff --git a/api/nvidia/v1/clusterpolicy_types.go b/api/nvidia/v1/clusterpolicy_types.go index 121493c7d..db117c619 100644 --- a/api/nvidia/v1/clusterpolicy_types.go +++ b/api/nvidia/v1/clusterpolicy_types.go @@ -245,6 +245,12 @@ type DaemonsetsSpec struct { // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Rolling update configuration for all DaemonSet pods" RollingUpdate *RollingUpdateSpec `json:"rollingUpdate,omitempty"` + + // Optional: Set pod-level security context for all DaemonSet pods + PodSecurityContext *corev1.PodSecurityContext `json:"podSecurityContext,omitempty"` + + // Optional: Set default container-level security context for all DaemonSet pods + SecurityContext *corev1.SecurityContext `json:"securityContext,omitempty"` } // InitContainerSpec describes configuration for initContainer image used with all components diff --git a/api/nvidia/v1/zz_generated.deepcopy.go b/api/nvidia/v1/zz_generated.deepcopy.go index 3bb70f073..c023a499b 100644 --- a/api/nvidia/v1/zz_generated.deepcopy.go +++ b/api/nvidia/v1/zz_generated.deepcopy.go @@ -500,6 +500,16 @@ func (in *DaemonsetsSpec) DeepCopyInto(out *DaemonsetsSpec) { *out = new(RollingUpdateSpec) **out = **in } + if in.PodSecurityContext != nil { + in, out := &in.PodSecurityContext, &out.PodSecurityContext + *out = new(corev1.PodSecurityContext) + (*in).DeepCopyInto(*out) + } + if in.SecurityContext != nil { + in, out := &in.SecurityContext, &out.SecurityContext + *out = new(corev1.SecurityContext) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DaemonsetsSpec. diff --git a/bundle/manifests/nvidia.com_clusterpolicies.yaml b/bundle/manifests/nvidia.com_clusterpolicies.yaml index d82340856..85245ca2e 100644 --- a/bundle/manifests/nvidia.com_clusterpolicies.yaml +++ b/bundle/manifests/nvidia.com_clusterpolicies.yaml @@ -172,6 +172,239 @@ spec: (scope and select) objects. May match selectors of replication controllers and services. type: object + podSecurityContext: + description: 'Optional: Set pod-level security context for all + DaemonSet pods' + properties: + appArmorProfile: + description: |- + appArmorProfile is the AppArmor options to use by the containers in this pod. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: |- + localhostProfile indicates a profile loaded on the node that should be used. + The profile must be preconfigured on the node to work. + Must match the loaded name of the profile. + Must be set if and only if type is "Localhost". + type: string + type: + description: |- + type indicates which kind of AppArmor profile will be applied. + Valid options are: + Localhost - a profile pre-loaded on the node. + RuntimeDefault - the container runtime's default profile. + Unconfined - no AppArmor enforcement. + type: string + required: + - type + type: object + fsGroup: + description: |- + A special supplemental group that applies to all containers in a pod. + Some volume types allow the Kubelet to change the ownership of that volume + to be owned by the pod: + + 1. The owning GID will be the FSGroup + 2. The setgid bit is set (new files created in the volume will be owned by FSGroup) + 3. The permission bits are OR'd with rw-rw---- + + If unset, the Kubelet will not modify the ownership and permissions of any volume. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + fsGroupChangePolicy: + description: |- + fsGroupChangePolicy defines behavior of changing ownership and permission of the volume + before being exposed inside Pod. This field will only apply to + volume types which support fsGroup based ownership(and permissions). + It will have no effect on ephemeral volume types such as: secret, configmaps + and emptydir. + Valid values are "OnRootMismatch" and "Always". If not specified, "Always" is used. + Note that this field cannot be set when spec.os.name is windows. + type: string + runAsGroup: + description: |- + The GID to run the entrypoint of the container process. + Uses runtime default if unset. + May also be set in SecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence + for that container. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + runAsNonRoot: + description: |- + Indicates that the container must run as a non-root user. + If true, the Kubelet will validate the image at runtime to ensure that it + does not run as UID 0 (root) and fail to start the container if it does. + If unset or false, no such validation will be performed. + May also be set in SecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: boolean + runAsUser: + description: |- + The UID to run the entrypoint of the container process. + Defaults to user specified in image metadata if unspecified. + May also be set in SecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence + for that container. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + seLinuxChangePolicy: + description: |- + seLinuxChangePolicy defines how the container's SELinux label is applied to all volumes used by the Pod. + It has no effect on nodes that do not support SELinux or to volumes does not support SELinux. + Valid values are "MountOption" and "Recursive". + + "Recursive" means relabeling of all files on all Pod volumes by the container runtime. + This may be slow for large volumes, but allows mixing privileged and unprivileged Pods sharing the same volume on the same node. + + "MountOption" mounts all eligible Pod volumes with `-o context` mount option. + This requires all Pods that share the same volume to use the same SELinux label. + It is not possible to share the same volume among privileged and unprivileged Pods. + Eligible volumes are in-tree FibreChannel and iSCSI volumes, and all CSI volumes + whose CSI driver announces SELinux support by setting spec.seLinuxMount: true in their + CSIDriver instance. Other volumes are always re-labelled recursively. + "MountOption" value is allowed only when SELinuxMount feature gate is enabled. + + If not specified and SELinuxMount feature gate is enabled, "MountOption" is used. + If not specified and SELinuxMount feature gate is disabled, "MountOption" is used for ReadWriteOncePod volumes + and "Recursive" for all other volumes. + + This field affects only Pods that have SELinux label set, either in PodSecurityContext or in SecurityContext of all containers. + + All Pods that use the same volume should use the same seLinuxChangePolicy, otherwise some pods can get stuck in ContainerCreating state. + Note that this field cannot be set when spec.os.name is windows. + type: string + seLinuxOptions: + description: |- + The SELinux context to be applied to all containers. + If unspecified, the container runtime will allocate a random SELinux context for each + container. May also be set in SecurityContext. If set in + both SecurityContext and PodSecurityContext, the value specified in SecurityContext + takes precedence for that container. + Note that this field cannot be set when spec.os.name is windows. + properties: + level: + description: Level is SELinux level label that applies + to the container. + type: string + role: + description: Role is a SELinux role label that applies + to the container. + type: string + type: + description: Type is a SELinux type label that applies + to the container. + type: string + user: + description: User is a SELinux user label that applies + to the container. + type: string + type: object + seccompProfile: + description: |- + The seccomp options to use by the containers in this pod. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: |- + localhostProfile indicates a profile defined in a file on the node should be used. + The profile must be preconfigured on the node to work. + Must be a descending path, relative to the kubelet's configured seccomp profile location. + Must be set if type is "Localhost". Must NOT be set for any other type. + type: string + type: + description: |- + type indicates which kind of seccomp profile will be applied. + Valid options are: + + Localhost - a profile defined in a file on the node should be used. + RuntimeDefault - the container runtime default profile should be used. + Unconfined - no profile should be applied. + type: string + required: + - type + type: object + supplementalGroups: + description: |- + A list of groups applied to the first process run in each container, in + addition to the container's primary GID and fsGroup (if specified). If + the SupplementalGroupsPolicy feature is enabled, the + supplementalGroupsPolicy field determines whether these are in addition + to or instead of any group memberships defined in the container image. + If unspecified, no additional groups are added, though group memberships + defined in the container image may still be used, depending on the + supplementalGroupsPolicy field. + Note that this field cannot be set when spec.os.name is windows. + items: + format: int64 + type: integer + type: array + x-kubernetes-list-type: atomic + supplementalGroupsPolicy: + description: |- + Defines how supplemental groups of the first container processes are calculated. + Valid values are "Merge" and "Strict". If not specified, "Merge" is used. + (Alpha) Using the field requires the SupplementalGroupsPolicy feature gate to be enabled + and the container runtime must implement support for this feature. + Note that this field cannot be set when spec.os.name is windows. + type: string + sysctls: + description: |- + Sysctls hold a list of namespaced sysctls used for the pod. Pods with unsupported + sysctls (by the container runtime) might fail to launch. + Note that this field cannot be set when spec.os.name is windows. + items: + description: Sysctl defines a kernel parameter to be set + properties: + name: + description: Name of a property to set + type: string + value: + description: Value of a property to set + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + windowsOptions: + description: |- + The Windows specific settings applied to all containers. + If unspecified, the options within a container's SecurityContext will be used. + If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is linux. + properties: + gmsaCredentialSpec: + description: |- + GMSACredentialSpec is where the GMSA admission webhook + (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the + GMSA credential spec named by the GMSACredentialSpecName field. + type: string + gmsaCredentialSpecName: + description: GMSACredentialSpecName is the name of the + GMSA credential spec to use. + type: string + hostProcess: + description: |- + HostProcess determines if a container should be run as a 'Host Process' container. + All of a Pod's containers must have the same effective HostProcess value + (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). + In addition, if HostProcess is true then HostNetwork must also be set to true. + type: boolean + runAsUserName: + description: |- + The UserName in Windows to run the entrypoint of the container process. + Defaults to the user specified in image metadata if unspecified. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: string + type: object + type: object priorityClassName: type: string rollingUpdate: @@ -181,6 +414,199 @@ spec: maxUnavailable: type: string type: object + securityContext: + description: 'Optional: Set default container-level security context + for all DaemonSet pods' + properties: + allowPrivilegeEscalation: + description: |- + AllowPrivilegeEscalation controls whether a process can gain more + privileges than its parent process. This bool directly controls if + the no_new_privs flag will be set on the container process. + AllowPrivilegeEscalation is true always when the container is: + 1) run as Privileged + 2) has CAP_SYS_ADMIN + Note that this field cannot be set when spec.os.name is windows. + type: boolean + appArmorProfile: + description: |- + appArmorProfile is the AppArmor options to use by this container. If set, this profile + overrides the pod's appArmorProfile. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: |- + localhostProfile indicates a profile loaded on the node that should be used. + The profile must be preconfigured on the node to work. + Must match the loaded name of the profile. + Must be set if and only if type is "Localhost". + type: string + type: + description: |- + type indicates which kind of AppArmor profile will be applied. + Valid options are: + Localhost - a profile pre-loaded on the node. + RuntimeDefault - the container runtime's default profile. + Unconfined - no AppArmor enforcement. + type: string + required: + - type + type: object + capabilities: + description: |- + The capabilities to add/drop when running containers. + Defaults to the default set of capabilities granted by the container runtime. + Note that this field cannot be set when spec.os.name is windows. + properties: + add: + description: Added capabilities + items: + description: Capability represent POSIX capabilities + type + type: string + type: array + x-kubernetes-list-type: atomic + drop: + description: Removed capabilities + items: + description: Capability represent POSIX capabilities + type + type: string + type: array + x-kubernetes-list-type: atomic + type: object + privileged: + description: |- + Run container in privileged mode. + Processes in privileged containers are essentially equivalent to root on the host. + Defaults to false. + Note that this field cannot be set when spec.os.name is windows. + type: boolean + procMount: + description: |- + procMount denotes the type of proc mount to use for the containers. + The default value is Default which uses the container runtime defaults for + readonly paths and masked paths. + This requires the ProcMountType feature flag to be enabled. + Note that this field cannot be set when spec.os.name is windows. + type: string + readOnlyRootFilesystem: + description: |- + Whether this container has a read-only root filesystem. + Default is false. + Note that this field cannot be set when spec.os.name is windows. + type: boolean + runAsGroup: + description: |- + The GID to run the entrypoint of the container process. + Uses runtime default if unset. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + runAsNonRoot: + description: |- + Indicates that the container must run as a non-root user. + If true, the Kubelet will validate the image at runtime to ensure that it + does not run as UID 0 (root) and fail to start the container if it does. + If unset or false, no such validation will be performed. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: boolean + runAsUser: + description: |- + The UID to run the entrypoint of the container process. + Defaults to user specified in image metadata if unspecified. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + seLinuxOptions: + description: |- + The SELinux context to be applied to the container. + If unspecified, the container runtime will allocate a random SELinux context for each + container. May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. + properties: + level: + description: Level is SELinux level label that applies + to the container. + type: string + role: + description: Role is a SELinux role label that applies + to the container. + type: string + type: + description: Type is a SELinux type label that applies + to the container. + type: string + user: + description: User is a SELinux user label that applies + to the container. + type: string + type: object + seccompProfile: + description: |- + The seccomp options to use by this container. If seccomp options are + provided at both the pod & container level, the container options + override the pod options. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: |- + localhostProfile indicates a profile defined in a file on the node should be used. + The profile must be preconfigured on the node to work. + Must be a descending path, relative to the kubelet's configured seccomp profile location. + Must be set if type is "Localhost". Must NOT be set for any other type. + type: string + type: + description: |- + type indicates which kind of seccomp profile will be applied. + Valid options are: + + Localhost - a profile defined in a file on the node should be used. + RuntimeDefault - the container runtime default profile should be used. + Unconfined - no profile should be applied. + type: string + required: + - type + type: object + windowsOptions: + description: |- + The Windows specific settings applied to all containers. + If unspecified, the options from the PodSecurityContext will be used. + If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is linux. + properties: + gmsaCredentialSpec: + description: |- + GMSACredentialSpec is where the GMSA admission webhook + (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the + GMSA credential spec named by the GMSACredentialSpecName field. + type: string + gmsaCredentialSpecName: + description: GMSACredentialSpecName is the name of the + GMSA credential spec to use. + type: string + hostProcess: + description: |- + HostProcess determines if a container should be run as a 'Host Process' container. + All of a Pod's containers must have the same effective HostProcess value + (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). + In addition, if HostProcess is true then HostNetwork must also be set to true. + type: boolean + runAsUserName: + description: |- + The UserName in Windows to run the entrypoint of the container process. + Defaults to the user specified in image metadata if unspecified. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: string + type: object + type: object tolerations: description: 'Optional: Set tolerations' items: diff --git a/cmd/nvidia-validator/main.go b/cmd/nvidia-validator/main.go index 277747dd5..71b3f9673 100644 --- a/cmd/nvidia-validator/main.go +++ b/cmd/nvidia-validator/main.go @@ -34,6 +34,7 @@ import ( log "github.com/sirupsen/logrus" "github.com/stretchr/testify/assert/yaml" cli "github.com/urfave/cli/v3" + appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" meta_v1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -1317,6 +1318,8 @@ func (p *Plugin) runWorkload() error { pod.Spec.Tolerations = validatorDaemonset.Spec.Template.Spec.Tolerations // update podSpec with node name, so it will just run on current node pod.Spec.NodeName = nodeNameFlag + // apply labels and annotations from DaemonSet pod template (e.g. spec.daemonsets.labels) + applyDaemonsetMetadataToPod(pod, validatorDaemonset) resourceName, err := p.getGPUResourceName() if err != nil { @@ -1404,6 +1407,30 @@ func loadPodSpec(podSpecPath string) (*corev1.Pod, error) { return &pod, nil } +// applyDaemonsetMetadataToPod merges labels and annotations from the DaemonSet pod template +// onto the validator workload pod (consistent with applyCommonDaemonsetMetadata). +func applyDaemonsetMetadataToPod(pod *corev1.Pod, daemonset *appsv1.DaemonSet) { + if daemonset.Spec.Template.Labels != nil { + if pod.Labels == nil { + pod.Labels = make(map[string]string) + } + for k, v := range daemonset.Spec.Template.Labels { + if k == "app" || k == "app.kubernetes.io/part-of" { + continue + } + pod.Labels[k] = v + } + } + if daemonset.Spec.Template.Annotations != nil { + if pod.Annotations == nil { + pod.Annotations = make(map[string]string) + } + for k, v := range daemonset.Spec.Template.Annotations { + pod.Annotations[k] = v + } + } +} + func (p *Plugin) countGPUResources() (int64, error) { // get node info to check discovered GPU resources node, err := getNode(p.ctx, p.kubeClient) @@ -1581,6 +1608,8 @@ func (c *CUDA) runWorkload() error { pod.Spec.Tolerations = validatorDaemonset.Spec.Template.Spec.Tolerations // update podSpec with node name, so it will just run on current node pod.Spec.NodeName = nodeNameFlag + // apply labels and annotations from DaemonSet pod template (e.g. spec.daemonsets.labels) + applyDaemonsetMetadataToPod(pod, validatorDaemonset) opts := meta_v1.ListOptions{LabelSelector: labels.Set{"app": cudaValidatorLabelValue}.AsSelector().String(), FieldSelector: fields.Set{"spec.nodeName": nodeNameFlag}.AsSelector().String()} diff --git a/cmd/nvidia-validator/main_test.go b/cmd/nvidia-validator/main_test.go index d0199dd18..f6d1273e5 100644 --- a/cmd/nvidia-validator/main_test.go +++ b/cmd/nvidia-validator/main_test.go @@ -19,7 +19,12 @@ package main import ( "context" "os" + "reflect" "testing" + + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + meta_v1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) func Test_isValidComponent(t *testing.T) { @@ -216,3 +221,97 @@ UNKNOWN_FEATURE: true`, }) } } + +func Test_applyDaemonsetMetadataToPod(t *testing.T) { + tests := []struct { + name string + pod *corev1.Pod + daemonset *appsv1.DaemonSet + wantLabels map[string]string + wantAnno map[string]string + }{ + { + name: "empty daemonset template - no change", + pod: &corev1.Pod{ + ObjectMeta: meta_v1.ObjectMeta{Labels: map[string]string{"app": "nvidia-cuda-validator"}}, + }, + daemonset: &appsv1.DaemonSet{ + Spec: appsv1.DaemonSetSpec{ + Template: corev1.PodTemplateSpec{ + ObjectMeta: meta_v1.ObjectMeta{}, + }, + }, + }, + wantLabels: map[string]string{"app": "nvidia-cuda-validator"}, + wantAnno: nil, + }, + { + name: "custom labels applied, app and app.kubernetes.io/part-of skipped", + pod: &corev1.Pod{ + ObjectMeta: meta_v1.ObjectMeta{Labels: map[string]string{"app": "nvidia-cuda-validator"}}, + }, + daemonset: &appsv1.DaemonSet{ + Spec: appsv1.DaemonSetSpec{ + Template: corev1.PodTemplateSpec{ + ObjectMeta: meta_v1.ObjectMeta{ + Labels: map[string]string{ + "app": "should-be-skipped", + "app.kubernetes.io/part-of": "should-be-skipped", + "custom.company.com/team": "gpu-ops", + "custom.company.com/env": "prod", + }, + }, + }, + }, + }, + wantLabels: map[string]string{ + "app": "nvidia-cuda-validator", + "custom.company.com/team": "gpu-ops", + "custom.company.com/env": "prod", + }, + wantAnno: nil, + }, + { + name: "annotations applied", + pod: &corev1.Pod{ObjectMeta: meta_v1.ObjectMeta{Labels: map[string]string{"app": "nvidia-cuda-validator"}}}, + daemonset: &appsv1.DaemonSet{ + Spec: appsv1.DaemonSetSpec{ + Template: corev1.PodTemplateSpec{ + ObjectMeta: meta_v1.ObjectMeta{ + Annotations: map[string]string{"custom.annotation/key": "value"}, + }, + }, + }, + }, + wantLabels: map[string]string{"app": "nvidia-cuda-validator"}, + wantAnno: map[string]string{"custom.annotation/key": "value"}, + }, + { + name: "pod with nil labels gets labels and annotations", + pod: &corev1.Pod{}, + daemonset: &appsv1.DaemonSet{ + Spec: appsv1.DaemonSetSpec{ + Template: corev1.PodTemplateSpec{ + ObjectMeta: meta_v1.ObjectMeta{ + Labels: map[string]string{"extra": "label"}, + Annotations: map[string]string{"extra": "anno"}, + }, + }, + }, + }, + wantLabels: map[string]string{"extra": "label"}, + wantAnno: map[string]string{"extra": "anno"}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + applyDaemonsetMetadataToPod(tt.pod, tt.daemonset) + if !reflect.DeepEqual(tt.pod.Labels, tt.wantLabels) { + t.Errorf("labels = %v, want %v", tt.pod.Labels, tt.wantLabels) + } + if !reflect.DeepEqual(tt.pod.Annotations, tt.wantAnno) { + t.Errorf("annotations = %v, want %v", tt.pod.Annotations, tt.wantAnno) + } + }) + } +} diff --git a/config/crd/bases/nvidia.com_clusterpolicies.yaml b/config/crd/bases/nvidia.com_clusterpolicies.yaml index d82340856..85245ca2e 100644 --- a/config/crd/bases/nvidia.com_clusterpolicies.yaml +++ b/config/crd/bases/nvidia.com_clusterpolicies.yaml @@ -172,6 +172,239 @@ spec: (scope and select) objects. May match selectors of replication controllers and services. type: object + podSecurityContext: + description: 'Optional: Set pod-level security context for all + DaemonSet pods' + properties: + appArmorProfile: + description: |- + appArmorProfile is the AppArmor options to use by the containers in this pod. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: |- + localhostProfile indicates a profile loaded on the node that should be used. + The profile must be preconfigured on the node to work. + Must match the loaded name of the profile. + Must be set if and only if type is "Localhost". + type: string + type: + description: |- + type indicates which kind of AppArmor profile will be applied. + Valid options are: + Localhost - a profile pre-loaded on the node. + RuntimeDefault - the container runtime's default profile. + Unconfined - no AppArmor enforcement. + type: string + required: + - type + type: object + fsGroup: + description: |- + A special supplemental group that applies to all containers in a pod. + Some volume types allow the Kubelet to change the ownership of that volume + to be owned by the pod: + + 1. The owning GID will be the FSGroup + 2. The setgid bit is set (new files created in the volume will be owned by FSGroup) + 3. The permission bits are OR'd with rw-rw---- + + If unset, the Kubelet will not modify the ownership and permissions of any volume. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + fsGroupChangePolicy: + description: |- + fsGroupChangePolicy defines behavior of changing ownership and permission of the volume + before being exposed inside Pod. This field will only apply to + volume types which support fsGroup based ownership(and permissions). + It will have no effect on ephemeral volume types such as: secret, configmaps + and emptydir. + Valid values are "OnRootMismatch" and "Always". If not specified, "Always" is used. + Note that this field cannot be set when spec.os.name is windows. + type: string + runAsGroup: + description: |- + The GID to run the entrypoint of the container process. + Uses runtime default if unset. + May also be set in SecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence + for that container. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + runAsNonRoot: + description: |- + Indicates that the container must run as a non-root user. + If true, the Kubelet will validate the image at runtime to ensure that it + does not run as UID 0 (root) and fail to start the container if it does. + If unset or false, no such validation will be performed. + May also be set in SecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: boolean + runAsUser: + description: |- + The UID to run the entrypoint of the container process. + Defaults to user specified in image metadata if unspecified. + May also be set in SecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence + for that container. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + seLinuxChangePolicy: + description: |- + seLinuxChangePolicy defines how the container's SELinux label is applied to all volumes used by the Pod. + It has no effect on nodes that do not support SELinux or to volumes does not support SELinux. + Valid values are "MountOption" and "Recursive". + + "Recursive" means relabeling of all files on all Pod volumes by the container runtime. + This may be slow for large volumes, but allows mixing privileged and unprivileged Pods sharing the same volume on the same node. + + "MountOption" mounts all eligible Pod volumes with `-o context` mount option. + This requires all Pods that share the same volume to use the same SELinux label. + It is not possible to share the same volume among privileged and unprivileged Pods. + Eligible volumes are in-tree FibreChannel and iSCSI volumes, and all CSI volumes + whose CSI driver announces SELinux support by setting spec.seLinuxMount: true in their + CSIDriver instance. Other volumes are always re-labelled recursively. + "MountOption" value is allowed only when SELinuxMount feature gate is enabled. + + If not specified and SELinuxMount feature gate is enabled, "MountOption" is used. + If not specified and SELinuxMount feature gate is disabled, "MountOption" is used for ReadWriteOncePod volumes + and "Recursive" for all other volumes. + + This field affects only Pods that have SELinux label set, either in PodSecurityContext or in SecurityContext of all containers. + + All Pods that use the same volume should use the same seLinuxChangePolicy, otherwise some pods can get stuck in ContainerCreating state. + Note that this field cannot be set when spec.os.name is windows. + type: string + seLinuxOptions: + description: |- + The SELinux context to be applied to all containers. + If unspecified, the container runtime will allocate a random SELinux context for each + container. May also be set in SecurityContext. If set in + both SecurityContext and PodSecurityContext, the value specified in SecurityContext + takes precedence for that container. + Note that this field cannot be set when spec.os.name is windows. + properties: + level: + description: Level is SELinux level label that applies + to the container. + type: string + role: + description: Role is a SELinux role label that applies + to the container. + type: string + type: + description: Type is a SELinux type label that applies + to the container. + type: string + user: + description: User is a SELinux user label that applies + to the container. + type: string + type: object + seccompProfile: + description: |- + The seccomp options to use by the containers in this pod. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: |- + localhostProfile indicates a profile defined in a file on the node should be used. + The profile must be preconfigured on the node to work. + Must be a descending path, relative to the kubelet's configured seccomp profile location. + Must be set if type is "Localhost". Must NOT be set for any other type. + type: string + type: + description: |- + type indicates which kind of seccomp profile will be applied. + Valid options are: + + Localhost - a profile defined in a file on the node should be used. + RuntimeDefault - the container runtime default profile should be used. + Unconfined - no profile should be applied. + type: string + required: + - type + type: object + supplementalGroups: + description: |- + A list of groups applied to the first process run in each container, in + addition to the container's primary GID and fsGroup (if specified). If + the SupplementalGroupsPolicy feature is enabled, the + supplementalGroupsPolicy field determines whether these are in addition + to or instead of any group memberships defined in the container image. + If unspecified, no additional groups are added, though group memberships + defined in the container image may still be used, depending on the + supplementalGroupsPolicy field. + Note that this field cannot be set when spec.os.name is windows. + items: + format: int64 + type: integer + type: array + x-kubernetes-list-type: atomic + supplementalGroupsPolicy: + description: |- + Defines how supplemental groups of the first container processes are calculated. + Valid values are "Merge" and "Strict". If not specified, "Merge" is used. + (Alpha) Using the field requires the SupplementalGroupsPolicy feature gate to be enabled + and the container runtime must implement support for this feature. + Note that this field cannot be set when spec.os.name is windows. + type: string + sysctls: + description: |- + Sysctls hold a list of namespaced sysctls used for the pod. Pods with unsupported + sysctls (by the container runtime) might fail to launch. + Note that this field cannot be set when spec.os.name is windows. + items: + description: Sysctl defines a kernel parameter to be set + properties: + name: + description: Name of a property to set + type: string + value: + description: Value of a property to set + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + windowsOptions: + description: |- + The Windows specific settings applied to all containers. + If unspecified, the options within a container's SecurityContext will be used. + If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is linux. + properties: + gmsaCredentialSpec: + description: |- + GMSACredentialSpec is where the GMSA admission webhook + (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the + GMSA credential spec named by the GMSACredentialSpecName field. + type: string + gmsaCredentialSpecName: + description: GMSACredentialSpecName is the name of the + GMSA credential spec to use. + type: string + hostProcess: + description: |- + HostProcess determines if a container should be run as a 'Host Process' container. + All of a Pod's containers must have the same effective HostProcess value + (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). + In addition, if HostProcess is true then HostNetwork must also be set to true. + type: boolean + runAsUserName: + description: |- + The UserName in Windows to run the entrypoint of the container process. + Defaults to the user specified in image metadata if unspecified. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: string + type: object + type: object priorityClassName: type: string rollingUpdate: @@ -181,6 +414,199 @@ spec: maxUnavailable: type: string type: object + securityContext: + description: 'Optional: Set default container-level security context + for all DaemonSet pods' + properties: + allowPrivilegeEscalation: + description: |- + AllowPrivilegeEscalation controls whether a process can gain more + privileges than its parent process. This bool directly controls if + the no_new_privs flag will be set on the container process. + AllowPrivilegeEscalation is true always when the container is: + 1) run as Privileged + 2) has CAP_SYS_ADMIN + Note that this field cannot be set when spec.os.name is windows. + type: boolean + appArmorProfile: + description: |- + appArmorProfile is the AppArmor options to use by this container. If set, this profile + overrides the pod's appArmorProfile. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: |- + localhostProfile indicates a profile loaded on the node that should be used. + The profile must be preconfigured on the node to work. + Must match the loaded name of the profile. + Must be set if and only if type is "Localhost". + type: string + type: + description: |- + type indicates which kind of AppArmor profile will be applied. + Valid options are: + Localhost - a profile pre-loaded on the node. + RuntimeDefault - the container runtime's default profile. + Unconfined - no AppArmor enforcement. + type: string + required: + - type + type: object + capabilities: + description: |- + The capabilities to add/drop when running containers. + Defaults to the default set of capabilities granted by the container runtime. + Note that this field cannot be set when spec.os.name is windows. + properties: + add: + description: Added capabilities + items: + description: Capability represent POSIX capabilities + type + type: string + type: array + x-kubernetes-list-type: atomic + drop: + description: Removed capabilities + items: + description: Capability represent POSIX capabilities + type + type: string + type: array + x-kubernetes-list-type: atomic + type: object + privileged: + description: |- + Run container in privileged mode. + Processes in privileged containers are essentially equivalent to root on the host. + Defaults to false. + Note that this field cannot be set when spec.os.name is windows. + type: boolean + procMount: + description: |- + procMount denotes the type of proc mount to use for the containers. + The default value is Default which uses the container runtime defaults for + readonly paths and masked paths. + This requires the ProcMountType feature flag to be enabled. + Note that this field cannot be set when spec.os.name is windows. + type: string + readOnlyRootFilesystem: + description: |- + Whether this container has a read-only root filesystem. + Default is false. + Note that this field cannot be set when spec.os.name is windows. + type: boolean + runAsGroup: + description: |- + The GID to run the entrypoint of the container process. + Uses runtime default if unset. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + runAsNonRoot: + description: |- + Indicates that the container must run as a non-root user. + If true, the Kubelet will validate the image at runtime to ensure that it + does not run as UID 0 (root) and fail to start the container if it does. + If unset or false, no such validation will be performed. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: boolean + runAsUser: + description: |- + The UID to run the entrypoint of the container process. + Defaults to user specified in image metadata if unspecified. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + seLinuxOptions: + description: |- + The SELinux context to be applied to the container. + If unspecified, the container runtime will allocate a random SELinux context for each + container. May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. + properties: + level: + description: Level is SELinux level label that applies + to the container. + type: string + role: + description: Role is a SELinux role label that applies + to the container. + type: string + type: + description: Type is a SELinux type label that applies + to the container. + type: string + user: + description: User is a SELinux user label that applies + to the container. + type: string + type: object + seccompProfile: + description: |- + The seccomp options to use by this container. If seccomp options are + provided at both the pod & container level, the container options + override the pod options. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: |- + localhostProfile indicates a profile defined in a file on the node should be used. + The profile must be preconfigured on the node to work. + Must be a descending path, relative to the kubelet's configured seccomp profile location. + Must be set if type is "Localhost". Must NOT be set for any other type. + type: string + type: + description: |- + type indicates which kind of seccomp profile will be applied. + Valid options are: + + Localhost - a profile defined in a file on the node should be used. + RuntimeDefault - the container runtime default profile should be used. + Unconfined - no profile should be applied. + type: string + required: + - type + type: object + windowsOptions: + description: |- + The Windows specific settings applied to all containers. + If unspecified, the options from the PodSecurityContext will be used. + If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is linux. + properties: + gmsaCredentialSpec: + description: |- + GMSACredentialSpec is where the GMSA admission webhook + (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the + GMSA credential spec named by the GMSACredentialSpecName field. + type: string + gmsaCredentialSpecName: + description: GMSACredentialSpecName is the name of the + GMSA credential spec to use. + type: string + hostProcess: + description: |- + HostProcess determines if a container should be run as a 'Host Process' container. + All of a Pod's containers must have the same effective HostProcess value + (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). + In addition, if HostProcess is true then HostNetwork must also be set to true. + type: boolean + runAsUserName: + description: |- + The UserName in Windows to run the entrypoint of the container process. + Defaults to the user specified in image metadata if unspecified. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: string + type: object + type: object tolerations: description: 'Optional: Set tolerations' items: diff --git a/controllers/object_controls.go b/controllers/object_controls.go index fe7e9df4b..f8e248a25 100644 --- a/controllers/object_controls.go +++ b/controllers/object_controls.go @@ -783,14 +783,14 @@ func preProcessDaemonSet(obj *appsv1.DaemonSet, n ClusterPolicyController) error } // applyCommonDaemonsetMetadata adds additional labels and annotations to the daemonset podSpec if there are any specified -// by the user in the podSpec +// by the user in the podSpec. func applyCommonDaemonsetMetadata(obj *appsv1.DaemonSet, dsSpec *gpuv1.DaemonsetsSpec) { if len(dsSpec.Labels) > 0 { if obj.Spec.Template.Labels == nil { obj.Spec.Template.Labels = make(map[string]string) } for labelKey, labelValue := range dsSpec.Labels { - // if the user specifies an override of the "app" or the ""app.kubernetes.io/part-of"" key, we skip it. + // if the user specifies an override of the "app" or the "app.kubernetes.io/part-of" key, we skip it. // DaemonSet pod selectors are immutable, so we still want the pods to be selectable as before and working // with the existing daemon set selectors. if labelKey == "app" || labelKey == "app.kubernetes.io/part-of" { @@ -810,6 +810,49 @@ func applyCommonDaemonsetMetadata(obj *appsv1.DaemonSet, dsSpec *gpuv1.Daemonset } } +// mergeSecurityContext copies fields from defaults into target only when the target's field is nil. +func mergeSecurityContext(target, defaults *corev1.SecurityContext) { + if target == nil || defaults == nil { + return + } + if target.RunAsUser == nil && defaults.RunAsUser != nil { + target.RunAsUser = defaults.RunAsUser + } + if target.RunAsGroup == nil && defaults.RunAsGroup != nil { + target.RunAsGroup = defaults.RunAsGroup + } + if target.RunAsNonRoot == nil && defaults.RunAsNonRoot != nil { + target.RunAsNonRoot = defaults.RunAsNonRoot + } + if target.ReadOnlyRootFilesystem == nil && defaults.ReadOnlyRootFilesystem != nil { + target.ReadOnlyRootFilesystem = defaults.ReadOnlyRootFilesystem + } + if target.AllowPrivilegeEscalation == nil && defaults.AllowPrivilegeEscalation != nil { + target.AllowPrivilegeEscalation = defaults.AllowPrivilegeEscalation + } + if target.Privileged == nil && defaults.Privileged != nil { + target.Privileged = defaults.Privileged + } + if target.Capabilities == nil && defaults.Capabilities != nil { + target.Capabilities = defaults.Capabilities.DeepCopy() + } + if target.SeccompProfile == nil && defaults.SeccompProfile != nil { + target.SeccompProfile = defaults.SeccompProfile.DeepCopy() + } + if target.SELinuxOptions == nil && defaults.SELinuxOptions != nil { + target.SELinuxOptions = defaults.SELinuxOptions.DeepCopy() + } + if target.AppArmorProfile == nil && defaults.AppArmorProfile != nil { + target.AppArmorProfile = defaults.AppArmorProfile.DeepCopy() + } + if target.ProcMount == nil && defaults.ProcMount != nil { + target.ProcMount = defaults.ProcMount + } + if target.WindowsOptions == nil && defaults.WindowsOptions != nil { + target.WindowsOptions = defaults.WindowsOptions.DeepCopy() + } +} + // Apply common config that is applicable for all Daemonsets func applyCommonDaemonsetConfig(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec) error { // apply daemonset update strategy @@ -827,6 +870,29 @@ func applyCommonDaemonsetConfig(obj *appsv1.DaemonSet, config *gpuv1.ClusterPoli if len(config.Daemonsets.Tolerations) > 0 { obj.Spec.Template.Spec.Tolerations = config.Daemonsets.Tolerations } + + // set pod-level security context if specified + if config.Daemonsets.PodSecurityContext != nil { + obj.Spec.Template.Spec.SecurityContext = config.Daemonsets.PodSecurityContext + } + + // set container-level security context defaults + if config.Daemonsets.SecurityContext != nil { + for i := range obj.Spec.Template.Spec.Containers { + if obj.Spec.Template.Spec.Containers[i].SecurityContext == nil { + obj.Spec.Template.Spec.Containers[i].SecurityContext = config.Daemonsets.SecurityContext.DeepCopy() + } else { + mergeSecurityContext(obj.Spec.Template.Spec.Containers[i].SecurityContext, config.Daemonsets.SecurityContext) + } + } + for i := range obj.Spec.Template.Spec.InitContainers { + if obj.Spec.Template.Spec.InitContainers[i].SecurityContext == nil { + obj.Spec.Template.Spec.InitContainers[i].SecurityContext = config.Daemonsets.SecurityContext.DeepCopy() + } else { + mergeSecurityContext(obj.Spec.Template.Spec.InitContainers[i].SecurityContext, config.Daemonsets.SecurityContext) + } + } + } return nil } diff --git a/controllers/transforms_test.go b/controllers/transforms_test.go index 986308a16..f0316cb4d 100644 --- a/controllers/transforms_test.go +++ b/controllers/transforms_test.go @@ -143,6 +143,11 @@ func (d Daemonset) WithTolerations(tolerations []corev1.Toleration) Daemonset { return d } +func (d Daemonset) WithPodSecurityContext(psc *corev1.PodSecurityContext) Daemonset { + d.Spec.Template.Spec.SecurityContext = psc + return d +} + func (d Daemonset) WithPodLabels(labels map[string]string) Daemonset { d.Spec.Template.Labels = labels return d @@ -708,6 +713,97 @@ func TestApplyCommonDaemonSetConfig(t *testing.T) { }}, errorExpected: true, }, + { + description: "podSecurityContext configured", + ds: NewDaemonset(), + dsSpec: gpuv1.DaemonsetsSpec{ + PodSecurityContext: &corev1.PodSecurityContext{ + RunAsUser: ptr.To(int64(1000)), + RunAsGroup: ptr.To(int64(3000)), + FSGroup: ptr.To(int64(2000)), + RunAsNonRoot: ptr.To(true), + }, + }, + expectedDs: NewDaemonset().WithPodSecurityContext(&corev1.PodSecurityContext{ + RunAsUser: ptr.To(int64(1000)), + RunAsGroup: ptr.To(int64(3000)), + FSGroup: ptr.To(int64(2000)), + RunAsNonRoot: ptr.To(true), + }), + }, + { + description: "securityContext applied to container with nil SecurityContext", + ds: NewDaemonset().WithContainer(corev1.Container{Name: "test-ctr"}), + dsSpec: gpuv1.DaemonsetsSpec{ + SecurityContext: &corev1.SecurityContext{ + RunAsUser: ptr.To(int64(0)), + }, + }, + expectedDs: NewDaemonset().WithContainer(corev1.Container{ + Name: "test-ctr", + SecurityContext: &corev1.SecurityContext{ + RunAsUser: ptr.To(int64(0)), + }, + }), + }, + { + description: "securityContext merged into container with existing SecurityContext", + ds: NewDaemonset().WithContainer(corev1.Container{ + Name: "test-ctr", + SecurityContext: &corev1.SecurityContext{ + Privileged: ptr.To(true), + }, + }), + dsSpec: gpuv1.DaemonsetsSpec{ + SecurityContext: &corev1.SecurityContext{ + RunAsUser: ptr.To(int64(0)), + }, + }, + expectedDs: NewDaemonset().WithContainer(corev1.Container{ + Name: "test-ctr", + SecurityContext: &corev1.SecurityContext{ + Privileged: ptr.To(true), + RunAsUser: ptr.To(int64(0)), + }, + }), + }, + { + description: "securityContext applied to initContainer with nil SecurityContext and merged into initContainer with existing", + ds: NewDaemonset(). + WithInitContainer(corev1.Container{Name: "init-1"}). + WithInitContainer(corev1.Container{ + Name: "init-2", + SecurityContext: &corev1.SecurityContext{ + Privileged: ptr.To(true), + }, + }). + WithContainer(corev1.Container{Name: "main"}), + dsSpec: gpuv1.DaemonsetsSpec{ + SecurityContext: &corev1.SecurityContext{ + RunAsUser: ptr.To(int64(0)), + }, + }, + expectedDs: NewDaemonset(). + WithInitContainer(corev1.Container{ + Name: "init-1", + SecurityContext: &corev1.SecurityContext{ + RunAsUser: ptr.To(int64(0)), + }, + }). + WithInitContainer(corev1.Container{ + Name: "init-2", + SecurityContext: &corev1.SecurityContext{ + Privileged: ptr.To(true), + RunAsUser: ptr.To(int64(0)), + }, + }). + WithContainer(corev1.Container{ + Name: "main", + SecurityContext: &corev1.SecurityContext{ + RunAsUser: ptr.To(int64(0)), + }, + }), + }, } for _, tc := range testCases { @@ -726,6 +822,56 @@ func TestApplyCommonDaemonSetConfig(t *testing.T) { } } +func TestMergeSecurityContext(t *testing.T) { + tests := []struct { + name string + target *corev1.SecurityContext + defaults *corev1.SecurityContext + want *corev1.SecurityContext + }{ + { + name: "target nil - no op", + target: nil, + defaults: &corev1.SecurityContext{RunAsUser: ptr.To(int64(0))}, + want: nil, + }, + { + name: "defaults nil - no op", + target: &corev1.SecurityContext{Privileged: ptr.To(true)}, + defaults: nil, + want: &corev1.SecurityContext{Privileged: ptr.To(true)}, + }, + { + name: "target empty - all defaults applied", + target: &corev1.SecurityContext{}, + defaults: &corev1.SecurityContext{RunAsUser: ptr.To(int64(0)), RunAsNonRoot: ptr.To(false)}, + want: &corev1.SecurityContext{RunAsUser: ptr.To(int64(0)), RunAsNonRoot: ptr.To(false)}, + }, + { + name: "target has Privileged - defaults merge, target preserved", + target: &corev1.SecurityContext{Privileged: ptr.To(true)}, + defaults: &corev1.SecurityContext{RunAsUser: ptr.To(int64(0))}, + want: &corev1.SecurityContext{Privileged: ptr.To(true), RunAsUser: ptr.To(int64(0))}, + }, + { + name: "target has RunAsUser - not overwritten by defaults", + target: &corev1.SecurityContext{RunAsUser: ptr.To(int64(1000))}, + defaults: &corev1.SecurityContext{RunAsUser: ptr.To(int64(0))}, + want: &corev1.SecurityContext{RunAsUser: ptr.To(int64(1000))}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + mergeSecurityContext(tt.target, tt.defaults) + if tt.want == nil { + require.Nil(t, tt.target) + return + } + require.Equal(t, tt.want, tt.target) + }) + } +} + func TestApplyCommonDaemonsetMetadata(t *testing.T) { testCases := []struct { description string diff --git a/deployments/gpu-operator/charts/node-feature-discovery/templates/_helpers.tpl b/deployments/gpu-operator/charts/node-feature-discovery/templates/_helpers.tpl index b540e3bbf..28dc3d426 100644 --- a/deployments/gpu-operator/charts/node-feature-discovery/templates/_helpers.tpl +++ b/deployments/gpu-operator/charts/node-feature-discovery/templates/_helpers.tpl @@ -52,6 +52,9 @@ helm.sh/chart: {{ include "node-feature-discovery.chart" . }} app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} {{- end }} app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- with .Values.commonLabels }} +{{ toYaml . }} +{{- end }} {{- end -}} {{/* diff --git a/deployments/gpu-operator/charts/node-feature-discovery/values.yaml b/deployments/gpu-operator/charts/node-feature-discovery/values.yaml index 86712bd2e..bb9931fee 100644 --- a/deployments/gpu-operator/charts/node-feature-discovery/values.yaml +++ b/deployments/gpu-operator/charts/node-feature-discovery/values.yaml @@ -10,6 +10,9 @@ nameOverride: "" fullnameOverride: "" namespaceOverride: "" +# commonLabels are added to all resources created by this chart (DaemonSets, Deployments, etc.) +commonLabels: {} + featureGates: NodeFeatureGroupAPI: false diff --git a/deployments/gpu-operator/crds/nvidia.com_clusterpolicies.yaml b/deployments/gpu-operator/crds/nvidia.com_clusterpolicies.yaml index d82340856..85245ca2e 100644 --- a/deployments/gpu-operator/crds/nvidia.com_clusterpolicies.yaml +++ b/deployments/gpu-operator/crds/nvidia.com_clusterpolicies.yaml @@ -172,6 +172,239 @@ spec: (scope and select) objects. May match selectors of replication controllers and services. type: object + podSecurityContext: + description: 'Optional: Set pod-level security context for all + DaemonSet pods' + properties: + appArmorProfile: + description: |- + appArmorProfile is the AppArmor options to use by the containers in this pod. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: |- + localhostProfile indicates a profile loaded on the node that should be used. + The profile must be preconfigured on the node to work. + Must match the loaded name of the profile. + Must be set if and only if type is "Localhost". + type: string + type: + description: |- + type indicates which kind of AppArmor profile will be applied. + Valid options are: + Localhost - a profile pre-loaded on the node. + RuntimeDefault - the container runtime's default profile. + Unconfined - no AppArmor enforcement. + type: string + required: + - type + type: object + fsGroup: + description: |- + A special supplemental group that applies to all containers in a pod. + Some volume types allow the Kubelet to change the ownership of that volume + to be owned by the pod: + + 1. The owning GID will be the FSGroup + 2. The setgid bit is set (new files created in the volume will be owned by FSGroup) + 3. The permission bits are OR'd with rw-rw---- + + If unset, the Kubelet will not modify the ownership and permissions of any volume. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + fsGroupChangePolicy: + description: |- + fsGroupChangePolicy defines behavior of changing ownership and permission of the volume + before being exposed inside Pod. This field will only apply to + volume types which support fsGroup based ownership(and permissions). + It will have no effect on ephemeral volume types such as: secret, configmaps + and emptydir. + Valid values are "OnRootMismatch" and "Always". If not specified, "Always" is used. + Note that this field cannot be set when spec.os.name is windows. + type: string + runAsGroup: + description: |- + The GID to run the entrypoint of the container process. + Uses runtime default if unset. + May also be set in SecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence + for that container. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + runAsNonRoot: + description: |- + Indicates that the container must run as a non-root user. + If true, the Kubelet will validate the image at runtime to ensure that it + does not run as UID 0 (root) and fail to start the container if it does. + If unset or false, no such validation will be performed. + May also be set in SecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: boolean + runAsUser: + description: |- + The UID to run the entrypoint of the container process. + Defaults to user specified in image metadata if unspecified. + May also be set in SecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence + for that container. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + seLinuxChangePolicy: + description: |- + seLinuxChangePolicy defines how the container's SELinux label is applied to all volumes used by the Pod. + It has no effect on nodes that do not support SELinux or to volumes does not support SELinux. + Valid values are "MountOption" and "Recursive". + + "Recursive" means relabeling of all files on all Pod volumes by the container runtime. + This may be slow for large volumes, but allows mixing privileged and unprivileged Pods sharing the same volume on the same node. + + "MountOption" mounts all eligible Pod volumes with `-o context` mount option. + This requires all Pods that share the same volume to use the same SELinux label. + It is not possible to share the same volume among privileged and unprivileged Pods. + Eligible volumes are in-tree FibreChannel and iSCSI volumes, and all CSI volumes + whose CSI driver announces SELinux support by setting spec.seLinuxMount: true in their + CSIDriver instance. Other volumes are always re-labelled recursively. + "MountOption" value is allowed only when SELinuxMount feature gate is enabled. + + If not specified and SELinuxMount feature gate is enabled, "MountOption" is used. + If not specified and SELinuxMount feature gate is disabled, "MountOption" is used for ReadWriteOncePod volumes + and "Recursive" for all other volumes. + + This field affects only Pods that have SELinux label set, either in PodSecurityContext or in SecurityContext of all containers. + + All Pods that use the same volume should use the same seLinuxChangePolicy, otherwise some pods can get stuck in ContainerCreating state. + Note that this field cannot be set when spec.os.name is windows. + type: string + seLinuxOptions: + description: |- + The SELinux context to be applied to all containers. + If unspecified, the container runtime will allocate a random SELinux context for each + container. May also be set in SecurityContext. If set in + both SecurityContext and PodSecurityContext, the value specified in SecurityContext + takes precedence for that container. + Note that this field cannot be set when spec.os.name is windows. + properties: + level: + description: Level is SELinux level label that applies + to the container. + type: string + role: + description: Role is a SELinux role label that applies + to the container. + type: string + type: + description: Type is a SELinux type label that applies + to the container. + type: string + user: + description: User is a SELinux user label that applies + to the container. + type: string + type: object + seccompProfile: + description: |- + The seccomp options to use by the containers in this pod. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: |- + localhostProfile indicates a profile defined in a file on the node should be used. + The profile must be preconfigured on the node to work. + Must be a descending path, relative to the kubelet's configured seccomp profile location. + Must be set if type is "Localhost". Must NOT be set for any other type. + type: string + type: + description: |- + type indicates which kind of seccomp profile will be applied. + Valid options are: + + Localhost - a profile defined in a file on the node should be used. + RuntimeDefault - the container runtime default profile should be used. + Unconfined - no profile should be applied. + type: string + required: + - type + type: object + supplementalGroups: + description: |- + A list of groups applied to the first process run in each container, in + addition to the container's primary GID and fsGroup (if specified). If + the SupplementalGroupsPolicy feature is enabled, the + supplementalGroupsPolicy field determines whether these are in addition + to or instead of any group memberships defined in the container image. + If unspecified, no additional groups are added, though group memberships + defined in the container image may still be used, depending on the + supplementalGroupsPolicy field. + Note that this field cannot be set when spec.os.name is windows. + items: + format: int64 + type: integer + type: array + x-kubernetes-list-type: atomic + supplementalGroupsPolicy: + description: |- + Defines how supplemental groups of the first container processes are calculated. + Valid values are "Merge" and "Strict". If not specified, "Merge" is used. + (Alpha) Using the field requires the SupplementalGroupsPolicy feature gate to be enabled + and the container runtime must implement support for this feature. + Note that this field cannot be set when spec.os.name is windows. + type: string + sysctls: + description: |- + Sysctls hold a list of namespaced sysctls used for the pod. Pods with unsupported + sysctls (by the container runtime) might fail to launch. + Note that this field cannot be set when spec.os.name is windows. + items: + description: Sysctl defines a kernel parameter to be set + properties: + name: + description: Name of a property to set + type: string + value: + description: Value of a property to set + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + windowsOptions: + description: |- + The Windows specific settings applied to all containers. + If unspecified, the options within a container's SecurityContext will be used. + If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is linux. + properties: + gmsaCredentialSpec: + description: |- + GMSACredentialSpec is where the GMSA admission webhook + (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the + GMSA credential spec named by the GMSACredentialSpecName field. + type: string + gmsaCredentialSpecName: + description: GMSACredentialSpecName is the name of the + GMSA credential spec to use. + type: string + hostProcess: + description: |- + HostProcess determines if a container should be run as a 'Host Process' container. + All of a Pod's containers must have the same effective HostProcess value + (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). + In addition, if HostProcess is true then HostNetwork must also be set to true. + type: boolean + runAsUserName: + description: |- + The UserName in Windows to run the entrypoint of the container process. + Defaults to the user specified in image metadata if unspecified. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: string + type: object + type: object priorityClassName: type: string rollingUpdate: @@ -181,6 +414,199 @@ spec: maxUnavailable: type: string type: object + securityContext: + description: 'Optional: Set default container-level security context + for all DaemonSet pods' + properties: + allowPrivilegeEscalation: + description: |- + AllowPrivilegeEscalation controls whether a process can gain more + privileges than its parent process. This bool directly controls if + the no_new_privs flag will be set on the container process. + AllowPrivilegeEscalation is true always when the container is: + 1) run as Privileged + 2) has CAP_SYS_ADMIN + Note that this field cannot be set when spec.os.name is windows. + type: boolean + appArmorProfile: + description: |- + appArmorProfile is the AppArmor options to use by this container. If set, this profile + overrides the pod's appArmorProfile. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: |- + localhostProfile indicates a profile loaded on the node that should be used. + The profile must be preconfigured on the node to work. + Must match the loaded name of the profile. + Must be set if and only if type is "Localhost". + type: string + type: + description: |- + type indicates which kind of AppArmor profile will be applied. + Valid options are: + Localhost - a profile pre-loaded on the node. + RuntimeDefault - the container runtime's default profile. + Unconfined - no AppArmor enforcement. + type: string + required: + - type + type: object + capabilities: + description: |- + The capabilities to add/drop when running containers. + Defaults to the default set of capabilities granted by the container runtime. + Note that this field cannot be set when spec.os.name is windows. + properties: + add: + description: Added capabilities + items: + description: Capability represent POSIX capabilities + type + type: string + type: array + x-kubernetes-list-type: atomic + drop: + description: Removed capabilities + items: + description: Capability represent POSIX capabilities + type + type: string + type: array + x-kubernetes-list-type: atomic + type: object + privileged: + description: |- + Run container in privileged mode. + Processes in privileged containers are essentially equivalent to root on the host. + Defaults to false. + Note that this field cannot be set when spec.os.name is windows. + type: boolean + procMount: + description: |- + procMount denotes the type of proc mount to use for the containers. + The default value is Default which uses the container runtime defaults for + readonly paths and masked paths. + This requires the ProcMountType feature flag to be enabled. + Note that this field cannot be set when spec.os.name is windows. + type: string + readOnlyRootFilesystem: + description: |- + Whether this container has a read-only root filesystem. + Default is false. + Note that this field cannot be set when spec.os.name is windows. + type: boolean + runAsGroup: + description: |- + The GID to run the entrypoint of the container process. + Uses runtime default if unset. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + runAsNonRoot: + description: |- + Indicates that the container must run as a non-root user. + If true, the Kubelet will validate the image at runtime to ensure that it + does not run as UID 0 (root) and fail to start the container if it does. + If unset or false, no such validation will be performed. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: boolean + runAsUser: + description: |- + The UID to run the entrypoint of the container process. + Defaults to user specified in image metadata if unspecified. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + seLinuxOptions: + description: |- + The SELinux context to be applied to the container. + If unspecified, the container runtime will allocate a random SELinux context for each + container. May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. + properties: + level: + description: Level is SELinux level label that applies + to the container. + type: string + role: + description: Role is a SELinux role label that applies + to the container. + type: string + type: + description: Type is a SELinux type label that applies + to the container. + type: string + user: + description: User is a SELinux user label that applies + to the container. + type: string + type: object + seccompProfile: + description: |- + The seccomp options to use by this container. If seccomp options are + provided at both the pod & container level, the container options + override the pod options. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: |- + localhostProfile indicates a profile defined in a file on the node should be used. + The profile must be preconfigured on the node to work. + Must be a descending path, relative to the kubelet's configured seccomp profile location. + Must be set if type is "Localhost". Must NOT be set for any other type. + type: string + type: + description: |- + type indicates which kind of seccomp profile will be applied. + Valid options are: + + Localhost - a profile defined in a file on the node should be used. + RuntimeDefault - the container runtime default profile should be used. + Unconfined - no profile should be applied. + type: string + required: + - type + type: object + windowsOptions: + description: |- + The Windows specific settings applied to all containers. + If unspecified, the options from the PodSecurityContext will be used. + If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is linux. + properties: + gmsaCredentialSpec: + description: |- + GMSACredentialSpec is where the GMSA admission webhook + (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the + GMSA credential spec named by the GMSACredentialSpecName field. + type: string + gmsaCredentialSpecName: + description: GMSACredentialSpecName is the name of the + GMSA credential spec to use. + type: string + hostProcess: + description: |- + HostProcess determines if a container should be run as a 'Host Process' container. + All of a Pod's containers must have the same effective HostProcess value + (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). + In addition, if HostProcess is true then HostNetwork must also be set to true. + type: boolean + runAsUserName: + description: |- + The UserName in Windows to run the entrypoint of the container process. + Defaults to the user specified in image metadata if unspecified. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: string + type: object + type: object tolerations: description: 'Optional: Set tolerations' items: