diff --git a/api/nvidia/v1/clusterpolicy_types.go b/api/nvidia/v1/clusterpolicy_types.go index 121493c7d..db117c619 100644 --- a/api/nvidia/v1/clusterpolicy_types.go +++ b/api/nvidia/v1/clusterpolicy_types.go @@ -245,6 +245,12 @@ type DaemonsetsSpec struct { // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true // +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Rolling update configuration for all DaemonSet pods" RollingUpdate *RollingUpdateSpec `json:"rollingUpdate,omitempty"` + + // Optional: Set pod-level security context for all DaemonSet pods + PodSecurityContext *corev1.PodSecurityContext `json:"podSecurityContext,omitempty"` + + // Optional: Set default container-level security context for all DaemonSet pods + SecurityContext *corev1.SecurityContext `json:"securityContext,omitempty"` } // InitContainerSpec describes configuration for initContainer image used with all components diff --git a/api/nvidia/v1/zz_generated.deepcopy.go b/api/nvidia/v1/zz_generated.deepcopy.go index 3bb70f073..c023a499b 100644 --- a/api/nvidia/v1/zz_generated.deepcopy.go +++ b/api/nvidia/v1/zz_generated.deepcopy.go @@ -500,6 +500,16 @@ func (in *DaemonsetsSpec) DeepCopyInto(out *DaemonsetsSpec) { *out = new(RollingUpdateSpec) **out = **in } + if in.PodSecurityContext != nil { + in, out := &in.PodSecurityContext, &out.PodSecurityContext + *out = new(corev1.PodSecurityContext) + (*in).DeepCopyInto(*out) + } + if in.SecurityContext != nil { + in, out := &in.SecurityContext, &out.SecurityContext + *out = new(corev1.SecurityContext) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DaemonsetsSpec. diff --git a/bundle/manifests/nvidia.com_clusterpolicies.yaml b/bundle/manifests/nvidia.com_clusterpolicies.yaml index d82340856..85245ca2e 100644 --- a/bundle/manifests/nvidia.com_clusterpolicies.yaml +++ b/bundle/manifests/nvidia.com_clusterpolicies.yaml @@ -172,6 +172,239 @@ spec: (scope and select) objects. May match selectors of replication controllers and services. type: object + podSecurityContext: + description: 'Optional: Set pod-level security context for all + DaemonSet pods' + properties: + appArmorProfile: + description: |- + appArmorProfile is the AppArmor options to use by the containers in this pod. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: |- + localhostProfile indicates a profile loaded on the node that should be used. + The profile must be preconfigured on the node to work. + Must match the loaded name of the profile. + Must be set if and only if type is "Localhost". + type: string + type: + description: |- + type indicates which kind of AppArmor profile will be applied. + Valid options are: + Localhost - a profile pre-loaded on the node. + RuntimeDefault - the container runtime's default profile. + Unconfined - no AppArmor enforcement. + type: string + required: + - type + type: object + fsGroup: + description: |- + A special supplemental group that applies to all containers in a pod. + Some volume types allow the Kubelet to change the ownership of that volume + to be owned by the pod: + + 1. The owning GID will be the FSGroup + 2. The setgid bit is set (new files created in the volume will be owned by FSGroup) + 3. The permission bits are OR'd with rw-rw---- + + If unset, the Kubelet will not modify the ownership and permissions of any volume. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + fsGroupChangePolicy: + description: |- + fsGroupChangePolicy defines behavior of changing ownership and permission of the volume + before being exposed inside Pod. This field will only apply to + volume types which support fsGroup based ownership(and permissions). + It will have no effect on ephemeral volume types such as: secret, configmaps + and emptydir. + Valid values are "OnRootMismatch" and "Always". If not specified, "Always" is used. + Note that this field cannot be set when spec.os.name is windows. + type: string + runAsGroup: + description: |- + The GID to run the entrypoint of the container process. + Uses runtime default if unset. + May also be set in SecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence + for that container. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + runAsNonRoot: + description: |- + Indicates that the container must run as a non-root user. + If true, the Kubelet will validate the image at runtime to ensure that it + does not run as UID 0 (root) and fail to start the container if it does. + If unset or false, no such validation will be performed. + May also be set in SecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: boolean + runAsUser: + description: |- + The UID to run the entrypoint of the container process. + Defaults to user specified in image metadata if unspecified. + May also be set in SecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence + for that container. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + seLinuxChangePolicy: + description: |- + seLinuxChangePolicy defines how the container's SELinux label is applied to all volumes used by the Pod. + It has no effect on nodes that do not support SELinux or to volumes does not support SELinux. + Valid values are "MountOption" and "Recursive". + + "Recursive" means relabeling of all files on all Pod volumes by the container runtime. + This may be slow for large volumes, but allows mixing privileged and unprivileged Pods sharing the same volume on the same node. + + "MountOption" mounts all eligible Pod volumes with `-o context` mount option. + This requires all Pods that share the same volume to use the same SELinux label. + It is not possible to share the same volume among privileged and unprivileged Pods. + Eligible volumes are in-tree FibreChannel and iSCSI volumes, and all CSI volumes + whose CSI driver announces SELinux support by setting spec.seLinuxMount: true in their + CSIDriver instance. Other volumes are always re-labelled recursively. + "MountOption" value is allowed only when SELinuxMount feature gate is enabled. + + If not specified and SELinuxMount feature gate is enabled, "MountOption" is used. + If not specified and SELinuxMount feature gate is disabled, "MountOption" is used for ReadWriteOncePod volumes + and "Recursive" for all other volumes. + + This field affects only Pods that have SELinux label set, either in PodSecurityContext or in SecurityContext of all containers. + + All Pods that use the same volume should use the same seLinuxChangePolicy, otherwise some pods can get stuck in ContainerCreating state. + Note that this field cannot be set when spec.os.name is windows. + type: string + seLinuxOptions: + description: |- + The SELinux context to be applied to all containers. + If unspecified, the container runtime will allocate a random SELinux context for each + container. May also be set in SecurityContext. If set in + both SecurityContext and PodSecurityContext, the value specified in SecurityContext + takes precedence for that container. + Note that this field cannot be set when spec.os.name is windows. + properties: + level: + description: Level is SELinux level label that applies + to the container. + type: string + role: + description: Role is a SELinux role label that applies + to the container. + type: string + type: + description: Type is a SELinux type label that applies + to the container. + type: string + user: + description: User is a SELinux user label that applies + to the container. + type: string + type: object + seccompProfile: + description: |- + The seccomp options to use by the containers in this pod. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: |- + localhostProfile indicates a profile defined in a file on the node should be used. + The profile must be preconfigured on the node to work. + Must be a descending path, relative to the kubelet's configured seccomp profile location. + Must be set if type is "Localhost". Must NOT be set for any other type. + type: string + type: + description: |- + type indicates which kind of seccomp profile will be applied. + Valid options are: + + Localhost - a profile defined in a file on the node should be used. + RuntimeDefault - the container runtime default profile should be used. + Unconfined - no profile should be applied. + type: string + required: + - type + type: object + supplementalGroups: + description: |- + A list of groups applied to the first process run in each container, in + addition to the container's primary GID and fsGroup (if specified). If + the SupplementalGroupsPolicy feature is enabled, the + supplementalGroupsPolicy field determines whether these are in addition + to or instead of any group memberships defined in the container image. + If unspecified, no additional groups are added, though group memberships + defined in the container image may still be used, depending on the + supplementalGroupsPolicy field. + Note that this field cannot be set when spec.os.name is windows. + items: + format: int64 + type: integer + type: array + x-kubernetes-list-type: atomic + supplementalGroupsPolicy: + description: |- + Defines how supplemental groups of the first container processes are calculated. + Valid values are "Merge" and "Strict". If not specified, "Merge" is used. + (Alpha) Using the field requires the SupplementalGroupsPolicy feature gate to be enabled + and the container runtime must implement support for this feature. + Note that this field cannot be set when spec.os.name is windows. + type: string + sysctls: + description: |- + Sysctls hold a list of namespaced sysctls used for the pod. Pods with unsupported + sysctls (by the container runtime) might fail to launch. + Note that this field cannot be set when spec.os.name is windows. + items: + description: Sysctl defines a kernel parameter to be set + properties: + name: + description: Name of a property to set + type: string + value: + description: Value of a property to set + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + windowsOptions: + description: |- + The Windows specific settings applied to all containers. + If unspecified, the options within a container's SecurityContext will be used. + If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is linux. + properties: + gmsaCredentialSpec: + description: |- + GMSACredentialSpec is where the GMSA admission webhook + (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the + GMSA credential spec named by the GMSACredentialSpecName field. + type: string + gmsaCredentialSpecName: + description: GMSACredentialSpecName is the name of the + GMSA credential spec to use. + type: string + hostProcess: + description: |- + HostProcess determines if a container should be run as a 'Host Process' container. + All of a Pod's containers must have the same effective HostProcess value + (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). + In addition, if HostProcess is true then HostNetwork must also be set to true. + type: boolean + runAsUserName: + description: |- + The UserName in Windows to run the entrypoint of the container process. + Defaults to the user specified in image metadata if unspecified. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: string + type: object + type: object priorityClassName: type: string rollingUpdate: @@ -181,6 +414,199 @@ spec: maxUnavailable: type: string type: object + securityContext: + description: 'Optional: Set default container-level security context + for all DaemonSet pods' + properties: + allowPrivilegeEscalation: + description: |- + AllowPrivilegeEscalation controls whether a process can gain more + privileges than its parent process. This bool directly controls if + the no_new_privs flag will be set on the container process. + AllowPrivilegeEscalation is true always when the container is: + 1) run as Privileged + 2) has CAP_SYS_ADMIN + Note that this field cannot be set when spec.os.name is windows. + type: boolean + appArmorProfile: + description: |- + appArmorProfile is the AppArmor options to use by this container. If set, this profile + overrides the pod's appArmorProfile. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: |- + localhostProfile indicates a profile loaded on the node that should be used. + The profile must be preconfigured on the node to work. + Must match the loaded name of the profile. + Must be set if and only if type is "Localhost". + type: string + type: + description: |- + type indicates which kind of AppArmor profile will be applied. + Valid options are: + Localhost - a profile pre-loaded on the node. + RuntimeDefault - the container runtime's default profile. + Unconfined - no AppArmor enforcement. + type: string + required: + - type + type: object + capabilities: + description: |- + The capabilities to add/drop when running containers. + Defaults to the default set of capabilities granted by the container runtime. + Note that this field cannot be set when spec.os.name is windows. + properties: + add: + description: Added capabilities + items: + description: Capability represent POSIX capabilities + type + type: string + type: array + x-kubernetes-list-type: atomic + drop: + description: Removed capabilities + items: + description: Capability represent POSIX capabilities + type + type: string + type: array + x-kubernetes-list-type: atomic + type: object + privileged: + description: |- + Run container in privileged mode. + Processes in privileged containers are essentially equivalent to root on the host. + Defaults to false. + Note that this field cannot be set when spec.os.name is windows. + type: boolean + procMount: + description: |- + procMount denotes the type of proc mount to use for the containers. + The default value is Default which uses the container runtime defaults for + readonly paths and masked paths. + This requires the ProcMountType feature flag to be enabled. + Note that this field cannot be set when spec.os.name is windows. + type: string + readOnlyRootFilesystem: + description: |- + Whether this container has a read-only root filesystem. + Default is false. + Note that this field cannot be set when spec.os.name is windows. + type: boolean + runAsGroup: + description: |- + The GID to run the entrypoint of the container process. + Uses runtime default if unset. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + runAsNonRoot: + description: |- + Indicates that the container must run as a non-root user. + If true, the Kubelet will validate the image at runtime to ensure that it + does not run as UID 0 (root) and fail to start the container if it does. + If unset or false, no such validation will be performed. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: boolean + runAsUser: + description: |- + The UID to run the entrypoint of the container process. + Defaults to user specified in image metadata if unspecified. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + seLinuxOptions: + description: |- + The SELinux context to be applied to the container. + If unspecified, the container runtime will allocate a random SELinux context for each + container. May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. + properties: + level: + description: Level is SELinux level label that applies + to the container. + type: string + role: + description: Role is a SELinux role label that applies + to the container. + type: string + type: + description: Type is a SELinux type label that applies + to the container. + type: string + user: + description: User is a SELinux user label that applies + to the container. + type: string + type: object + seccompProfile: + description: |- + The seccomp options to use by this container. If seccomp options are + provided at both the pod & container level, the container options + override the pod options. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: |- + localhostProfile indicates a profile defined in a file on the node should be used. + The profile must be preconfigured on the node to work. + Must be a descending path, relative to the kubelet's configured seccomp profile location. + Must be set if type is "Localhost". Must NOT be set for any other type. + type: string + type: + description: |- + type indicates which kind of seccomp profile will be applied. + Valid options are: + + Localhost - a profile defined in a file on the node should be used. + RuntimeDefault - the container runtime default profile should be used. + Unconfined - no profile should be applied. + type: string + required: + - type + type: object + windowsOptions: + description: |- + The Windows specific settings applied to all containers. + If unspecified, the options from the PodSecurityContext will be used. + If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is linux. + properties: + gmsaCredentialSpec: + description: |- + GMSACredentialSpec is where the GMSA admission webhook + (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the + GMSA credential spec named by the GMSACredentialSpecName field. + type: string + gmsaCredentialSpecName: + description: GMSACredentialSpecName is the name of the + GMSA credential spec to use. + type: string + hostProcess: + description: |- + HostProcess determines if a container should be run as a 'Host Process' container. + All of a Pod's containers must have the same effective HostProcess value + (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). + In addition, if HostProcess is true then HostNetwork must also be set to true. + type: boolean + runAsUserName: + description: |- + The UserName in Windows to run the entrypoint of the container process. + Defaults to the user specified in image metadata if unspecified. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: string + type: object + type: object tolerations: description: 'Optional: Set tolerations' items: diff --git a/cmd/nvidia-validator/main.go b/cmd/nvidia-validator/main.go index 277747dd5..71b3f9673 100644 --- a/cmd/nvidia-validator/main.go +++ b/cmd/nvidia-validator/main.go @@ -34,6 +34,7 @@ import ( log "github.com/sirupsen/logrus" "github.com/stretchr/testify/assert/yaml" cli "github.com/urfave/cli/v3" + appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" meta_v1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -1317,6 +1318,8 @@ func (p *Plugin) runWorkload() error { pod.Spec.Tolerations = validatorDaemonset.Spec.Template.Spec.Tolerations // update podSpec with node name, so it will just run on current node pod.Spec.NodeName = nodeNameFlag + // apply labels and annotations from DaemonSet pod template (e.g. spec.daemonsets.labels) + applyDaemonsetMetadataToPod(pod, validatorDaemonset) resourceName, err := p.getGPUResourceName() if err != nil { @@ -1404,6 +1407,30 @@ func loadPodSpec(podSpecPath string) (*corev1.Pod, error) { return &pod, nil } +// applyDaemonsetMetadataToPod merges labels and annotations from the DaemonSet pod template +// onto the validator workload pod (consistent with applyCommonDaemonsetMetadata). +func applyDaemonsetMetadataToPod(pod *corev1.Pod, daemonset *appsv1.DaemonSet) { + if daemonset.Spec.Template.Labels != nil { + if pod.Labels == nil { + pod.Labels = make(map[string]string) + } + for k, v := range daemonset.Spec.Template.Labels { + if k == "app" || k == "app.kubernetes.io/part-of" { + continue + } + pod.Labels[k] = v + } + } + if daemonset.Spec.Template.Annotations != nil { + if pod.Annotations == nil { + pod.Annotations = make(map[string]string) + } + for k, v := range daemonset.Spec.Template.Annotations { + pod.Annotations[k] = v + } + } +} + func (p *Plugin) countGPUResources() (int64, error) { // get node info to check discovered GPU resources node, err := getNode(p.ctx, p.kubeClient) @@ -1581,6 +1608,8 @@ func (c *CUDA) runWorkload() error { pod.Spec.Tolerations = validatorDaemonset.Spec.Template.Spec.Tolerations // update podSpec with node name, so it will just run on current node pod.Spec.NodeName = nodeNameFlag + // apply labels and annotations from DaemonSet pod template (e.g. spec.daemonsets.labels) + applyDaemonsetMetadataToPod(pod, validatorDaemonset) opts := meta_v1.ListOptions{LabelSelector: labels.Set{"app": cudaValidatorLabelValue}.AsSelector().String(), FieldSelector: fields.Set{"spec.nodeName": nodeNameFlag}.AsSelector().String()} diff --git a/cmd/nvidia-validator/main_test.go b/cmd/nvidia-validator/main_test.go index d0199dd18..f6d1273e5 100644 --- a/cmd/nvidia-validator/main_test.go +++ b/cmd/nvidia-validator/main_test.go @@ -19,7 +19,12 @@ package main import ( "context" "os" + "reflect" "testing" + + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + meta_v1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) func Test_isValidComponent(t *testing.T) { @@ -216,3 +221,97 @@ UNKNOWN_FEATURE: true`, }) } } + +func Test_applyDaemonsetMetadataToPod(t *testing.T) { + tests := []struct { + name string + pod *corev1.Pod + daemonset *appsv1.DaemonSet + wantLabels map[string]string + wantAnno map[string]string + }{ + { + name: "empty daemonset template - no change", + pod: &corev1.Pod{ + ObjectMeta: meta_v1.ObjectMeta{Labels: map[string]string{"app": "nvidia-cuda-validator"}}, + }, + daemonset: &appsv1.DaemonSet{ + Spec: appsv1.DaemonSetSpec{ + Template: corev1.PodTemplateSpec{ + ObjectMeta: meta_v1.ObjectMeta{}, + }, + }, + }, + wantLabels: map[string]string{"app": "nvidia-cuda-validator"}, + wantAnno: nil, + }, + { + name: "custom labels applied, app and app.kubernetes.io/part-of skipped", + pod: &corev1.Pod{ + ObjectMeta: meta_v1.ObjectMeta{Labels: map[string]string{"app": "nvidia-cuda-validator"}}, + }, + daemonset: &appsv1.DaemonSet{ + Spec: appsv1.DaemonSetSpec{ + Template: corev1.PodTemplateSpec{ + ObjectMeta: meta_v1.ObjectMeta{ + Labels: map[string]string{ + "app": "should-be-skipped", + "app.kubernetes.io/part-of": "should-be-skipped", + "custom.company.com/team": "gpu-ops", + "custom.company.com/env": "prod", + }, + }, + }, + }, + }, + wantLabels: map[string]string{ + "app": "nvidia-cuda-validator", + "custom.company.com/team": "gpu-ops", + "custom.company.com/env": "prod", + }, + wantAnno: nil, + }, + { + name: "annotations applied", + pod: &corev1.Pod{ObjectMeta: meta_v1.ObjectMeta{Labels: map[string]string{"app": "nvidia-cuda-validator"}}}, + daemonset: &appsv1.DaemonSet{ + Spec: appsv1.DaemonSetSpec{ + Template: corev1.PodTemplateSpec{ + ObjectMeta: meta_v1.ObjectMeta{ + Annotations: map[string]string{"custom.annotation/key": "value"}, + }, + }, + }, + }, + wantLabels: map[string]string{"app": "nvidia-cuda-validator"}, + wantAnno: map[string]string{"custom.annotation/key": "value"}, + }, + { + name: "pod with nil labels gets labels and annotations", + pod: &corev1.Pod{}, + daemonset: &appsv1.DaemonSet{ + Spec: appsv1.DaemonSetSpec{ + Template: corev1.PodTemplateSpec{ + ObjectMeta: meta_v1.ObjectMeta{ + Labels: map[string]string{"extra": "label"}, + Annotations: map[string]string{"extra": "anno"}, + }, + }, + }, + }, + wantLabels: map[string]string{"extra": "label"}, + wantAnno: map[string]string{"extra": "anno"}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + applyDaemonsetMetadataToPod(tt.pod, tt.daemonset) + if !reflect.DeepEqual(tt.pod.Labels, tt.wantLabels) { + t.Errorf("labels = %v, want %v", tt.pod.Labels, tt.wantLabels) + } + if !reflect.DeepEqual(tt.pod.Annotations, tt.wantAnno) { + t.Errorf("annotations = %v, want %v", tt.pod.Annotations, tt.wantAnno) + } + }) + } +} diff --git a/config/crd/bases/nvidia.com_clusterpolicies.yaml b/config/crd/bases/nvidia.com_clusterpolicies.yaml index d82340856..85245ca2e 100644 --- a/config/crd/bases/nvidia.com_clusterpolicies.yaml +++ b/config/crd/bases/nvidia.com_clusterpolicies.yaml @@ -172,6 +172,239 @@ spec: (scope and select) objects. May match selectors of replication controllers and services. type: object + podSecurityContext: + description: 'Optional: Set pod-level security context for all + DaemonSet pods' + properties: + appArmorProfile: + description: |- + appArmorProfile is the AppArmor options to use by the containers in this pod. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: |- + localhostProfile indicates a profile loaded on the node that should be used. + The profile must be preconfigured on the node to work. + Must match the loaded name of the profile. + Must be set if and only if type is "Localhost". + type: string + type: + description: |- + type indicates which kind of AppArmor profile will be applied. + Valid options are: + Localhost - a profile pre-loaded on the node. + RuntimeDefault - the container runtime's default profile. + Unconfined - no AppArmor enforcement. + type: string + required: + - type + type: object + fsGroup: + description: |- + A special supplemental group that applies to all containers in a pod. + Some volume types allow the Kubelet to change the ownership of that volume + to be owned by the pod: + + 1. The owning GID will be the FSGroup + 2. The setgid bit is set (new files created in the volume will be owned by FSGroup) + 3. The permission bits are OR'd with rw-rw---- + + If unset, the Kubelet will not modify the ownership and permissions of any volume. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + fsGroupChangePolicy: + description: |- + fsGroupChangePolicy defines behavior of changing ownership and permission of the volume + before being exposed inside Pod. This field will only apply to + volume types which support fsGroup based ownership(and permissions). + It will have no effect on ephemeral volume types such as: secret, configmaps + and emptydir. + Valid values are "OnRootMismatch" and "Always". If not specified, "Always" is used. + Note that this field cannot be set when spec.os.name is windows. + type: string + runAsGroup: + description: |- + The GID to run the entrypoint of the container process. + Uses runtime default if unset. + May also be set in SecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence + for that container. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + runAsNonRoot: + description: |- + Indicates that the container must run as a non-root user. + If true, the Kubelet will validate the image at runtime to ensure that it + does not run as UID 0 (root) and fail to start the container if it does. + If unset or false, no such validation will be performed. + May also be set in SecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: boolean + runAsUser: + description: |- + The UID to run the entrypoint of the container process. + Defaults to user specified in image metadata if unspecified. + May also be set in SecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence + for that container. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + seLinuxChangePolicy: + description: |- + seLinuxChangePolicy defines how the container's SELinux label is applied to all volumes used by the Pod. + It has no effect on nodes that do not support SELinux or to volumes does not support SELinux. + Valid values are "MountOption" and "Recursive". + + "Recursive" means relabeling of all files on all Pod volumes by the container runtime. + This may be slow for large volumes, but allows mixing privileged and unprivileged Pods sharing the same volume on the same node. + + "MountOption" mounts all eligible Pod volumes with `-o context` mount option. + This requires all Pods that share the same volume to use the same SELinux label. + It is not possible to share the same volume among privileged and unprivileged Pods. + Eligible volumes are in-tree FibreChannel and iSCSI volumes, and all CSI volumes + whose CSI driver announces SELinux support by setting spec.seLinuxMount: true in their + CSIDriver instance. Other volumes are always re-labelled recursively. + "MountOption" value is allowed only when SELinuxMount feature gate is enabled. + + If not specified and SELinuxMount feature gate is enabled, "MountOption" is used. + If not specified and SELinuxMount feature gate is disabled, "MountOption" is used for ReadWriteOncePod volumes + and "Recursive" for all other volumes. + + This field affects only Pods that have SELinux label set, either in PodSecurityContext or in SecurityContext of all containers. + + All Pods that use the same volume should use the same seLinuxChangePolicy, otherwise some pods can get stuck in ContainerCreating state. + Note that this field cannot be set when spec.os.name is windows. + type: string + seLinuxOptions: + description: |- + The SELinux context to be applied to all containers. + If unspecified, the container runtime will allocate a random SELinux context for each + container. May also be set in SecurityContext. If set in + both SecurityContext and PodSecurityContext, the value specified in SecurityContext + takes precedence for that container. + Note that this field cannot be set when spec.os.name is windows. + properties: + level: + description: Level is SELinux level label that applies + to the container. + type: string + role: + description: Role is a SELinux role label that applies + to the container. + type: string + type: + description: Type is a SELinux type label that applies + to the container. + type: string + user: + description: User is a SELinux user label that applies + to the container. + type: string + type: object + seccompProfile: + description: |- + The seccomp options to use by the containers in this pod. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: |- + localhostProfile indicates a profile defined in a file on the node should be used. + The profile must be preconfigured on the node to work. + Must be a descending path, relative to the kubelet's configured seccomp profile location. + Must be set if type is "Localhost". Must NOT be set for any other type. + type: string + type: + description: |- + type indicates which kind of seccomp profile will be applied. + Valid options are: + + Localhost - a profile defined in a file on the node should be used. + RuntimeDefault - the container runtime default profile should be used. + Unconfined - no profile should be applied. + type: string + required: + - type + type: object + supplementalGroups: + description: |- + A list of groups applied to the first process run in each container, in + addition to the container's primary GID and fsGroup (if specified). If + the SupplementalGroupsPolicy feature is enabled, the + supplementalGroupsPolicy field determines whether these are in addition + to or instead of any group memberships defined in the container image. + If unspecified, no additional groups are added, though group memberships + defined in the container image may still be used, depending on the + supplementalGroupsPolicy field. + Note that this field cannot be set when spec.os.name is windows. + items: + format: int64 + type: integer + type: array + x-kubernetes-list-type: atomic + supplementalGroupsPolicy: + description: |- + Defines how supplemental groups of the first container processes are calculated. + Valid values are "Merge" and "Strict". If not specified, "Merge" is used. + (Alpha) Using the field requires the SupplementalGroupsPolicy feature gate to be enabled + and the container runtime must implement support for this feature. + Note that this field cannot be set when spec.os.name is windows. + type: string + sysctls: + description: |- + Sysctls hold a list of namespaced sysctls used for the pod. Pods with unsupported + sysctls (by the container runtime) might fail to launch. + Note that this field cannot be set when spec.os.name is windows. + items: + description: Sysctl defines a kernel parameter to be set + properties: + name: + description: Name of a property to set + type: string + value: + description: Value of a property to set + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + windowsOptions: + description: |- + The Windows specific settings applied to all containers. + If unspecified, the options within a container's SecurityContext will be used. + If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is linux. + properties: + gmsaCredentialSpec: + description: |- + GMSACredentialSpec is where the GMSA admission webhook + (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the + GMSA credential spec named by the GMSACredentialSpecName field. + type: string + gmsaCredentialSpecName: + description: GMSACredentialSpecName is the name of the + GMSA credential spec to use. + type: string + hostProcess: + description: |- + HostProcess determines if a container should be run as a 'Host Process' container. + All of a Pod's containers must have the same effective HostProcess value + (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). + In addition, if HostProcess is true then HostNetwork must also be set to true. + type: boolean + runAsUserName: + description: |- + The UserName in Windows to run the entrypoint of the container process. + Defaults to the user specified in image metadata if unspecified. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: string + type: object + type: object priorityClassName: type: string rollingUpdate: @@ -181,6 +414,199 @@ spec: maxUnavailable: type: string type: object + securityContext: + description: 'Optional: Set default container-level security context + for all DaemonSet pods' + properties: + allowPrivilegeEscalation: + description: |- + AllowPrivilegeEscalation controls whether a process can gain more + privileges than its parent process. This bool directly controls if + the no_new_privs flag will be set on the container process. + AllowPrivilegeEscalation is true always when the container is: + 1) run as Privileged + 2) has CAP_SYS_ADMIN + Note that this field cannot be set when spec.os.name is windows. + type: boolean + appArmorProfile: + description: |- + appArmorProfile is the AppArmor options to use by this container. If set, this profile + overrides the pod's appArmorProfile. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: |- + localhostProfile indicates a profile loaded on the node that should be used. + The profile must be preconfigured on the node to work. + Must match the loaded name of the profile. + Must be set if and only if type is "Localhost". + type: string + type: + description: |- + type indicates which kind of AppArmor profile will be applied. + Valid options are: + Localhost - a profile pre-loaded on the node. + RuntimeDefault - the container runtime's default profile. + Unconfined - no AppArmor enforcement. + type: string + required: + - type + type: object + capabilities: + description: |- + The capabilities to add/drop when running containers. + Defaults to the default set of capabilities granted by the container runtime. + Note that this field cannot be set when spec.os.name is windows. + properties: + add: + description: Added capabilities + items: + description: Capability represent POSIX capabilities + type + type: string + type: array + x-kubernetes-list-type: atomic + drop: + description: Removed capabilities + items: + description: Capability represent POSIX capabilities + type + type: string + type: array + x-kubernetes-list-type: atomic + type: object + privileged: + description: |- + Run container in privileged mode. + Processes in privileged containers are essentially equivalent to root on the host. + Defaults to false. + Note that this field cannot be set when spec.os.name is windows. + type: boolean + procMount: + description: |- + procMount denotes the type of proc mount to use for the containers. + The default value is Default which uses the container runtime defaults for + readonly paths and masked paths. + This requires the ProcMountType feature flag to be enabled. + Note that this field cannot be set when spec.os.name is windows. + type: string + readOnlyRootFilesystem: + description: |- + Whether this container has a read-only root filesystem. + Default is false. + Note that this field cannot be set when spec.os.name is windows. + type: boolean + runAsGroup: + description: |- + The GID to run the entrypoint of the container process. + Uses runtime default if unset. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + runAsNonRoot: + description: |- + Indicates that the container must run as a non-root user. + If true, the Kubelet will validate the image at runtime to ensure that it + does not run as UID 0 (root) and fail to start the container if it does. + If unset or false, no such validation will be performed. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: boolean + runAsUser: + description: |- + The UID to run the entrypoint of the container process. + Defaults to user specified in image metadata if unspecified. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + seLinuxOptions: + description: |- + The SELinux context to be applied to the container. + If unspecified, the container runtime will allocate a random SELinux context for each + container. May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. + properties: + level: + description: Level is SELinux level label that applies + to the container. + type: string + role: + description: Role is a SELinux role label that applies + to the container. + type: string + type: + description: Type is a SELinux type label that applies + to the container. + type: string + user: + description: User is a SELinux user label that applies + to the container. + type: string + type: object + seccompProfile: + description: |- + The seccomp options to use by this container. If seccomp options are + provided at both the pod & container level, the container options + override the pod options. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: |- + localhostProfile indicates a profile defined in a file on the node should be used. + The profile must be preconfigured on the node to work. + Must be a descending path, relative to the kubelet's configured seccomp profile location. + Must be set if type is "Localhost". Must NOT be set for any other type. + type: string + type: + description: |- + type indicates which kind of seccomp profile will be applied. + Valid options are: + + Localhost - a profile defined in a file on the node should be used. + RuntimeDefault - the container runtime default profile should be used. + Unconfined - no profile should be applied. + type: string + required: + - type + type: object + windowsOptions: + description: |- + The Windows specific settings applied to all containers. + If unspecified, the options from the PodSecurityContext will be used. + If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is linux. + properties: + gmsaCredentialSpec: + description: |- + GMSACredentialSpec is where the GMSA admission webhook + (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the + GMSA credential spec named by the GMSACredentialSpecName field. + type: string + gmsaCredentialSpecName: + description: GMSACredentialSpecName is the name of the + GMSA credential spec to use. + type: string + hostProcess: + description: |- + HostProcess determines if a container should be run as a 'Host Process' container. + All of a Pod's containers must have the same effective HostProcess value + (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). + In addition, if HostProcess is true then HostNetwork must also be set to true. + type: boolean + runAsUserName: + description: |- + The UserName in Windows to run the entrypoint of the container process. + Defaults to the user specified in image metadata if unspecified. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: string + type: object + type: object tolerations: description: 'Optional: Set tolerations' items: diff --git a/controllers/object_controls.go b/controllers/object_controls.go index fe7e9df4b..f8e248a25 100644 --- a/controllers/object_controls.go +++ b/controllers/object_controls.go @@ -783,14 +783,14 @@ func preProcessDaemonSet(obj *appsv1.DaemonSet, n ClusterPolicyController) error } // applyCommonDaemonsetMetadata adds additional labels and annotations to the daemonset podSpec if there are any specified -// by the user in the podSpec +// by the user in the podSpec. func applyCommonDaemonsetMetadata(obj *appsv1.DaemonSet, dsSpec *gpuv1.DaemonsetsSpec) { if len(dsSpec.Labels) > 0 { if obj.Spec.Template.Labels == nil { obj.Spec.Template.Labels = make(map[string]string) } for labelKey, labelValue := range dsSpec.Labels { - // if the user specifies an override of the "app" or the ""app.kubernetes.io/part-of"" key, we skip it. + // if the user specifies an override of the "app" or the "app.kubernetes.io/part-of" key, we skip it. // DaemonSet pod selectors are immutable, so we still want the pods to be selectable as before and working // with the existing daemon set selectors. if labelKey == "app" || labelKey == "app.kubernetes.io/part-of" { @@ -810,6 +810,49 @@ func applyCommonDaemonsetMetadata(obj *appsv1.DaemonSet, dsSpec *gpuv1.Daemonset } } +// mergeSecurityContext copies fields from defaults into target only when the target's field is nil. +func mergeSecurityContext(target, defaults *corev1.SecurityContext) { + if target == nil || defaults == nil { + return + } + if target.RunAsUser == nil && defaults.RunAsUser != nil { + target.RunAsUser = defaults.RunAsUser + } + if target.RunAsGroup == nil && defaults.RunAsGroup != nil { + target.RunAsGroup = defaults.RunAsGroup + } + if target.RunAsNonRoot == nil && defaults.RunAsNonRoot != nil { + target.RunAsNonRoot = defaults.RunAsNonRoot + } + if target.ReadOnlyRootFilesystem == nil && defaults.ReadOnlyRootFilesystem != nil { + target.ReadOnlyRootFilesystem = defaults.ReadOnlyRootFilesystem + } + if target.AllowPrivilegeEscalation == nil && defaults.AllowPrivilegeEscalation != nil { + target.AllowPrivilegeEscalation = defaults.AllowPrivilegeEscalation + } + if target.Privileged == nil && defaults.Privileged != nil { + target.Privileged = defaults.Privileged + } + if target.Capabilities == nil && defaults.Capabilities != nil { + target.Capabilities = defaults.Capabilities.DeepCopy() + } + if target.SeccompProfile == nil && defaults.SeccompProfile != nil { + target.SeccompProfile = defaults.SeccompProfile.DeepCopy() + } + if target.SELinuxOptions == nil && defaults.SELinuxOptions != nil { + target.SELinuxOptions = defaults.SELinuxOptions.DeepCopy() + } + if target.AppArmorProfile == nil && defaults.AppArmorProfile != nil { + target.AppArmorProfile = defaults.AppArmorProfile.DeepCopy() + } + if target.ProcMount == nil && defaults.ProcMount != nil { + target.ProcMount = defaults.ProcMount + } + if target.WindowsOptions == nil && defaults.WindowsOptions != nil { + target.WindowsOptions = defaults.WindowsOptions.DeepCopy() + } +} + // Apply common config that is applicable for all Daemonsets func applyCommonDaemonsetConfig(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec) error { // apply daemonset update strategy @@ -827,6 +870,29 @@ func applyCommonDaemonsetConfig(obj *appsv1.DaemonSet, config *gpuv1.ClusterPoli if len(config.Daemonsets.Tolerations) > 0 { obj.Spec.Template.Spec.Tolerations = config.Daemonsets.Tolerations } + + // set pod-level security context if specified + if config.Daemonsets.PodSecurityContext != nil { + obj.Spec.Template.Spec.SecurityContext = config.Daemonsets.PodSecurityContext + } + + // set container-level security context defaults + if config.Daemonsets.SecurityContext != nil { + for i := range obj.Spec.Template.Spec.Containers { + if obj.Spec.Template.Spec.Containers[i].SecurityContext == nil { + obj.Spec.Template.Spec.Containers[i].SecurityContext = config.Daemonsets.SecurityContext.DeepCopy() + } else { + mergeSecurityContext(obj.Spec.Template.Spec.Containers[i].SecurityContext, config.Daemonsets.SecurityContext) + } + } + for i := range obj.Spec.Template.Spec.InitContainers { + if obj.Spec.Template.Spec.InitContainers[i].SecurityContext == nil { + obj.Spec.Template.Spec.InitContainers[i].SecurityContext = config.Daemonsets.SecurityContext.DeepCopy() + } else { + mergeSecurityContext(obj.Spec.Template.Spec.InitContainers[i].SecurityContext, config.Daemonsets.SecurityContext) + } + } + } return nil } diff --git a/controllers/transforms_test.go b/controllers/transforms_test.go index 986308a16..f0316cb4d 100644 --- a/controllers/transforms_test.go +++ b/controllers/transforms_test.go @@ -143,6 +143,11 @@ func (d Daemonset) WithTolerations(tolerations []corev1.Toleration) Daemonset { return d } +func (d Daemonset) WithPodSecurityContext(psc *corev1.PodSecurityContext) Daemonset { + d.Spec.Template.Spec.SecurityContext = psc + return d +} + func (d Daemonset) WithPodLabels(labels map[string]string) Daemonset { d.Spec.Template.Labels = labels return d @@ -708,6 +713,97 @@ func TestApplyCommonDaemonSetConfig(t *testing.T) { }}, errorExpected: true, }, + { + description: "podSecurityContext configured", + ds: NewDaemonset(), + dsSpec: gpuv1.DaemonsetsSpec{ + PodSecurityContext: &corev1.PodSecurityContext{ + RunAsUser: ptr.To(int64(1000)), + RunAsGroup: ptr.To(int64(3000)), + FSGroup: ptr.To(int64(2000)), + RunAsNonRoot: ptr.To(true), + }, + }, + expectedDs: NewDaemonset().WithPodSecurityContext(&corev1.PodSecurityContext{ + RunAsUser: ptr.To(int64(1000)), + RunAsGroup: ptr.To(int64(3000)), + FSGroup: ptr.To(int64(2000)), + RunAsNonRoot: ptr.To(true), + }), + }, + { + description: "securityContext applied to container with nil SecurityContext", + ds: NewDaemonset().WithContainer(corev1.Container{Name: "test-ctr"}), + dsSpec: gpuv1.DaemonsetsSpec{ + SecurityContext: &corev1.SecurityContext{ + RunAsUser: ptr.To(int64(0)), + }, + }, + expectedDs: NewDaemonset().WithContainer(corev1.Container{ + Name: "test-ctr", + SecurityContext: &corev1.SecurityContext{ + RunAsUser: ptr.To(int64(0)), + }, + }), + }, + { + description: "securityContext merged into container with existing SecurityContext", + ds: NewDaemonset().WithContainer(corev1.Container{ + Name: "test-ctr", + SecurityContext: &corev1.SecurityContext{ + Privileged: ptr.To(true), + }, + }), + dsSpec: gpuv1.DaemonsetsSpec{ + SecurityContext: &corev1.SecurityContext{ + RunAsUser: ptr.To(int64(0)), + }, + }, + expectedDs: NewDaemonset().WithContainer(corev1.Container{ + Name: "test-ctr", + SecurityContext: &corev1.SecurityContext{ + Privileged: ptr.To(true), + RunAsUser: ptr.To(int64(0)), + }, + }), + }, + { + description: "securityContext applied to initContainer with nil SecurityContext and merged into initContainer with existing", + ds: NewDaemonset(). + WithInitContainer(corev1.Container{Name: "init-1"}). + WithInitContainer(corev1.Container{ + Name: "init-2", + SecurityContext: &corev1.SecurityContext{ + Privileged: ptr.To(true), + }, + }). + WithContainer(corev1.Container{Name: "main"}), + dsSpec: gpuv1.DaemonsetsSpec{ + SecurityContext: &corev1.SecurityContext{ + RunAsUser: ptr.To(int64(0)), + }, + }, + expectedDs: NewDaemonset(). + WithInitContainer(corev1.Container{ + Name: "init-1", + SecurityContext: &corev1.SecurityContext{ + RunAsUser: ptr.To(int64(0)), + }, + }). + WithInitContainer(corev1.Container{ + Name: "init-2", + SecurityContext: &corev1.SecurityContext{ + Privileged: ptr.To(true), + RunAsUser: ptr.To(int64(0)), + }, + }). + WithContainer(corev1.Container{ + Name: "main", + SecurityContext: &corev1.SecurityContext{ + RunAsUser: ptr.To(int64(0)), + }, + }), + }, } for _, tc := range testCases { @@ -726,6 +822,56 @@ func TestApplyCommonDaemonSetConfig(t *testing.T) { } } +func TestMergeSecurityContext(t *testing.T) { + tests := []struct { + name string + target *corev1.SecurityContext + defaults *corev1.SecurityContext + want *corev1.SecurityContext + }{ + { + name: "target nil - no op", + target: nil, + defaults: &corev1.SecurityContext{RunAsUser: ptr.To(int64(0))}, + want: nil, + }, + { + name: "defaults nil - no op", + target: &corev1.SecurityContext{Privileged: ptr.To(true)}, + defaults: nil, + want: &corev1.SecurityContext{Privileged: ptr.To(true)}, + }, + { + name: "target empty - all defaults applied", + target: &corev1.SecurityContext{}, + defaults: &corev1.SecurityContext{RunAsUser: ptr.To(int64(0)), RunAsNonRoot: ptr.To(false)}, + want: &corev1.SecurityContext{RunAsUser: ptr.To(int64(0)), RunAsNonRoot: ptr.To(false)}, + }, + { + name: "target has Privileged - defaults merge, target preserved", + target: &corev1.SecurityContext{Privileged: ptr.To(true)}, + defaults: &corev1.SecurityContext{RunAsUser: ptr.To(int64(0))}, + want: &corev1.SecurityContext{Privileged: ptr.To(true), RunAsUser: ptr.To(int64(0))}, + }, + { + name: "target has RunAsUser - not overwritten by defaults", + target: &corev1.SecurityContext{RunAsUser: ptr.To(int64(1000))}, + defaults: &corev1.SecurityContext{RunAsUser: ptr.To(int64(0))}, + want: &corev1.SecurityContext{RunAsUser: ptr.To(int64(1000))}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + mergeSecurityContext(tt.target, tt.defaults) + if tt.want == nil { + require.Nil(t, tt.target) + return + } + require.Equal(t, tt.want, tt.target) + }) + } +} + func TestApplyCommonDaemonsetMetadata(t *testing.T) { testCases := []struct { description string diff --git a/deployments/gpu-operator/charts/node-feature-discovery/templates/_helpers.tpl b/deployments/gpu-operator/charts/node-feature-discovery/templates/_helpers.tpl index b540e3bbf..28dc3d426 100644 --- a/deployments/gpu-operator/charts/node-feature-discovery/templates/_helpers.tpl +++ b/deployments/gpu-operator/charts/node-feature-discovery/templates/_helpers.tpl @@ -52,6 +52,9 @@ helm.sh/chart: {{ include "node-feature-discovery.chart" . }} app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} {{- end }} app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- with .Values.commonLabels }} +{{ toYaml . }} +{{- end }} {{- end -}} {{/* diff --git a/deployments/gpu-operator/charts/node-feature-discovery/values.yaml b/deployments/gpu-operator/charts/node-feature-discovery/values.yaml index 86712bd2e..bb9931fee 100644 --- a/deployments/gpu-operator/charts/node-feature-discovery/values.yaml +++ b/deployments/gpu-operator/charts/node-feature-discovery/values.yaml @@ -10,6 +10,9 @@ nameOverride: "" fullnameOverride: "" namespaceOverride: "" +# commonLabels are added to all resources created by this chart (DaemonSets, Deployments, etc.) +commonLabels: {} + featureGates: NodeFeatureGroupAPI: false diff --git a/deployments/gpu-operator/crds/nvidia.com_clusterpolicies.yaml b/deployments/gpu-operator/crds/nvidia.com_clusterpolicies.yaml index d82340856..85245ca2e 100644 --- a/deployments/gpu-operator/crds/nvidia.com_clusterpolicies.yaml +++ b/deployments/gpu-operator/crds/nvidia.com_clusterpolicies.yaml @@ -172,6 +172,239 @@ spec: (scope and select) objects. May match selectors of replication controllers and services. type: object + podSecurityContext: + description: 'Optional: Set pod-level security context for all + DaemonSet pods' + properties: + appArmorProfile: + description: |- + appArmorProfile is the AppArmor options to use by the containers in this pod. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: |- + localhostProfile indicates a profile loaded on the node that should be used. + The profile must be preconfigured on the node to work. + Must match the loaded name of the profile. + Must be set if and only if type is "Localhost". + type: string + type: + description: |- + type indicates which kind of AppArmor profile will be applied. + Valid options are: + Localhost - a profile pre-loaded on the node. + RuntimeDefault - the container runtime's default profile. + Unconfined - no AppArmor enforcement. + type: string + required: + - type + type: object + fsGroup: + description: |- + A special supplemental group that applies to all containers in a pod. + Some volume types allow the Kubelet to change the ownership of that volume + to be owned by the pod: + + 1. The owning GID will be the FSGroup + 2. The setgid bit is set (new files created in the volume will be owned by FSGroup) + 3. The permission bits are OR'd with rw-rw---- + + If unset, the Kubelet will not modify the ownership and permissions of any volume. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + fsGroupChangePolicy: + description: |- + fsGroupChangePolicy defines behavior of changing ownership and permission of the volume + before being exposed inside Pod. This field will only apply to + volume types which support fsGroup based ownership(and permissions). + It will have no effect on ephemeral volume types such as: secret, configmaps + and emptydir. + Valid values are "OnRootMismatch" and "Always". If not specified, "Always" is used. + Note that this field cannot be set when spec.os.name is windows. + type: string + runAsGroup: + description: |- + The GID to run the entrypoint of the container process. + Uses runtime default if unset. + May also be set in SecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence + for that container. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + runAsNonRoot: + description: |- + Indicates that the container must run as a non-root user. + If true, the Kubelet will validate the image at runtime to ensure that it + does not run as UID 0 (root) and fail to start the container if it does. + If unset or false, no such validation will be performed. + May also be set in SecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: boolean + runAsUser: + description: |- + The UID to run the entrypoint of the container process. + Defaults to user specified in image metadata if unspecified. + May also be set in SecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence + for that container. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + seLinuxChangePolicy: + description: |- + seLinuxChangePolicy defines how the container's SELinux label is applied to all volumes used by the Pod. + It has no effect on nodes that do not support SELinux or to volumes does not support SELinux. + Valid values are "MountOption" and "Recursive". + + "Recursive" means relabeling of all files on all Pod volumes by the container runtime. + This may be slow for large volumes, but allows mixing privileged and unprivileged Pods sharing the same volume on the same node. + + "MountOption" mounts all eligible Pod volumes with `-o context` mount option. + This requires all Pods that share the same volume to use the same SELinux label. + It is not possible to share the same volume among privileged and unprivileged Pods. + Eligible volumes are in-tree FibreChannel and iSCSI volumes, and all CSI volumes + whose CSI driver announces SELinux support by setting spec.seLinuxMount: true in their + CSIDriver instance. Other volumes are always re-labelled recursively. + "MountOption" value is allowed only when SELinuxMount feature gate is enabled. + + If not specified and SELinuxMount feature gate is enabled, "MountOption" is used. + If not specified and SELinuxMount feature gate is disabled, "MountOption" is used for ReadWriteOncePod volumes + and "Recursive" for all other volumes. + + This field affects only Pods that have SELinux label set, either in PodSecurityContext or in SecurityContext of all containers. + + All Pods that use the same volume should use the same seLinuxChangePolicy, otherwise some pods can get stuck in ContainerCreating state. + Note that this field cannot be set when spec.os.name is windows. + type: string + seLinuxOptions: + description: |- + The SELinux context to be applied to all containers. + If unspecified, the container runtime will allocate a random SELinux context for each + container. May also be set in SecurityContext. If set in + both SecurityContext and PodSecurityContext, the value specified in SecurityContext + takes precedence for that container. + Note that this field cannot be set when spec.os.name is windows. + properties: + level: + description: Level is SELinux level label that applies + to the container. + type: string + role: + description: Role is a SELinux role label that applies + to the container. + type: string + type: + description: Type is a SELinux type label that applies + to the container. + type: string + user: + description: User is a SELinux user label that applies + to the container. + type: string + type: object + seccompProfile: + description: |- + The seccomp options to use by the containers in this pod. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: |- + localhostProfile indicates a profile defined in a file on the node should be used. + The profile must be preconfigured on the node to work. + Must be a descending path, relative to the kubelet's configured seccomp profile location. + Must be set if type is "Localhost". Must NOT be set for any other type. + type: string + type: + description: |- + type indicates which kind of seccomp profile will be applied. + Valid options are: + + Localhost - a profile defined in a file on the node should be used. + RuntimeDefault - the container runtime default profile should be used. + Unconfined - no profile should be applied. + type: string + required: + - type + type: object + supplementalGroups: + description: |- + A list of groups applied to the first process run in each container, in + addition to the container's primary GID and fsGroup (if specified). If + the SupplementalGroupsPolicy feature is enabled, the + supplementalGroupsPolicy field determines whether these are in addition + to or instead of any group memberships defined in the container image. + If unspecified, no additional groups are added, though group memberships + defined in the container image may still be used, depending on the + supplementalGroupsPolicy field. + Note that this field cannot be set when spec.os.name is windows. + items: + format: int64 + type: integer + type: array + x-kubernetes-list-type: atomic + supplementalGroupsPolicy: + description: |- + Defines how supplemental groups of the first container processes are calculated. + Valid values are "Merge" and "Strict". If not specified, "Merge" is used. + (Alpha) Using the field requires the SupplementalGroupsPolicy feature gate to be enabled + and the container runtime must implement support for this feature. + Note that this field cannot be set when spec.os.name is windows. + type: string + sysctls: + description: |- + Sysctls hold a list of namespaced sysctls used for the pod. Pods with unsupported + sysctls (by the container runtime) might fail to launch. + Note that this field cannot be set when spec.os.name is windows. + items: + description: Sysctl defines a kernel parameter to be set + properties: + name: + description: Name of a property to set + type: string + value: + description: Value of a property to set + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + windowsOptions: + description: |- + The Windows specific settings applied to all containers. + If unspecified, the options within a container's SecurityContext will be used. + If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is linux. + properties: + gmsaCredentialSpec: + description: |- + GMSACredentialSpec is where the GMSA admission webhook + (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the + GMSA credential spec named by the GMSACredentialSpecName field. + type: string + gmsaCredentialSpecName: + description: GMSACredentialSpecName is the name of the + GMSA credential spec to use. + type: string + hostProcess: + description: |- + HostProcess determines if a container should be run as a 'Host Process' container. + All of a Pod's containers must have the same effective HostProcess value + (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). + In addition, if HostProcess is true then HostNetwork must also be set to true. + type: boolean + runAsUserName: + description: |- + The UserName in Windows to run the entrypoint of the container process. + Defaults to the user specified in image metadata if unspecified. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: string + type: object + type: object priorityClassName: type: string rollingUpdate: @@ -181,6 +414,199 @@ spec: maxUnavailable: type: string type: object + securityContext: + description: 'Optional: Set default container-level security context + for all DaemonSet pods' + properties: + allowPrivilegeEscalation: + description: |- + AllowPrivilegeEscalation controls whether a process can gain more + privileges than its parent process. This bool directly controls if + the no_new_privs flag will be set on the container process. + AllowPrivilegeEscalation is true always when the container is: + 1) run as Privileged + 2) has CAP_SYS_ADMIN + Note that this field cannot be set when spec.os.name is windows. + type: boolean + appArmorProfile: + description: |- + appArmorProfile is the AppArmor options to use by this container. If set, this profile + overrides the pod's appArmorProfile. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: |- + localhostProfile indicates a profile loaded on the node that should be used. + The profile must be preconfigured on the node to work. + Must match the loaded name of the profile. + Must be set if and only if type is "Localhost". + type: string + type: + description: |- + type indicates which kind of AppArmor profile will be applied. + Valid options are: + Localhost - a profile pre-loaded on the node. + RuntimeDefault - the container runtime's default profile. + Unconfined - no AppArmor enforcement. + type: string + required: + - type + type: object + capabilities: + description: |- + The capabilities to add/drop when running containers. + Defaults to the default set of capabilities granted by the container runtime. + Note that this field cannot be set when spec.os.name is windows. + properties: + add: + description: Added capabilities + items: + description: Capability represent POSIX capabilities + type + type: string + type: array + x-kubernetes-list-type: atomic + drop: + description: Removed capabilities + items: + description: Capability represent POSIX capabilities + type + type: string + type: array + x-kubernetes-list-type: atomic + type: object + privileged: + description: |- + Run container in privileged mode. + Processes in privileged containers are essentially equivalent to root on the host. + Defaults to false. + Note that this field cannot be set when spec.os.name is windows. + type: boolean + procMount: + description: |- + procMount denotes the type of proc mount to use for the containers. + The default value is Default which uses the container runtime defaults for + readonly paths and masked paths. + This requires the ProcMountType feature flag to be enabled. + Note that this field cannot be set when spec.os.name is windows. + type: string + readOnlyRootFilesystem: + description: |- + Whether this container has a read-only root filesystem. + Default is false. + Note that this field cannot be set when spec.os.name is windows. + type: boolean + runAsGroup: + description: |- + The GID to run the entrypoint of the container process. + Uses runtime default if unset. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + runAsNonRoot: + description: |- + Indicates that the container must run as a non-root user. + If true, the Kubelet will validate the image at runtime to ensure that it + does not run as UID 0 (root) and fail to start the container if it does. + If unset or false, no such validation will be performed. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: boolean + runAsUser: + description: |- + The UID to run the entrypoint of the container process. + Defaults to user specified in image metadata if unspecified. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + seLinuxOptions: + description: |- + The SELinux context to be applied to the container. + If unspecified, the container runtime will allocate a random SELinux context for each + container. May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is windows. + properties: + level: + description: Level is SELinux level label that applies + to the container. + type: string + role: + description: Role is a SELinux role label that applies + to the container. + type: string + type: + description: Type is a SELinux type label that applies + to the container. + type: string + user: + description: User is a SELinux user label that applies + to the container. + type: string + type: object + seccompProfile: + description: |- + The seccomp options to use by this container. If seccomp options are + provided at both the pod & container level, the container options + override the pod options. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: |- + localhostProfile indicates a profile defined in a file on the node should be used. + The profile must be preconfigured on the node to work. + Must be a descending path, relative to the kubelet's configured seccomp profile location. + Must be set if type is "Localhost". Must NOT be set for any other type. + type: string + type: + description: |- + type indicates which kind of seccomp profile will be applied. + Valid options are: + + Localhost - a profile defined in a file on the node should be used. + RuntimeDefault - the container runtime default profile should be used. + Unconfined - no profile should be applied. + type: string + required: + - type + type: object + windowsOptions: + description: |- + The Windows specific settings applied to all containers. + If unspecified, the options from the PodSecurityContext will be used. + If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is linux. + properties: + gmsaCredentialSpec: + description: |- + GMSACredentialSpec is where the GMSA admission webhook + (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the + GMSA credential spec named by the GMSACredentialSpecName field. + type: string + gmsaCredentialSpecName: + description: GMSACredentialSpecName is the name of the + GMSA credential spec to use. + type: string + hostProcess: + description: |- + HostProcess determines if a container should be run as a 'Host Process' container. + All of a Pod's containers must have the same effective HostProcess value + (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). + In addition, if HostProcess is true then HostNetwork must also be set to true. + type: boolean + runAsUserName: + description: |- + The UserName in Windows to run the entrypoint of the container process. + Defaults to the user specified in image metadata if unspecified. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: string + type: object + type: object tolerations: description: 'Optional: Set tolerations' items: