Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions api/nvidia/v1/clusterpolicy_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,12 @@ type DaemonsetsSpec struct {
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors=true
// +operator-sdk:gen-csv:customresourcedefinitions.specDescriptors.displayName="Rolling update configuration for all DaemonSet pods"
RollingUpdate *RollingUpdateSpec `json:"rollingUpdate,omitempty"`

// Optional: Set pod-level security context for all DaemonSet pods
PodSecurityContext *corev1.PodSecurityContext `json:"podSecurityContext,omitempty"`

// Optional: Set default container-level security context for all DaemonSet pods
SecurityContext *corev1.SecurityContext `json:"securityContext,omitempty"`
}

// InitContainerSpec describes configuration for initContainer image used with all components
Expand Down
10 changes: 10 additions & 0 deletions api/nvidia/v1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

426 changes: 426 additions & 0 deletions bundle/manifests/nvidia.com_clusterpolicies.yaml

Large diffs are not rendered by default.

29 changes: 29 additions & 0 deletions cmd/nvidia-validator/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import (
log "github.com/sirupsen/logrus"
"github.com/stretchr/testify/assert/yaml"
cli "github.com/urfave/cli/v3"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
meta_v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -1317,6 +1318,8 @@ func (p *Plugin) runWorkload() error {
pod.Spec.Tolerations = validatorDaemonset.Spec.Template.Spec.Tolerations
// update podSpec with node name, so it will just run on current node
pod.Spec.NodeName = nodeNameFlag
// apply labels and annotations from DaemonSet pod template (e.g. spec.daemonsets.labels)
applyDaemonsetMetadataToPod(pod, validatorDaemonset)

resourceName, err := p.getGPUResourceName()
if err != nil {
Expand Down Expand Up @@ -1404,6 +1407,30 @@ func loadPodSpec(podSpecPath string) (*corev1.Pod, error) {
return &pod, nil
}

// applyDaemonsetMetadataToPod copies pod-template metadata from daemonset onto pod
// (consistent with applyCommonDaemonsetMetadata).
//
// Labels: every template label is written to pod.Labels, overwriting an existing
// value for the same key, except "app" and "app.kubernetes.io/part-of", which are
// never taken from the template.
// Annotations: every template annotation is written to pod.Annotations without
// filtering, overwriting an existing value for the same key.
//
// A destination map is allocated only when the matching template map is non-nil
// and the pod's map is nil; a nil template map leaves the pod's map untouched.
func applyDaemonsetMetadataToPod(pod *corev1.Pod, daemonset *appsv1.DaemonSet) {
	template := &daemonset.Spec.Template

	if srcLabels := template.Labels; srcLabels != nil {
		if pod.Labels == nil {
			pod.Labels = map[string]string{}
		}
		for key, value := range srcLabels {
			switch key {
			case "app", "app.kubernetes.io/part-of":
				// The pod keeps its own values for these keys.
				// NOTE(review): presumably so selectors keyed on "app" keep
				// matching the workload pod — confirm against callers.
			default:
				pod.Labels[key] = value
			}
		}
	}

	if srcAnnotations := template.Annotations; srcAnnotations != nil {
		if pod.Annotations == nil {
			pod.Annotations = map[string]string{}
		}
		for key, value := range srcAnnotations {
			pod.Annotations[key] = value
		}
	}
}

func (p *Plugin) countGPUResources() (int64, error) {
// get node info to check discovered GPU resources
node, err := getNode(p.ctx, p.kubeClient)
Expand Down Expand Up @@ -1581,6 +1608,8 @@ func (c *CUDA) runWorkload() error {
pod.Spec.Tolerations = validatorDaemonset.Spec.Template.Spec.Tolerations
// update podSpec with node name, so it will just run on current node
pod.Spec.NodeName = nodeNameFlag
// apply labels and annotations from DaemonSet pod template (e.g. spec.daemonsets.labels)
applyDaemonsetMetadataToPod(pod, validatorDaemonset)

opts := meta_v1.ListOptions{LabelSelector: labels.Set{"app": cudaValidatorLabelValue}.AsSelector().String(),
FieldSelector: fields.Set{"spec.nodeName": nodeNameFlag}.AsSelector().String()}
Expand Down
99 changes: 99 additions & 0 deletions cmd/nvidia-validator/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,12 @@ package main
import (
"context"
"os"
"reflect"
"testing"

appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
meta_v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

func Test_isValidComponent(t *testing.T) {
Expand Down Expand Up @@ -216,3 +221,97 @@ UNKNOWN_FEATURE: true`,
})
}
}

// Test_applyDaemonsetMetadataToPod exercises applyDaemonsetMetadataToPod with a
// table of pod / DaemonSet combinations and compares the pod's resulting labels
// and annotations against the expected maps using reflect.DeepEqual (so a nil
// map and an empty map are treated as different).
func Test_applyDaemonsetMetadataToPod(t *testing.T) {
	// validatorPod builds a fresh pod that carries only the "app" label.
	validatorPod := func() *corev1.Pod {
		return &corev1.Pod{
			ObjectMeta: meta_v1.ObjectMeta{
				Labels: map[string]string{"app": "nvidia-cuda-validator"},
			},
		}
	}
	// daemonsetWith builds a DaemonSet whose pod template has the given
	// labels and annotations; nil leaves the corresponding field unset.
	daemonsetWith := func(labels, annotations map[string]string) *appsv1.DaemonSet {
		return &appsv1.DaemonSet{
			Spec: appsv1.DaemonSetSpec{
				Template: corev1.PodTemplateSpec{
					ObjectMeta: meta_v1.ObjectMeta{
						Labels:      labels,
						Annotations: annotations,
					},
				},
			},
		}
	}

	cases := []struct {
		name       string
		pod        *corev1.Pod
		daemonset  *appsv1.DaemonSet
		wantLabels map[string]string
		wantAnno   map[string]string
	}{
		{
			name:       "empty daemonset template - no change",
			pod:        validatorPod(),
			daemonset:  daemonsetWith(nil, nil),
			wantLabels: map[string]string{"app": "nvidia-cuda-validator"},
			wantAnno:   nil,
		},
		{
			name: "custom labels applied, app and app.kubernetes.io/part-of skipped",
			pod:  validatorPod(),
			daemonset: daemonsetWith(
				map[string]string{
					"app":                       "should-be-skipped",
					"app.kubernetes.io/part-of": "should-be-skipped",
					"custom.company.com/team":   "gpu-ops",
					"custom.company.com/env":    "prod",
				},
				nil,
			),
			wantLabels: map[string]string{
				"app":                     "nvidia-cuda-validator",
				"custom.company.com/team": "gpu-ops",
				"custom.company.com/env":  "prod",
			},
			wantAnno: nil,
		},
		{
			name: "annotations applied",
			pod:  validatorPod(),
			daemonset: daemonsetWith(
				nil,
				map[string]string{"custom.annotation/key": "value"},
			),
			wantLabels: map[string]string{"app": "nvidia-cuda-validator"},
			wantAnno:   map[string]string{"custom.annotation/key": "value"},
		},
		{
			name: "pod with nil labels gets labels and annotations",
			pod:  &corev1.Pod{},
			daemonset: daemonsetWith(
				map[string]string{"extra": "label"},
				map[string]string{"extra": "anno"},
			),
			wantLabels: map[string]string{"extra": "label"},
			wantAnno:   map[string]string{"extra": "anno"},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			applyDaemonsetMetadataToPod(tc.pod, tc.daemonset)

			if got := tc.pod.Labels; !reflect.DeepEqual(got, tc.wantLabels) {
				t.Errorf("labels = %v, want %v", got, tc.wantLabels)
			}
			if got := tc.pod.Annotations; !reflect.DeepEqual(got, tc.wantAnno) {
				t.Errorf("annotations = %v, want %v", got, tc.wantAnno)
			}
		})
	}
}
Loading