diff --git a/charts/gpu-base-operator/templates/namespaced_role.yaml b/charts/gpu-base-operator/templates/namespaced_role.yaml index 48c1e68..2729c28 100644 --- a/charts/gpu-base-operator/templates/namespaced_role.yaml +++ b/charts/gpu-base-operator/templates/namespaced_role.yaml @@ -43,6 +43,7 @@ rules: - get - list - update + - watch - apiGroups: - apps resources: diff --git a/charts/gpu-base-operator/templates/role.yaml b/charts/gpu-base-operator/templates/role.yaml index 594a1cc..e691942 100644 --- a/charts/gpu-base-operator/templates/role.yaml +++ b/charts/gpu-base-operator/templates/role.yaml @@ -108,12 +108,23 @@ rules: - rbac.authorization.k8s.io resources: - clusterrolebindings + verbs: + - create + - delete + - get + - list + - watch +- apiGroups: + - rbac.authorization.k8s.io + resources: - clusterroles verbs: - create - delete - get - list + - update + - watch - apiGroups: - resource.k8s.io resources: @@ -146,3 +157,15 @@ rules: - patch - update - watch +- apiGroups: + - security.openshift.io + resources: + - securitycontextconstraints + verbs: + - create + - delete + - get + - list + - update + - use + - watch diff --git a/charts/gpu-base-operator/values.yaml b/charts/gpu-base-operator/values.yaml index a480dca..472e077 100644 --- a/charts/gpu-base-operator/values.yaml +++ b/charts/gpu-base-operator/values.yaml @@ -12,11 +12,11 @@ operator: verbosity: 2 resources: limits: - cpu: 500m - memory: 128Mi + cpu: 200m + memory: 386Mi requests: - cpu: 10m - memory: 64Mi + cpu: 100m + memory: 256Mi privateRegistry: url: "" diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml index faa866f..201c738 100644 --- a/config/manager/manager.yaml +++ b/config/manager/manager.yaml @@ -94,11 +94,11 @@ spec: # More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ resources: limits: - cpu: 500m - memory: 128Mi + cpu: 200m + memory: 386Mi requests: - cpu: 10m - memory: 64Mi + cpu: 100m + memory: 256Mi volumeMounts: [] volumes: [] serviceAccountName: controller-manager diff --git a/config/rbac/namespaced_role.yaml b/config/rbac/namespaced_role.yaml index 46c244c..4d4fce1 100644 --- a/config/rbac/namespaced_role.yaml +++ b/config/rbac/namespaced_role.yaml @@ -42,6 +42,7 @@ rules: - get - list - update + - watch - apiGroups: - apps resources: diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 594a1cc..e691942 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -108,12 +108,23 @@ rules: - rbac.authorization.k8s.io resources: - clusterrolebindings + verbs: + - create + - delete + - get + - list + - watch +- apiGroups: + - rbac.authorization.k8s.io + resources: - clusterroles verbs: - create - delete - get - list + - update + - watch - apiGroups: - resource.k8s.io resources: @@ -146,3 +157,15 @@ rules: - patch - update - watch +- apiGroups: + - security.openshift.io + resources: + - securitycontextconstraints + verbs: + - create + - delete + - get + - list + - update + - use + - watch diff --git a/internal/controller/clusterpolicy_controller.go b/internal/controller/clusterpolicy_controller.go index cd1193d..8ecd2ee 100644 --- a/internal/controller/clusterpolicy_controller.go +++ b/internal/controller/clusterpolicy_controller.go @@ -25,6 +25,7 @@ import ( apps "k8s.io/api/apps/v1" core "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" "k8s.io/klog/v2" @@ -105,8 +106,8 @@ func addIfMissing(slice *[]string, s string) { // +kubebuilder:rbac:groups=apiextensions.k8s.io,resources=customresourcedefinitions,verbs=get;list;watch -// +kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=clusterroles,verbs=get;list;create;delete -// +kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=clusterrolebindings,verbs=get;list;create;delete +// +kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=clusterroles,verbs=get;list;create;delete;watch;update +// +kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=clusterrolebindings,verbs=get;list;create;delete;watch // +kubebuilder:rbac:groups=admissionregistration.k8s.io,resources=validatingadmissionpolicies,verbs=get;list;create;delete // +kubebuilder:rbac:groups=admissionregistration.k8s.io,resources=validatingadmissionpolicybindings,verbs=get;list;create;delete // +kubebuilder:rbac:groups=resource.k8s.io,resources=deviceclasses,verbs=get;list;create;delete;watch;update @@ -119,6 +120,8 @@ func addIfMissing(slice *[]string, s string) { // +kubebuilder:rbac:groups="",resources=nodes,verbs=get;list;watch;patch;update +// +kubebuilder:rbac:groups=security.openshift.io,resources=securitycontextconstraints,verbs=create;delete;get;list;watch;use;update + // Main Reconcile function for ClusterPolicy. Individual sub-controllers will be called from here to handle their // respective resources, and any errors they return will be aggregated into the ClusterPolicy status. func (r *ClusterPolicyReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { @@ -200,7 +203,9 @@ func (r *ClusterPolicyReconciler) Reconcile(ctx context.Context, req ctrl.Reques controllerutil.RemoveFinalizer(cp, clusterPolicyFinalizer) - if err := r.Update(ctx, cp); err != nil { + // The object may have been garbage-collected between the Get and this. + // NotFound here means the goal is already achieved. + if err := r.Update(ctx, cp); !apierrors.IsNotFound(err) { return ctrl.Result{}, err } @@ -221,11 +226,13 @@ func (r *ClusterPolicyReconciler) Reconcile(ctx context.Context, req ctrl.Reques retErr = err - for len(cp.Status.Errors) > maxKeptErrors { - cp.Status.Errors = cp.Status.Errors[1:] - } + if cp != nil { + for len(cp.Status.Errors) > maxKeptErrors { + cp.Status.Errors = cp.Status.Errors[1:] + } - cp.Status.Errors = append(cp.Status.Errors, err.Error()) + cp.Status.Errors = append(cp.Status.Errors, err.Error()) + } } } diff --git a/internal/controller/deviceplugin_controller.go b/internal/controller/deviceplugin_controller.go index 4f4c6e1..21588f6 100644 --- a/internal/controller/deviceplugin_controller.go +++ b/internal/controller/deviceplugin_controller.go @@ -46,6 +46,8 @@ type DevicePluginReconciler struct { const ( dpValue = "intel-gpu-plugin" xpumdVolumeName = "runxpumd" + + dpResourcePart = "gpu-dp" ) func logLevelForDp(spec *v1alpha.ClusterPolicy) int32 { @@ -196,6 +198,39 @@ func (r *DevicePluginReconciler) updateDaemonSetObject(ds *apps.DaemonSet, spec } else { removeXpumdMounts(cspec) } + + if r.Opts.OpenShift { + _, _, _, saName := buildOpenShiftNames(spec.Name, dpResourcePart) + cspec.ServiceAccountName = saName + } +} + +func (r *DevicePluginReconciler) createOpenShiftResourcesIfNotExists(ctx context.Context, cpName string) error { + sccName, roleName, bindingName, saName := buildOpenShiftNames(cpName, dpResourcePart) + + if err := createServiceAccount(ctx, r.Client, saName, r.Opts.Namespace); err != nil { + return fmt.Errorf("failed to ensure DP ServiceAccount: %w", err) + } + + if err := ensureSCC(ctx, r.Client, buildDevicePluginSCC(sccName)); err != nil { + return fmt.Errorf("failed to ensure DP SCC: %w", err) + } + + if err := createSCCRole(ctx, r.Client, roleName, sccName); err != nil { + return fmt.Errorf("failed to ensure DP SCC ClusterRole: %w", err) + } + + if err := createSCCRoleBinding(ctx, r.Client, bindingName, roleName, saName, r.Opts.Namespace); err != nil { + return fmt.Errorf("failed to ensure DP SCC ClusterRoleBinding: %w", err) + } + + return nil +} + +func (r *DevicePluginReconciler) cleanupOpenShiftResources(ctx context.Context, cpName string) { + sccName, roleName, bindingName, saName := buildOpenShiftNames(cpName, dpResourcePart) + + deleteOpenShiftSCCResources(ctx, r.Client, sccName, roleName, bindingName, saName, r.Opts.Namespace) } func (r *DevicePluginReconciler) createDaemonSet(ctx context.Context, obj client.Object) (ctrl.Result, error) { @@ -225,6 +260,10 @@ func (r *DevicePluginReconciler) removeDeploymentIfExists(ctx context.Context) ( crName := r.Opts.ReqName + if r.Opts.OpenShift { + r.cleanupOpenShiftResources(ctx, crName) + } + dss := &apps.DaemonSetList{} labels := client.MatchingLabels{ appLabel: dpValue, @@ -272,6 +311,14 @@ func (r *DevicePluginReconciler) Reconcile(ctx context.Context, cp *v1alpha.Clus return ctrl.Result{}, err } + if r.Opts.OpenShift { + if err := r.createOpenShiftResourcesIfNotExists(ctx, cp.Name); err != nil { + klog.Error(err, "unable to ensure OpenShift resources for DP") + + return ctrl.Result{}, err + } + } + if len(olderDs.Items) == 0 { return r.createDaemonSet(ctx, cp) } diff --git a/internal/controller/deviceplugin_controller_test.go b/internal/controller/deviceplugin_controller_test.go index f8aae85..79964e4 100644 --- a/internal/controller/deviceplugin_controller_test.go +++ b/internal/controller/deviceplugin_controller_test.go @@ -24,13 +24,14 @@ import ( . "github.com/onsi/gomega" apps "k8s.io/api/apps/v1" v1 "k8s.io/api/core/v1" + rbac "k8s.io/api/rbac/v1" "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/reconcile" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - v1alpha "github.com/intel/gpu-base-operator/api/v1alpha1" "github.com/intel/gpu-base-operator/config/deployments" ) @@ -356,6 +357,116 @@ var _ = Describe("ClusterPolicy Controller for Device Plugin", func() { }) }) + Context("When reconciling Device Plugin and XPUM on OpenShift", func() { + defaultNamespace := "foobar-openshift" + const resourceName = "test-resource-ocp" + + ctx := context.Background() + + typeNamespacedName := types.NamespacedName{Name: resourceName} + + BeforeEach(func() { + Expect(k8sClient.Create(ctx, &v1.Namespace{ + ObjectMeta: metav1.ObjectMeta{Name: defaultNamespace}, + })).To(Succeed()) + }) + + AfterEach(func() { + resource := &v1alpha.ClusterPolicy{} + Expect(k8sClient.Get(ctx, typeNamespacedName, resource)).To(Succeed()) + Expect(k8sClient.Delete(ctx, resource)).To(Succeed()) + + // Clean up cluster-scoped OpenShift resources created by the reconciler. + deleteOpenShiftSCCResources(ctx, k8sClient, + resourceName+"-gpu-dp-scc", + resourceName+"-gpu-dp-scc-role", + resourceName+"-gpu-dp-scc-binding", + resourceName+"-gpu-dp", + defaultNamespace) + deleteOpenShiftSCCResources(ctx, k8sClient, + resourceName+"-xpu-manager-scc", + resourceName+"-xpu-manager-scc-role", + resourceName+"-xpu-manager-scc-binding", + resourceName+"-xpu-manager", + defaultNamespace) + }) + + It("creates SCC resources for DP and XPUM and sets ServiceAccountName on DaemonSets", func() { + By("creating the ClusterPolicy") + Expect(k8sClient.Create(ctx, &v1alpha.ClusterPolicy{ + ObjectMeta: metav1.ObjectMeta{Name: resourceName}, + Spec: v1alpha.ClusterPolicySpec{ + ResourceRegistration: "dp", + ResourceMonitoring: true, + DevicePluginSpec: v1alpha.DevicePluginSpec{ + PluginImage: "intel/intel-gpu-plugin:test", + }, + XpuManagerSpec: v1alpha.XpuManagerSpec{ + Image: "intel/xpumanager:test", + }, + }, + })).To(Succeed()) + + reconciler := &ClusterPolicyReconciler{ + Client: k8sClient, + Scheme: k8sClient.Scheme(), + Opts: ControllerOpts{ + Namespace: defaultNamespace, + OpenShift: true, + RequeueDelay: time.Millisecond * 50, + }, + } + + By("first reconcile creates SCC resources") + _, err := reconciler.Reconcile(ctx, reconcile.Request{NamespacedName: typeNamespacedName}) + Expect(err).NotTo(HaveOccurred()) + + By("DP ServiceAccount is created") + dpSA := &v1.ServiceAccount{} + Expect(k8sClient.Get(ctx, client.ObjectKey{Name: resourceName + "-gpu-dp-sa", Namespace: defaultNamespace}, dpSA)).To(Succeed()) + + By("DP SCC is created") + dpSCC := &unstructured.Unstructured{} + dpSCC.SetAPIVersion(sccAPIVersion) + dpSCC.SetKind(sccKind) + Expect(k8sClient.Get(ctx, client.ObjectKey{Name: resourceName + "-gpu-dp-scc"}, dpSCC)).To(Succeed()) + + By("DP ClusterRole is created") + Expect(k8sClient.Get(ctx, client.ObjectKey{Name: resourceName + "-gpu-dp-scc-role"}, &rbac.ClusterRole{})).To(Succeed()) + + By("DP ClusterRoleBinding is created") + Expect(k8sClient.Get(ctx, client.ObjectKey{Name: resourceName + "-gpu-dp-scc-binding"}, &rbac.ClusterRoleBinding{})).To(Succeed()) + + By("XPUM ServiceAccount is created") + Expect(k8sClient.Get(ctx, client.ObjectKey{Name: resourceName + "-xpu-manager-sa", Namespace: defaultNamespace}, &v1.ServiceAccount{})).To(Succeed()) + + By("XPUM SCC is created") + xpumSCC := &unstructured.Unstructured{} + xpumSCC.SetAPIVersion(sccAPIVersion) + xpumSCC.SetKind(sccKind) + Expect(k8sClient.Get(ctx, client.ObjectKey{Name: resourceName + "-xpu-manager-scc"}, xpumSCC)).To(Succeed()) + + By("DaemonSets have correct ServiceAccountNames") + dsList := &apps.DaemonSetList{} + Expect(k8sClient.List(ctx, dsList, client.InNamespace(defaultNamespace))).To(Succeed()) + Expect(dsList.Items).To(HaveLen(2)) + for _, ds := range dsList.Items { + switch ds.Name { + case resourceName + "-device-plugin": + Expect(ds.Spec.Template.Spec.ServiceAccountName).To(Equal(resourceName + "-gpu-dp-sa")) + case resourceName + "-xpu-manager": + Expect(ds.Spec.Template.Spec.ServiceAccountName).To(Equal(resourceName + "-xpu-manager-sa")) + default: + Fail("Unexpected DaemonSet: " + ds.Name) + } + } + + By("second reconcile is idempotent") + _, err = reconciler.Reconcile(ctx, reconcile.Request{NamespacedName: typeNamespacedName}) + Expect(err).NotTo(HaveOccurred()) + }) + }) + }) var _ = Describe("Device Plugin", func() { diff --git a/internal/controller/dra_controller.go b/internal/controller/dra_controller.go index 66f4d8d..38389aa 100644 --- a/internal/controller/dra_controller.go +++ b/internal/controller/dra_controller.go @@ -30,6 +30,7 @@ import ( resv1 "k8s.io/api/resource/v1" "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" @@ -52,6 +53,8 @@ const ( healthCheckPort = 51516 gpuDeviceClass = "gpu.intel.com" vfioGpuDeviceClass = "gpu-vfio.intel.com" + + draResourcePart = "gpu-dra" ) func (r *DRAReconciler) createAll(ctx context.Context, cp *v1alpha.ClusterPolicy) error { @@ -201,6 +204,21 @@ func (r *DRAReconciler) deleteAll(ctx context.Context, crName string) error { }, } + if r.Opts.OpenShift { + sccName, roleName, bindingName, _ := buildOpenShiftNames(crName, draResourcePart) + + scc := &unstructured.Unstructured{} + scc.SetAPIVersion(sccAPIVersion) + scc.SetKind(sccKind) + scc.SetName(sccName) + + objects = append(objects, + scc, + &rbac.ClusterRole{ObjectMeta: metav1.ObjectMeta{Name: roleName}}, + &rbac.ClusterRoleBinding{ObjectMeta: metav1.ObjectMeta{Name: bindingName}}, + ) + } + for _, o := range objects { if err := r.Delete(ctx, o); err != nil { if errors.IsNotFound(err) { @@ -220,6 +238,32 @@ func (r *DRAReconciler) deleteAll(ctx context.Context, crName string) error { return nil } +func (r *DRAReconciler) createOpenShiftResourcesIfNotExists(ctx context.Context, crName string) error { + sccName, roleName, bindingName, _ := buildOpenShiftNames(crName, draResourcePart) + // Use the same service account name as the DaemonSet, which is based on the CR name. + saName := fmt.Sprintf("%s-gpu-dra", crName) + + if err := ensureSCC(ctx, r.Client, buildDRASCC(sccName)); err != nil { + klog.Error(err, "unable to ensure DRA SCC") + + return err + } + + if err := createSCCRole(ctx, r.Client, roleName, sccName); err != nil { + klog.Error(err, "unable to ensure DRA SCC ClusterRole") + + return err + } + + if err := createSCCRoleBinding(ctx, r.Client, bindingName, roleName, saName, r.Opts.Namespace); err != nil { + klog.Error(err, "unable to ensure DRA SCC ClusterRoleBinding") + + return err + } + + return nil +} + func (r *DRAReconciler) anyAllocatedResourceClaims(ctx context.Context) bool { var rcList resv1.ResourceClaimList @@ -369,6 +413,19 @@ func (r *DRAReconciler) updateDaemonSetObject(ds *apps.DaemonSet, spec *v1alpha. } else { cspec.ImagePullSecrets = nil } + + if r.Opts.OpenShift { + // On OpenShift, SELinux labels the container process as container_t which cannot + // write to host directories (e.g. /etc/cdi labeled etc_t). spc_t bypasses SELinux + // confinement for the container so it can write CDI specs to the host filesystem. + if cspec.Containers[0].SecurityContext == nil { + cspec.Containers[0].SecurityContext = &core.SecurityContext{} + } + + cspec.Containers[0].SecurityContext.SELinuxOptions = &core.SELinuxOptions{ + Type: "spc_t", + } + } } func (r *DRAReconciler) createDaemonSet(ctx context.Context, spec *v1alpha.ClusterPolicy) error { @@ -484,6 +541,14 @@ func (r *DRAReconciler) Reconcile(ctx context.Context, cp *v1alpha.ClusterPolicy return ctrl.Result{}, err } + if r.Opts.OpenShift { + if err := r.createOpenShiftResourcesIfNotExists(ctx, cp.Name); err != nil { + klog.Error(err, "unable to ensure OpenShift resources for DRA") + + return ctrl.Result{}, err + } + } + if len(olderDs.Items) == 0 { return ctrl.Result{}, r.createAll(ctx, cp) } diff --git a/internal/controller/dra_controller_test.go b/internal/controller/dra_controller_test.go index a0167c4..98b86e2 100644 --- a/internal/controller/dra_controller_test.go +++ b/internal/controller/dra_controller_test.go @@ -24,14 +24,15 @@ import ( . "github.com/onsi/gomega" apps "k8s.io/api/apps/v1" v1 "k8s.io/api/core/v1" + rbac "k8s.io/api/rbac/v1" resv1 "k8s.io/api/resource/v1" "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/reconcile" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - v1alpha "github.com/intel/gpu-base-operator/api/v1alpha1" "github.com/intel/gpu-base-operator/config/deployments" ) @@ -255,6 +256,112 @@ var _ = Describe("ClusterPolicy Controller for DRA", func() { }) }) + Context("When reconciling DRA on OpenShift", func() { + defaultNamespace := "foobar-dra-ocp" + const resourceName = "test-resource-dra-ocp" + + ctx := context.Background() + + typeNamespacedName := types.NamespacedName{Name: resourceName} + + BeforeEach(func() { + Expect(k8sClient.Create(ctx, &v1.Namespace{ + ObjectMeta: metav1.ObjectMeta{Name: defaultNamespace}, + })).To(Succeed()) + }) + + AfterEach(func() { + resource := &v1alpha.ClusterPolicy{} + Expect(k8sClient.Get(ctx, typeNamespacedName, resource)).To(Succeed()) + Expect(k8sClient.Delete(ctx, resource)).To(Succeed()) + + deleteOpenShiftSCCResources(ctx, k8sClient, + resourceName+"-gpu-dra-scc", + resourceName+"-gpu-dra-scc-role", + resourceName+"-gpu-dra-scc-binding", + "", "") + }) + + It("creates DRA SCC resources and cleans them up on DRA removal", func() { + By("creating the ClusterPolicy with DRA mode") + Expect(k8sClient.Create(ctx, &v1alpha.ClusterPolicy{ + ObjectMeta: metav1.ObjectMeta{Name: resourceName}, + Spec: v1alpha.ClusterPolicySpec{ + ResourceRegistration: "dra", + DynamicResourceAllocationSpec: v1alpha.DynamicResourceAllocationSpec{ + Image: "intel/gpu-dra-driver:test", + }, + }, + })).To(Succeed()) + + reconciler := &ClusterPolicyReconciler{ + Client: k8sClient, + Scheme: k8sClient.Scheme(), + Opts: ControllerOpts{ + Namespace: defaultNamespace, + DRAEnable: true, + OpenShift: true, + RequeueDelay: time.Millisecond * 50, + }, + } + + By("reconcile creates DRA SCC resources") + _, err := reconciler.Reconcile(ctx, reconcile.Request{NamespacedName: typeNamespacedName}) + Expect(err).NotTo(HaveOccurred()) + + By("DRA SCC is created") + draSCC := &unstructured.Unstructured{} + draSCC.SetAPIVersion(sccAPIVersion) + draSCC.SetKind(sccKind) + Expect(k8sClient.Get(ctx, client.ObjectKey{Name: resourceName + "-gpu-dra-scc"}, draSCC)).To(Succeed()) + + By("DRA SCC ClusterRole is created") + Expect(k8sClient.Get(ctx, client.ObjectKey{Name: resourceName + "-gpu-dra-scc-role"}, &rbac.ClusterRole{})).To(Succeed()) + + By("DRA SCC ClusterRoleBinding is created") + Expect(k8sClient.Get(ctx, client.ObjectKey{Name: resourceName + "-gpu-dra-scc-binding"}, &rbac.ClusterRoleBinding{})).To(Succeed()) + + By("second reconcile is idempotent") + _, err = reconciler.Reconcile(ctx, reconcile.Request{NamespacedName: typeNamespacedName}) + Expect(err).NotTo(HaveOccurred()) + + By("creating a ResourceClaim exercises anyAllocatedResourceClaims loop") + resourceClaim := &resv1.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-rc-coverage", + Namespace: defaultNamespace, + }, + Spec: resv1.ResourceClaimSpec{ + Devices: resv1.DeviceClaim{ + Requests: []resv1.DeviceRequest{ + {Name: "gpu", Exactly: &resv1.ExactDeviceRequest{DeviceClassName: gpuDeviceClass}}, + }, + }, + }, + } + Expect(k8sClient.Create(ctx, resourceClaim)).To(Succeed()) + DeferCleanup(func() { _ = k8sClient.Delete(ctx, resourceClaim) }) + + By("switching to DP mode removes DRA SCC resources") + cp := &v1alpha.ClusterPolicy{} + Expect(k8sClient.Get(ctx, typeNamespacedName, cp)).To(Succeed()) + cp.Spec.ResourceRegistration = "dp" + cp.Spec.DevicePluginSpec = v1alpha.DevicePluginSpec{ + PluginImage: "intel/intel-gpu-plugin:test", + } + Expect(k8sClient.Update(ctx, cp)).To(Succeed()) + + _, err = reconciler.Reconcile(ctx, reconcile.Request{NamespacedName: typeNamespacedName}) + Expect(err).NotTo(HaveOccurred()) + + draSCCCheck := &unstructured.Unstructured{} + draSCCCheck.SetAPIVersion(sccAPIVersion) + draSCCCheck.SetKind(sccKind) + Expect(k8sClient.Get(ctx, client.ObjectKey{Name: resourceName + "-gpu-dra-scc"}, draSCCCheck)). + To(Satisfy(errors.IsNotFound)) + }) + }) + Context("DaemonSet object update", func() { It("with tolerations, nodeSelector and pullsecret", func() { cp := &v1alpha.ClusterPolicy{ diff --git a/internal/controller/gpufirmwareupdate_controller.go b/internal/controller/gpufirmwareupdate_controller.go index 0cef95f..a242221 100644 --- a/internal/controller/gpufirmwareupdate_controller.go +++ b/internal/controller/gpufirmwareupdate_controller.go @@ -77,6 +77,8 @@ const ( withError = true withSuccess = false + + gpuFwUpdateResourcePart = "gpu-fwupdate" ) var ( @@ -430,6 +432,21 @@ func (r *GPUFirmwareUpdateReconciler) beginUpdate(ctx context.Context, fu *intel return ctrl.Result{}, err } + if r.Opts.OpenShift { + if err := r.ensureOpenShiftResources(ctx, fu.Name); err != nil { + klog.Errorf("Failed to ensure OpenShift SCC resources for firmware update %s: %v", fu.Name, err) + + fu.Status.State = stateError + fu.Status.Messages = append(fu.Status.Messages, fmt.Sprintf("failed to ensure OpenShift SCC resources: %v", err)) + if statusErr := r.Status().Update(ctx, fu); statusErr != nil { + klog.Error(statusErr, "unable to update GPUFirmwareUpdate status") + return ctrl.Result{}, statusErr + } + + return ctrl.Result{}, err + } + } + if err := r.verifyContentImage(ctx, fu); err != nil { klog.Errorf("Content image verification failed for firmware update %s: %v", fu.Name, err) @@ -540,6 +557,33 @@ func (r *GPUFirmwareUpdateReconciler) checkForNodeDrainStatus(ctx context.Contex return ctrl.Result{RequeueAfter: retryDuration}, nil } +func (r *GPUFirmwareUpdateReconciler) ensureOpenShiftResources(ctx context.Context, fuName string) error { + sccName, roleName, bindingName, saName := buildOpenShiftNames(fuName, gpuFwUpdateResourcePart) + + if err := createServiceAccount(ctx, r.Client, saName, r.Opts.Namespace); err != nil { + return fmt.Errorf("failed to ensure FW update ServiceAccount: %w", err) + } + + if err := ensureSCC(ctx, r.Client, buildFWUpdateSCC(sccName)); err != nil { + return fmt.Errorf("failed to ensure FW update SCC: %w", err) + } + + if err := createSCCRole(ctx, r.Client, roleName, sccName); err != nil { + return fmt.Errorf("failed to ensure FW update SCC ClusterRole: %w", err) + } + + if err := createSCCRoleBinding(ctx, r.Client, bindingName, roleName, saName, r.Opts.Namespace); err != nil { + return fmt.Errorf("failed to ensure FW update SCC ClusterRoleBinding: %w", err) + } + + return nil +} + +func (r *GPUFirmwareUpdateReconciler) cleanupOpenShiftResources(ctx context.Context, fuName string) { + sccName, roleName, bindingName, saName := buildOpenShiftNames(fuName, gpuFwUpdateResourcePart) + deleteOpenShiftSCCResources(ctx, r.Client, sccName, roleName, bindingName, saName, r.Opts.Namespace) +} + func (r *GPUFirmwareUpdateReconciler) createUpdateJobObjForNode(nodeName string, fu *intelcomv1alpha1.GPUFirmwareUpdate) *batch.Job { job := deployments.XpuManagerFWUpdateJob() job.Namespace = r.Opts.Namespace @@ -574,6 +618,11 @@ func (r *GPUFirmwareUpdateReconciler) createUpdateJobObjForNode(nodeName string, job.Spec.BackoffLimit = ptr.To(int32(0)) job.Spec.Completions = ptr.To(int32(1)) + if r.Opts.OpenShift { + _, _, _, saName := buildOpenShiftNames(fu.Name, gpuFwUpdateResourcePart) + job.Spec.Template.Spec.ServiceAccountName = saName + } + job.Spec.Template.Spec.InitContainers[0].Image = fu.Spec.Content.ContainerImage if fu.Spec.AMCCredentialsSecret != "" { @@ -911,6 +960,10 @@ func (r *GPUFirmwareUpdateReconciler) finalCleanup(ctx context.Context, fu *inte } } + if r.Opts.OpenShift { + r.cleanupOpenShiftResources(ctx, fu.Name) + } + if controllerutil.ContainsFinalizer(fu, gpuFwUpdateFinalizer) { controllerutil.RemoveFinalizer(fu, gpuFwUpdateFinalizer) diff --git a/internal/controller/misc_controller_test.go b/internal/controller/misc_controller_test.go index d6fa68f..2508506 100644 --- a/internal/controller/misc_controller_test.go +++ b/internal/controller/misc_controller_test.go @@ -593,6 +593,33 @@ var _ = Describe("Misc Prometheus", func() { err = r.Get(ctx, types.NamespacedName{Name: "intel-xpumanager", Namespace: r.Opts.Namespace}, service) Expect(err).To(HaveOccurred()) }) + + It("nil ClusterPolicy returns immediately", func() { + s := runtime.NewScheme() + Expect(core.AddToScheme(s)).To(Succeed()) + Expect(prometheusv1.AddToScheme(s)).To(Succeed()) + + r := MiscReconciler{} + r.Client = fake.NewClientBuilder().WithScheme(s).Build() + r.Opts = ControllerOpts{ReqName: "test-cluster-policy", Namespace: "default"} + + err := r.reconcilePrometheusComponents(context.Background(), nil) + Expect(err).NotTo(HaveOccurred()) + }) + + It("removes prometheus resources when they are already absent", func() { + s := runtime.NewScheme() + Expect(core.AddToScheme(s)).To(Succeed()) + Expect(apiextensionsv1.AddToScheme(s)).To(Succeed()) + Expect(prometheusv1.AddToScheme(s)).To(Succeed()) + + r := MiscReconciler{} + r.Client = fake.NewClientBuilder().WithScheme(s).WithObjects(prometheusCRDObject()).Build() + r.Opts = ControllerOpts{ReqName: "test-cluster-policy", Namespace: "default"} + + err := r.removePrometheusComponents(context.Background(), "test-cluster-policy") + Expect(err).NotTo(HaveOccurred()) + }) }) }) diff --git a/internal/controller/openshift.go b/internal/controller/openshift.go new file mode 100644 index 0000000..74118ed --- /dev/null +++ b/internal/controller/openshift.go @@ -0,0 +1,295 @@ +/* +Copyright 2026 Intel Corporation. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controller + +import ( + "context" + "fmt" + + core "k8s.io/api/core/v1" + rbac "k8s.io/api/rbac/v1" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/klog/v2" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +const ( + sccAPIVersion = "security.openshift.io/v1" + sccKind = "SecurityContextConstraints" +) + +// buildSCC constructs an OpenShift SecurityContextConstraints unstructured object. +func buildSCC(name string, spec map[string]interface{}) *unstructured.Unstructured { + scc := &unstructured.Unstructured{} + scc.SetAPIVersion(sccAPIVersion) + scc.SetKind(sccKind) + scc.SetName(name) + + for k, v := range spec { + scc.Object[k] = v + } + + return scc +} + +// buildDevicePluginSCC returns the SCC for the GPU device plugin daemonset. +func buildDevicePluginSCC(name string) *unstructured.Unstructured { + return buildSCC(name, map[string]interface{}{ + "allowPrivilegedContainer": false, + "allowHostDirVolumePlugin": true, + "allowHostIPC": false, + "allowHostNetwork": false, + "allowHostPID": false, + "allowHostPorts": false, + "allowPrivilegeEscalation": false, + "allowedCapabilities": nil, + "defaultAddCapabilities": nil, + "fsGroup": map[string]interface{}{"type": "RunAsAny"}, + "readOnlyRootFilesystem": false, + "requiredDropCapabilities": []interface{}{"ALL"}, + "runAsUser": map[string]interface{}{"type": "RunAsAny"}, + "seLinuxContext": map[string]interface{}{"type": "RunAsAny"}, + "seccompProfiles": []interface{}{"*"}, + "supplementalGroups": map[string]interface{}{"type": "RunAsAny"}, + "volumes": []interface{}{"hostPath", "emptyDir"}, + "users": []interface{}{}, + "groups": []interface{}{}, + }) +} + +// buildXpuManagerSCC returns the SCC for the XPU Manager daemonset. +// The daemonset runs as root and requires SYS_ADMIN capability. +func buildXpuManagerSCC(name string) *unstructured.Unstructured { + return buildSCC(name, map[string]interface{}{ + "allowPrivilegedContainer": false, + "allowHostDirVolumePlugin": true, + "allowHostIPC": false, + "allowHostNetwork": false, + "allowHostPID": false, + "allowHostPorts": false, + "allowPrivilegeEscalation": false, + "allowedCapabilities": []interface{}{"SYS_ADMIN"}, + "defaultAddCapabilities": nil, + "fsGroup": map[string]interface{}{"type": "RunAsAny"}, + "readOnlyRootFilesystem": false, + "requiredDropCapabilities": []interface{}{"ALL"}, + "runAsUser": map[string]interface{}{"type": "RunAsAny"}, + "seLinuxContext": map[string]interface{}{"type": "RunAsAny"}, + "seccompProfiles": []interface{}{"*"}, + "supplementalGroups": map[string]interface{}{"type": "RunAsAny"}, + "volumes": []interface{}{"hostPath", "configMap"}, + "users": []interface{}{}, + "groups": []interface{}{}, + }) +} + +// buildDRASCC returns the SCC for the GPU DRA driver daemonset. +// The daemonset uses hostPath volumes and mounts a service account token (projected volume). +func buildDRASCC(name string) *unstructured.Unstructured { + return buildSCC(name, map[string]interface{}{ + "allowPrivilegedContainer": false, + "allowHostDirVolumePlugin": true, + "allowHostIPC": false, + "allowHostNetwork": false, + "allowHostPID": false, + "allowHostPorts": false, + "allowPrivilegeEscalation": false, + "allowedCapabilities": nil, + "defaultAddCapabilities": nil, + "fsGroup": map[string]interface{}{"type": "RunAsAny"}, + "readOnlyRootFilesystem": false, + "requiredDropCapabilities": []interface{}{"ALL"}, + "runAsUser": map[string]interface{}{"type": "RunAsAny"}, + "seLinuxContext": map[string]interface{}{"type": "RunAsAny"}, + "seccompProfiles": []interface{}{"*"}, + "supplementalGroups": map[string]interface{}{"type": "RunAsAny"}, + "volumes": []interface{}{"hostPath", "projected"}, + "users": []interface{}{}, + "groups": []interface{}{}, + }) +} + +// buildFWUpdateSCC returns the SCC for the GPU firmware update Job pods. +// The updater container runs privileged as root to access GPU firmware interfaces. +func buildFWUpdateSCC(name string) *unstructured.Unstructured { + return buildSCC(name, map[string]interface{}{ + "allowPrivilegedContainer": true, + "allowHostDirVolumePlugin": false, + "allowHostIPC": false, + "allowHostNetwork": false, + "allowHostPID": false, + "allowHostPorts": false, + "allowPrivilegeEscalation": true, + "allowedCapabilities": nil, + "defaultAddCapabilities": nil, + "fsGroup": map[string]interface{}{"type": "RunAsAny"}, + "readOnlyRootFilesystem": false, + "requiredDropCapabilities": nil, + "runAsUser": map[string]interface{}{"type": "RunAsAny"}, + "seLinuxContext": map[string]interface{}{"type": "RunAsAny"}, + "seccompProfiles": []interface{}{"*"}, + "supplementalGroups": map[string]interface{}{"type": "RunAsAny"}, + "volumes": []interface{}{"emptyDir"}, + "users": []interface{}{}, + "groups": []interface{}{}, + }) +} + +func buildOpenShiftNames(crName, componentName string) (sccName string, roleName string, bindingName string, saName string) { + sccName = fmt.Sprintf("%s-%s-scc", crName, componentName) + roleName = fmt.Sprintf("%s-%s-scc-role", crName, componentName) + bindingName = fmt.Sprintf("%s-%s-scc-binding", crName, componentName) + saName = fmt.Sprintf("%s-%s-sa", crName, componentName) + + return sccName, roleName, bindingName, saName +} + +func ensureSCC(ctx context.Context, c client.Client, scc *unstructured.Unstructured) error { + existing := &unstructured.Unstructured{} + existing.SetAPIVersion(sccAPIVersion) + existing.SetKind(sccKind) + + err := c.Get(ctx, client.ObjectKey{Name: scc.GetName()}, existing) + if errors.IsNotFound(err) { + if createErr := c.Create(ctx, scc); createErr != nil && !errors.IsAlreadyExists(createErr) { + return createErr + } + + return nil + } + + if err != nil { + return err + } + + scc.SetResourceVersion(existing.GetResourceVersion()) + + return c.Update(ctx, scc) +} + +// createSCCRole creates a ClusterRole that allows using the named SCC if it does not already exist. +func createSCCRole(ctx context.Context, c client.Client, roleName, sccName string) error { + cr := &rbac.ClusterRole{ + ObjectMeta: metav1.ObjectMeta{Name: roleName}, + Rules: []rbac.PolicyRule{{ + APIGroups: []string{"security.openshift.io"}, + Resources: []string{"securitycontextconstraints"}, + ResourceNames: []string{sccName}, + Verbs: []string{"use"}, + }}, + } + + existing := &rbac.ClusterRole{} + + err := c.Get(ctx, client.ObjectKey{Name: roleName}, existing) + if errors.IsNotFound(err) { + if createErr := c.Create(ctx, cr); createErr != nil && !errors.IsAlreadyExists(createErr) { + return createErr + } + + return nil + } + + return err +} + +// createSCCRoleBinding creates a ClusterRoleBinding that binds the ServiceAccount to the SCC ClusterRole +// if the binding does not already exist. +func createSCCRoleBinding(ctx context.Context, c client.Client, bindingName, roleName, saName, namespace string) error { + crb := &rbac.ClusterRoleBinding{ + ObjectMeta: metav1.ObjectMeta{Name: bindingName}, + RoleRef: rbac.RoleRef{ + APIGroup: "rbac.authorization.k8s.io", + Kind: "ClusterRole", + Name: roleName, + }, + Subjects: []rbac.Subject{{ + Kind: "ServiceAccount", + Name: saName, + Namespace: namespace, + }}, + } + + existing := &rbac.ClusterRoleBinding{} + + err := c.Get(ctx, client.ObjectKey{Name: bindingName}, existing) + if errors.IsNotFound(err) { + if createErr := c.Create(ctx, crb); createErr != nil && !errors.IsAlreadyExists(createErr) { + return createErr + } + + return nil + } + + return err +} + +// createServiceAccount creates a namespace-scoped ServiceAccount if it does not already exist. +func createServiceAccount(ctx context.Context, c client.Client, name, namespace string) error { + sa := &core.ServiceAccount{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + }, + } + + existing := &core.ServiceAccount{} + + err := c.Get(ctx, client.ObjectKey{Name: name, Namespace: namespace}, existing) + if errors.IsNotFound(err) { + if createErr := c.Create(ctx, sa); createErr != nil && !errors.IsAlreadyExists(createErr) { + return createErr + } + + return nil + } + + return err +} + +// deleteOpenShiftSCCResources deletes all OpenShift SCC-related resources for a component. +// Each resource is deleted independently; NotFound errors are ignored. +// Pass an empty saName or namespace to skip ServiceAccount deletion. +func deleteOpenShiftSCCResources(ctx context.Context, c client.Client, sccName, roleName, bindingName, saName, namespace string) { + scc := &unstructured.Unstructured{} + scc.SetAPIVersion(sccAPIVersion) + scc.SetKind(sccKind) + scc.SetName(sccName) + + if err := c.Delete(ctx, scc); err != nil && !errors.IsNotFound(err) { + klog.Errorf("Failed to delete SCC %s: %v", sccName, err) + } + + cr := &rbac.ClusterRole{ObjectMeta: metav1.ObjectMeta{Name: roleName}} + if err := c.Delete(ctx, cr); err != nil && !errors.IsNotFound(err) { + klog.Errorf("Failed to delete ClusterRole %s: %v", roleName, err) + } + + crb := &rbac.ClusterRoleBinding{ObjectMeta: metav1.ObjectMeta{Name: bindingName}} + if err := c.Delete(ctx, crb); err != nil && !errors.IsNotFound(err) { + klog.Errorf("Failed to delete ClusterRoleBinding %s: %v", bindingName, err) + } + + if saName != "" && namespace != "" { + sa := &core.ServiceAccount{ObjectMeta: metav1.ObjectMeta{Name: saName, Namespace: namespace}} + if err := c.Delete(ctx, sa); err != nil && !errors.IsNotFound(err) { + klog.Errorf("Failed to delete ServiceAccount %s: %v", saName, err) + } + } +} diff --git a/internal/controller/openshift_test.go b/internal/controller/openshift_test.go new file mode 100644 index 0000000..24b85f8 --- /dev/null +++ b/internal/controller/openshift_test.go @@ -0,0 +1,262 @@ +/* +Copyright 2026 Intel Corporation. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controller + +import ( + "context" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + core "k8s.io/api/core/v1" + rbac "k8s.io/api/rbac/v1" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +var _ = Describe("OpenShift SCC helpers", func() { + const testOpenshiftNs = "default" + ctx := context.Background() + + Context("SCC builder functions", func() { + It("buildDevicePluginSCC sets correct fields", func() { + scc := buildDevicePluginSCC("dp-builder-test") + + Expect(scc.GetName()).To(Equal("dp-builder-test")) + Expect(scc.GetKind()).To(Equal("SecurityContextConstraints")) + Expect(scc.GetAPIVersion()).To(Equal("security.openshift.io/v1")) + Expect(scc.Object["allowPrivilegedContainer"]).To(BeFalse()) + Expect(scc.Object["allowPrivilegeEscalation"]).To(BeFalse()) + Expect(scc.Object["allowHostDirVolumePlugin"]).To(BeTrue()) + Expect(scc.Object["allowHostNetwork"]).To(BeFalse()) + + drops, ok := scc.Object["requiredDropCapabilities"].([]interface{}) + Expect(ok).To(BeTrue()) + Expect(drops).To(ContainElement("ALL")) + + vols, ok := scc.Object["volumes"].([]interface{}) + Expect(ok).To(BeTrue()) + Expect(vols).To(ContainElements("hostPath", "emptyDir")) + }) + + It("buildXpuManagerSCC sets correct fields", func() { + scc := buildXpuManagerSCC("xpum-builder-test") + + Expect(scc.GetName()).To(Equal("xpum-builder-test")) + Expect(scc.Object["allowPrivilegedContainer"]).To(BeFalse()) + + caps, ok := scc.Object["allowedCapabilities"].([]interface{}) + Expect(ok).To(BeTrue()) + Expect(caps).To(ContainElement("SYS_ADMIN")) + + drops, ok := scc.Object["requiredDropCapabilities"].([]interface{}) + Expect(ok).To(BeTrue()) + Expect(drops).To(ContainElement("ALL")) + + vols, ok := scc.Object["volumes"].([]interface{}) + Expect(ok).To(BeTrue()) + Expect(vols).To(ContainElements("hostPath", "configMap")) + }) + + It("buildDRASCC sets correct fields", func() { + scc := buildDRASCC("dra-builder-test") + + Expect(scc.GetName()).To(Equal("dra-builder-test")) + Expect(scc.Object["allowPrivilegedContainer"]).To(BeFalse()) + Expect(scc.Object["allowPrivilegeEscalation"]).To(BeFalse()) + + drops, ok := scc.Object["requiredDropCapabilities"].([]interface{}) + Expect(ok).To(BeTrue()) + Expect(drops).To(ContainElement("ALL")) + + vols, ok := scc.Object["volumes"].([]interface{}) + Expect(ok).To(BeTrue()) + Expect(vols).To(ContainElements("hostPath", "projected")) + }) + + It("buildFWUpdateSCC sets correct fields", func() { + scc := buildFWUpdateSCC("fwupdate-builder-test") + + Expect(scc.GetName()).To(Equal("fwupdate-builder-test")) + Expect(scc.GetKind()).To(Equal("SecurityContextConstraints")) + Expect(scc.Object["allowPrivilegedContainer"]).To(BeTrue()) + Expect(scc.Object["allowPrivilegeEscalation"]).To(BeTrue()) + Expect(scc.Object["allowHostDirVolumePlugin"]).To(BeFalse()) + Expect(scc.Object["allowHostNetwork"]).To(BeFalse()) + + vols, ok := scc.Object["volumes"].([]interface{}) + Expect(ok).To(BeTrue()) + Expect(vols).To(ContainElement("emptyDir")) + Expect(vols).NotTo(ContainElement("hostPath")) + }) + }) + + Context("ensureSCC", func() { + It("creates SCC when not found and is idempotent", func() { + name := "test-ensure-scc-create" + scc := buildDevicePluginSCC(name) + + By("first call creates the SCC") + Expect(ensureSCC(ctx, k8sClient, scc)).To(Succeed()) + + existing := &unstructured.Unstructured{} + existing.SetAPIVersion(sccAPIVersion) + existing.SetKind(sccKind) + Expect(k8sClient.Get(ctx, client.ObjectKey{Name: name}, existing)).To(Succeed()) + + By("second call is idempotent") + Expect(ensureSCC(ctx, k8sClient, buildDevicePluginSCC(name))).To(Succeed()) + + DeferCleanup(func() { + scc := &unstructured.Unstructured{} + scc.SetAPIVersion(sccAPIVersion) + scc.SetKind(sccKind) + scc.SetName(name) + _ = k8sClient.Delete(ctx, scc) + }) + }) + }) + + Context("ensureSCCRole", func() { + It("creates ClusterRole when not found and is idempotent", func() { + roleName := "test-ensure-scc-role" + sccName := "test-some-scc" + + By("first call creates the ClusterRole") + Expect(createSCCRole(ctx, k8sClient, roleName, sccName)).To(Succeed()) + + cr := &rbac.ClusterRole{} + Expect(k8sClient.Get(ctx, client.ObjectKey{Name: roleName}, cr)).To(Succeed()) + Expect(cr.Rules).To(HaveLen(1)) + Expect(cr.Rules[0].ResourceNames).To(ContainElement(sccName)) + Expect(cr.Rules[0].Verbs).To(ContainElement("use")) + + By("second call is idempotent") + Expect(createSCCRole(ctx, k8sClient, roleName, sccName)).To(Succeed()) + + DeferCleanup(func() { + _ = k8sClient.Delete(ctx, &rbac.ClusterRole{ObjectMeta: metav1.ObjectMeta{Name: roleName}}) + }) + }) + }) + + Context("ensureSCCRoleBinding", func() { + It("creates ClusterRoleBinding when not found and is idempotent", func() { + bindingName := "test-ensure-scc-binding" + roleName := "test-role-for-binding" + saName := "test-sa" + namespace := testOpenshiftNs + + By("first call creates the ClusterRoleBinding") + Expect(createSCCRoleBinding(ctx, k8sClient, bindingName, roleName, saName, namespace)).To(Succeed()) + + crb := &rbac.ClusterRoleBinding{} + Expect(k8sClient.Get(ctx, client.ObjectKey{Name: bindingName}, crb)).To(Succeed()) + Expect(crb.RoleRef.Name).To(Equal(roleName)) + Expect(crb.Subjects).To(HaveLen(1)) + Expect(crb.Subjects[0].Name).To(Equal(saName)) + Expect(crb.Subjects[0].Namespace).To(Equal(namespace)) + + By("second call is idempotent") + Expect(createSCCRoleBinding(ctx, k8sClient, bindingName, roleName, saName, namespace)).To(Succeed()) + + DeferCleanup(func() { + _ = k8sClient.Delete(ctx, &rbac.ClusterRoleBinding{ObjectMeta: metav1.ObjectMeta{Name: bindingName}}) + }) + }) + }) + + Context("ensureServiceAccount", func() { + ensureNs := testOpenshiftNs + + It("creates ServiceAccount when not found and is idempotent", func() { + saName := "test-ensure-sa" + + By("first call creates the ServiceAccount") + Expect(createServiceAccount(ctx, k8sClient, saName, ensureNs)).To(Succeed()) + + sa := &core.ServiceAccount{} + Expect(k8sClient.Get(ctx, client.ObjectKey{Name: saName, Namespace: ensureNs}, sa)).To(Succeed()) + + By("second call is idempotent") + Expect(createServiceAccount(ctx, k8sClient, saName, ensureNs)).To(Succeed()) + + DeferCleanup(func() { + _ = k8sClient.Delete(ctx, &core.ServiceAccount{ObjectMeta: metav1.ObjectMeta{Name: saName, Namespace: ensureNs}}) + }) + }) + }) + + Context("deleteOpenShiftSCCResources", func() { + deleteNs := testOpenshiftNs + + It("deletes all resources when they exist", func() { + sccName := "test-delete-scc" + roleName := "test-delete-role" + bindingName := "test-delete-binding" + saName := "test-delete-sa" + + By("creating the resources") + Expect(ensureSCC(ctx, k8sClient, buildDevicePluginSCC(sccName))).To(Succeed()) + Expect(createSCCRole(ctx, k8sClient, roleName, sccName)).To(Succeed()) + Expect(createSCCRoleBinding(ctx, k8sClient, bindingName, roleName, saName, deleteNs)).To(Succeed()) + Expect(createServiceAccount(ctx, k8sClient, saName, deleteNs)).To(Succeed()) + + By("deleting all resources") + deleteOpenShiftSCCResources(ctx, k8sClient, sccName, roleName, bindingName, saName, deleteNs) + + scc := &unstructured.Unstructured{} + scc.SetAPIVersion(sccAPIVersion) + scc.SetKind(sccKind) + Expect(k8sClient.Get(ctx, client.ObjectKey{Name: sccName}, scc)).To(Satisfy(errors.IsNotFound)) + Expect(k8sClient.Get(ctx, client.ObjectKey{Name: roleName}, &rbac.ClusterRole{})).To(Satisfy(errors.IsNotFound)) + Expect(k8sClient.Get(ctx, client.ObjectKey{Name: bindingName}, &rbac.ClusterRoleBinding{})).To(Satisfy(errors.IsNotFound)) + Expect(k8sClient.Get(ctx, client.ObjectKey{Name: saName, Namespace: deleteNs}, &core.ServiceAccount{})).To(Satisfy(errors.IsNotFound)) + }) + + It("tolerates NotFound for all resources", func() { + // None of these exist — should not panic or return error + deleteOpenShiftSCCResources(ctx, k8sClient, + "nonexistent-scc", "nonexistent-role", "nonexistent-binding", + "nonexistent-sa", deleteNs) + }) + + It("skips ServiceAccount deletion when saName is empty", func() { + sccName := "test-skip-sa-scc" + roleName := "test-skip-sa-role" + bindingName := "test-skip-sa-binding" + saName := "test-skip-sa" + + By("creating the resources") + Expect(ensureSCC(ctx, k8sClient, buildDevicePluginSCC(sccName))).To(Succeed()) + Expect(createSCCRole(ctx, k8sClient, roleName, sccName)).To(Succeed()) + Expect(createSCCRoleBinding(ctx, k8sClient, bindingName, roleName, saName, deleteNs)).To(Succeed()) + Expect(createServiceAccount(ctx, k8sClient, saName, deleteNs)).To(Succeed()) + + By("deleting with empty saName — SA should remain") + deleteOpenShiftSCCResources(ctx, k8sClient, sccName, roleName, bindingName, "", deleteNs) + + sa := &core.ServiceAccount{} + Expect(k8sClient.Get(ctx, client.ObjectKey{Name: saName, Namespace: deleteNs}, sa)).To(Succeed()) + + DeferCleanup(func() { + _ = k8sClient.Delete(ctx, &core.ServiceAccount{ObjectMeta: metav1.ObjectMeta{Name: saName, Namespace: deleteNs}}) + }) + }) + }) +}) diff --git a/internal/controller/suite_test.go b/internal/controller/suite_test.go index cba30e3..22daa3c 100644 --- a/internal/controller/suite_test.go +++ b/internal/controller/suite_test.go @@ -88,6 +88,7 @@ var _ = BeforeSuite(func() { CRDDirectoryPaths: []string{ filepath.Join("..", "..", "config", "crd", "bases"), filepath.Join("..", "..", "config", "deployments", "nfd", "crds"), + "testdata", }, ErrorIfCRDPathMissing: true, } diff --git a/internal/controller/testdata/scc-crd.yaml b/internal/controller/testdata/scc-crd.yaml new file mode 100644 index 0000000..bf36e46 --- /dev/null +++ b/internal/controller/testdata/scc-crd.yaml @@ -0,0 +1,23 @@ +# Minimal SecurityContextConstraints CRD fixture for envtest. +# OpenShift ships this CRD; we register a permissive stub so that tests can +# create/get/delete SCC objects without a real OpenShift cluster. +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: securitycontextconstraints.security.openshift.io +spec: + group: security.openshift.io + names: + kind: SecurityContextConstraints + listKind: SecurityContextConstraintsList + plural: securitycontextconstraints + singular: securitycontextconstraint + scope: Cluster + versions: + - name: v1 + served: true + storage: true + schema: + openAPIV3Schema: + type: object + x-kubernetes-preserve-unknown-fields: true diff --git a/internal/controller/xpumanager_controller.go b/internal/controller/xpumanager_controller.go index a82771b..794e88e 100644 --- a/internal/controller/xpumanager_controller.go +++ b/internal/controller/xpumanager_controller.go @@ -56,6 +56,8 @@ const ( otelConfigMountDir = "/etc/xpumd" otelConfigMountPath = otelConfigMountDir + "/otel-config.yaml" otelConfigHashKey = "gpu.intel.com/xpum-otel-config-hash" + + xpumResourcePart = "xpu-manager" ) type XpuManagerReconciler struct { @@ -459,6 +461,50 @@ func (r *XpuManagerReconciler) updateDaemonSetObject(ds *apps.DaemonSet, spec *v } else { cspec.ImagePullSecrets = nil } + + if r.Opts.OpenShift { + _, _, _, saName := buildOpenShiftNames(spec.Name, xpumResourcePart) + cspec.ServiceAccountName = saName + + if cspec.Containers[0].SecurityContext == nil { + cspec.Containers[0].SecurityContext = &core.SecurityContext{} + } + + // On OpenShift, SELinux labels the container process as container_t which cannot + // write to host directories or sysfs GPU PMU control files. spc_t bypasses SELinux + // confinement so xpumd can write to /sys/.../control and /run/xpumd. + cspec.Containers[0].SecurityContext.SELinuxOptions = &core.SELinuxOptions{ + Type: "spc_t", + } + } +} + +func (r *XpuManagerReconciler) createOpenShiftResourcesIfNotExists(ctx context.Context, cpName string) error { + sccName, roleName, bindingName, saName := buildOpenShiftNames(cpName, xpumResourcePart) + + if err := createServiceAccount(ctx, r.Client, saName, r.Opts.Namespace); err != nil { + return fmt.Errorf("failed to ensure XPUM ServiceAccount: %w", err) + } + + if err := ensureSCC(ctx, r.Client, buildXpuManagerSCC(sccName)); err != nil { + return fmt.Errorf("failed to ensure XPUM SCC: %w", err) + } + + if err := createSCCRole(ctx, r.Client, roleName, sccName); err != nil { + return fmt.Errorf("failed to ensure XPUM SCC ClusterRole: %w", err) + } + + if err := createSCCRoleBinding(ctx, r.Client, bindingName, roleName, saName, r.Opts.Namespace); err != nil { + return fmt.Errorf("failed to ensure XPUM SCC ClusterRoleBinding: %w", err) + } + + return nil +} + +func (r *XpuManagerReconciler) cleanupOpenShiftResources(ctx context.Context, cpName string) { + sccName, roleName, bindingName, saName := buildOpenShiftNames(cpName, xpumResourcePart) + + deleteOpenShiftSCCResources(ctx, r.Client, sccName, roleName, bindingName, saName, r.Opts.Namespace) } func (r *XpuManagerReconciler) createDaemonSet(ctx context.Context, obj client.Object) (ctrl.Result, error) { @@ -504,6 +550,10 @@ func (r *XpuManagerReconciler) createDaemonSet(ctx context.Context, obj client.O } func (r *XpuManagerReconciler) removeDeploymentIfExists(ctx context.Context) (ctrl.Result, error) { + if r.Opts.OpenShift { + r.cleanupOpenShiftResources(ctx, r.Opts.ReqName) + } + dss := &apps.DaemonSetList{} matching := client.MatchingLabels{ @@ -551,6 +601,14 @@ func (r *XpuManagerReconciler) Reconcile(ctx context.Context, cp *v1alpha.Cluste return ctrl.Result{}, err } + if r.Opts.OpenShift { + if err := r.createOpenShiftResourcesIfNotExists(ctx, cp.Name); err != nil { + klog.Error(err, "unable to ensure OpenShift resources for XPU Manager") + + return ctrl.Result{}, err + } + } + if len(olderDs.Items) == 0 { return r.createDaemonSet(ctx, cp) } diff --git a/internal/controller/xpumanager_controller_test.go b/internal/controller/xpumanager_controller_test.go index 74263c6..34e6ff6 100644 --- a/internal/controller/xpumanager_controller_test.go +++ b/internal/controller/xpumanager_controller_test.go @@ -179,4 +179,20 @@ var _ = Describe("XPU manager daemonset", func() { Expect(*ds.Spec.Template.Spec.AutomountServiceAccountToken).To(BeFalse()) }) }) + + Context("Log level mapping", func() { + It("maps LogLevel 1 to warn", func() { + cp := &v1alpha.ClusterPolicy{ + Spec: v1alpha.ClusterPolicySpec{LogLevel: 1}, + } + Expect(logLevelForXpum(cp)).To(Equal("warn")) + }) + + It("maps LogLevel 2 to info", func() { + cp := &v1alpha.ClusterPolicy{ + Spec: v1alpha.ClusterPolicySpec{LogLevel: 2}, + } + Expect(logLevelForXpum(cp)).To(Equal("info")) + }) + }) })