From 6b6454d9074920eba56545bed544c71263727f44 Mon Sep 17 00:00:00 2001
From: Eric Meehan
Date: Sun, 25 May 2025 22:10:39 -0400
Subject: [PATCH] Matrix Stack

Added ansible-role-matrix-stack submodule.

Also replace the nvidia device plugin Helm release with the nvidia gpu
operator (driver and toolkit disabled), and apply the
ericomeehan.nvidia_driver role to hosts that set nvidia_driver_needed.
---
Review notes (this section is ignored by git am):

- NOTE(review): this copy was rebuilt from a whitespace-collapsed paste.
  Line breaks were checked against the hunk header counts, but the
  indentation of context lines (and the tabs in .gitmodules) is
  reconstructed and unverified. If a hunk is rejected, retry with
  `git am --ignore-whitespace`.
- The role condition is written as
  `nvidia_driver_needed | default(false) | bool` so that hosts which do
  not define nvidia_driver_needed skip the role instead of failing on an
  undefined variable. Only host_vars/alpha-worker-0.yaml sets it here.
- The "Add nvdp repository" task in nvidia-device-plugin.yaml is renamed
  to "Add nvidia repository" to match the repository it now registers
  and the same task in main.yaml.
- The post-image blob ids on the `index` lines of main.yaml and
  nvidia-device-plugin.yaml predate the two adjustments above.

 .gitmodules                    |  3 +++
 host_vars/alpha-worker-0.yaml  |  1 +
 main.yaml                      | 21 ++++++++++++++-------
 nvidia-device-plugin.yaml      | 19 ++++++++++++-------
 roles/ericomeehan.matrix-stack |  1 +
 test.yaml                      |  4 +++-
 6 files changed, 34 insertions(+), 15 deletions(-)
 create mode 160000 roles/ericomeehan.matrix-stack

diff --git a/.gitmodules b/.gitmodules
index d5db6d5..52da8d1 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -19,3 +19,6 @@
 [submodule "roles/ericomeehan.libvirt-guest"]
 	path = roles/ericomeehan.libvirt-guest
 	url = git@gitea.eom.dev:DevOps/ansible-role-libvirt-guest.git
+[submodule "roles/ericomeehan.matrix-stack"]
+	path = roles/ericomeehan.matrix-stack
+	url = git@gitea.eom.dev:DevOps/ansible-role-matrix-stack.git
diff --git a/host_vars/alpha-worker-0.yaml b/host_vars/alpha-worker-0.yaml
index 1a9fba1..3280ebf 100644
--- a/host_vars/alpha-worker-0.yaml
+++ b/host_vars/alpha-worker-0.yaml
@@ -1 +1,2 @@
 name: alpha-worker-0
+nvidia_driver_needed: true
diff --git a/main.yaml b/main.yaml
index 2948dda..5f7bb26 100644
--- a/main.yaml
+++ b/main.yaml
@@ -64,6 +64,8 @@
     - ../secrets.yaml
   roles:
     - role: ericomeehan.ericomeehan
+    - role: ericomeehan.nvidia_driver
+      when: nvidia_driver_needed | default(false) | bool
 
 - name: Initialize Kubernetes clusters
   hosts: clusters
@@ -153,10 +155,10 @@
         name: bitnami
         repo_url: https://charts.bitnami.com/bitnami
 
-    - name: Add nvdp repository
+    - name: Add nvidia repository
       kubernetes.core.helm_repository:
-        name: nvdp
-        repo_url: https://nvidia.github.io/k8s-device-plugin
+        name: nvidia
+        repo_url: https://helm.ngc.nvidia.com/nvidia
 
     - name: Update Helm repos
       command: helm repo update
@@ -266,12 +268,17 @@
           ingress:
             ingressClassName: nginx
 
-    - name: Deploy nvidia device plugin
+    - name: Deploy nvidia gpu operator
       kubernetes.core.helm:
-        name: nvdp
-        chart_ref: nvdp/nvidia-device-plugin
-        release_namespace: nvidia-device-plugin
+        name: gpu-operator
+        chart_ref: nvidia/gpu-operator
+        release_namespace: gpu-operator
         create_namespace: true
+        values:
+          driver:
+            enabled: false
+          toolkit:
+            enabled: false
 
 - name: Port forward HTTP(S) to Ingress Controllers
   hosts: localhost
diff --git a/nvidia-device-plugin.yaml b/nvidia-device-plugin.yaml
index 2edc9d9..b1c96cb 100644
--- a/nvidia-device-plugin.yaml
+++ b/nvidia-device-plugin.yaml
@@ -4,15 +4,20 @@
   tasks:
-    - name: Add nvdp repository
+    - name: Add nvidia repository
       kubernetes.core.helm_repository:
-        name: nvdp
-        repo_url: https://nvidia.github.io/k8s-device-plugin
+        name: nvidia
+        repo_url: https://helm.ngc.nvidia.com/nvidia
 
     - name: Update Helm repos
       command: helm repo update
 
-    - name: Deploy nvidia device plugin
+    - name: Deploy nvidia gpu operator
       kubernetes.core.helm:
-        name: nvdp
-        chart_ref: nvdp/nvidia-device-plugin
-        release_namespace: nvidia-device-plugin
+        name: gpu-operator
+        chart_ref: nvidia/gpu-operator
+        release_namespace: gpu-operator
         create_namespace: true
+        values:
+          driver:
+            enabled: false
+          toolkit:
+            enabled: false
diff --git a/roles/ericomeehan.matrix-stack b/roles/ericomeehan.matrix-stack
new file mode 160000
index 0000000..eed66cd
--- /dev/null
+++ b/roles/ericomeehan.matrix-stack
@@ -0,0 +1 @@
+Subproject commit eed66cd1834e81e7eb7c341f4f06d0354ce00f82
diff --git a/test.yaml b/test.yaml
index 7166056..baa622e 100644
--- a/test.yaml
+++ b/test.yaml
@@ -4,4 +4,6 @@
   vars_files:
     - ../secrets.yaml
   roles:
-    - role: ericomeehan.eom
+    - role: ericomeehan.matrix-stack
+      vars:
+        server_name: eom.dev