From 7f07dcd02614cfdacebc57868b2a463a2c423b29 Mon Sep 17 00:00:00 2001 From: Adam DeConinck Date: Fri, 11 Sep 2020 18:29:35 +0000 Subject: [PATCH 01/23] Ensure we have kernel-headers before installing driver --- tasks/install-redhat.yml | 64 ++++++++++++++++++---------------------- 1 file changed, 28 insertions(+), 36 deletions(-) diff --git a/tasks/install-redhat.yml b/tasks/install-redhat.yml index 65ec4ba..bea013c 100644 --- a/tasks/install-redhat.yml +++ b/tasks/install-redhat.yml @@ -1,4 +1,32 @@ --- +# We have to do this because the CentOS mirrors don't keep kernel-headers, etc +# for older kernels. +- name: ensure we have kernel-headers installed for the current kernel + block: + - name: attempt to install kernel support packages for current version + yum: + name: "{{ item }}-{{ ansible_kernel }}" + state: present + with_items: + - "kernel-headers" + - "kernel-tools" + - "kernel-tools-libs" + - "kernel-devel" + - "kernel-debug-devel" + environment: "{{proxy_env if proxy_env is defined else {}}}" + rescue: + - name: update the kernel to latest version so we have a supported version + yum: + name: + - "kernel" + - "kernel-headers" + - "kernel-tools" + - "kernel-tools-libs" + - "kernel-devel" + - "kernel-debug-devel" + state: latest + environment: "{{proxy_env if proxy_env is defined else {}}}" + - name: add epel repo yum_repository: name: epel @@ -26,39 +54,3 @@ register: install_driver environment: "{{proxy_env if proxy_env is defined else {}}}" -# The driver package pulls in the latest kernel-headers package, but not the -# latest kernel. Check to see if there is a mismatch. 
- -- name: check kernel versions - yum: - list: kernel - register: yum_list - -- name: register installed kernel version - debug: - msg: "{{ yum_list.results | selectattr('yumstate', 'equalto', 'installed') | list }}" - register: kernel_version - -- name: check kernel-headers versions - yum: - list: kernel-headers - register: yum_list - -- name: register installed kernel-headers version - debug: - msg: "{{ yum_list.results | selectattr('yumstate', 'equalto', 'installed') | list }}" - register: kernel_headers_version - -- name: update kernel if headers don't match - yum: - name: - - kernel - - kernel-tools - - kernel-tools-libs - - kernel-devel - - kernel-debug-devel - - kernel-headers - state: latest - register: kernel_update - when: kernel_version.msg[0].release != kernel_headers_version.msg[0].release - environment: "{{proxy_env if proxy_env is defined else {}}}" From 8ba1df5866c7d4b84121b0327c874d2e3447dec8 Mon Sep 17 00:00:00 2001 From: Adam DeConinck Date: Fri, 11 Sep 2020 18:42:45 +0000 Subject: [PATCH 02/23] reboot after kernel update --- tasks/install-redhat.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tasks/install-redhat.yml b/tasks/install-redhat.yml index bea013c..6f0225a 100644 --- a/tasks/install-redhat.yml +++ b/tasks/install-redhat.yml @@ -26,6 +26,8 @@ - "kernel-debug-devel" state: latest environment: "{{proxy_env if proxy_env is defined else {}}}" + - name: reboot to pick up the new kernel + reboot: - name: add epel repo yum_repository: From e80bcdb2ce2dab5ba0bf08dfd80c92eaeeeb5d31 Mon Sep 17 00:00:00 2001 From: Adam DeConinck Date: Fri, 11 Sep 2020 19:50:38 +0000 Subject: [PATCH 03/23] Expand documentation in README - Add a note that the role should be run from a separate ansible control node - Document the role variables available to change --- README.md | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/README.md b/README.md index 6dc977a..a2c85c7 100644 --- a/README.md +++ b/README.md @@ -2,6 
+2,15 @@ An Ansible role to install the NVIDIA driver from the NVIDIA CUDA repositories. +## Requirements + +In the process of installing the NVIDIA driver, this role will reboot the nodes where it runs. +Because of this, we strongly recommend that you run `ansible-playbook` from a different node from the GPU nodes where you are installing the driver. + +If you attempt to run Ansible on the same node where you are installing the driver, this role will either: + +* Refuse to proceed with an error like `Running reboot with local connection would reboot the control node` (if running with the `local` connection) +* Reboot the node you're running on, interrupting the playbook execution! (if running with an `ssh` connection against localhost) ## Installing @@ -11,6 +20,37 @@ This role can be installed using [Ansible Galaxy](https://galaxy.ansible.com/nvi $ ansible-galaxy install nvidia.nvidia_driver ``` +## Role variables + +| Variable | Default value | Description | +| -------- | ------------- | ----------- | +| `nvidia_driver_package_state` | `"present"` | Package state for NVIDIA driver packages | +| `nvidia_driver_package_version` | `""` | Package version to install. Note that this should match the actual version of the deb or RPM package to be installed. 
| +| `nvidia_driver_persistence_mode_on` | `yes` | Whether to enable persistence mode (boolean) | +| `nvidia_driver_skip_reboot` | `no` | Whether to skip rebooting the node during the install | +| `nvidia_driver_module_file` | `"/etc/modprobe.d/nvidia.conf"` | Filename to use for NVIDIA driver parameters | +| `nvidia_driver_module_params` | `""` | Parameters to pass to the NVIDIA driver | + +### Red Hat specific variables + +| `nvidia_driver_rhel_epel_repo_baseurl` | `"https://download.fedoraproject.org/pub/epel/$releasever/$basearch/"` | Base URL to use for EPEL repo | +| `nvidia_driver_rhel_epel_repo_gpgkey` | `"https://epel.mirror.constant.com//RPM-GPG-KEY-EPEL-{{ ansible_distribution_major_version }}"` | GPG key for the EPEL repo | +| `nvidia_driver_rhel_cuda_repo_baseurl` | `"https://developer.download.nvidia.com/compute/cuda/repos/{{ _rhel_repo_dir }}/"` | Base URL to use for CUDA repo | +| `nvidia_driver_rhel_cuda_repo_gpgkey` | `"https://developer.download.nvidia.com/compute/cuda/repos/{{ _rhel_repo_dir }}/7fa2af80.pub"` | GPG key for the CUDA repo | + +### Ubuntu specific variables + +| `nvidia_driver_ubuntu_cuda_repo_baseurl` | `"http://developer.download.nvidia.com/compute/cuda/repos/{{ _ubuntu_repo_dir }}"` | Base URL to use for CUDA repo | +| `nvidia_driver_ubuntu_cuda_repo_gpgkey_url` | `"https://developer.download.nvidia.com/compute/cuda/repos/{{ _ubuntu_repo_dir }}/7fa2af80.pub"` | GPG key for the CUDA repo | +| `nvidia_driver_ubuntu_cuda_repo_gpgkey_id` | `"7fa2af80"` | GPG key ID for the CUDA repo | + +## Example playbook + +``` +- hosts: gpu_nodes + roles: + - nvidia.nvidia_driver +``` ## Supported distributions From 6609d287a97959ed8e3f81f26fd246cfabbcc296 Mon Sep 17 00:00:00 2001 From: Adam DeConinck Date: Fri, 11 Sep 2020 19:55:08 +0000 Subject: [PATCH 04/23] fix table formatting --- README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/README.md b/README.md index a2c85c7..adf52b1 100644 --- a/README.md +++ b/README.md @@ -22,6 
+22,7 @@ $ ansible-galaxy install nvidia.nvidia_driver ## Role variables + | Variable | Default value | Description | | -------- | ------------- | ----------- | | `nvidia_driver_package_state` | `"present"` | Package state for NVIDIA driver packages | @@ -33,6 +34,9 @@ $ ansible-galaxy install nvidia.nvidia_driver ### Red Hat specific variables + +| Variable | Default value | Description | +| -------- | ------------- | ----------- | | `nvidia_driver_rhel_epel_repo_baseurl` | `"https://download.fedoraproject.org/pub/epel/$releasever/$basearch/"` | Base URL to use for EPEL repo | | `nvidia_driver_rhel_epel_repo_gpgkey` | `"https://epel.mirror.constant.com//RPM-GPG-KEY-EPEL-{{ ansible_distribution_major_version }}"` | GPG key for the EPEL repo | | `nvidia_driver_rhel_cuda_repo_baseurl` | `"https://developer.download.nvidia.com/compute/cuda/repos/{{ _rhel_repo_dir }}/"` | Base URL to use for CUDA repo | @@ -40,6 +44,9 @@ $ ansible-galaxy install nvidia.nvidia_driver ### Ubuntu specific variables + +| Variable | Default value | Description | +| -------- | ------------- | ----------- | | `nvidia_driver_ubuntu_cuda_repo_baseurl` | `"http://developer.download.nvidia.com/compute/cuda/repos/{{ _ubuntu_repo_dir }}"` | Base URL to use for CUDA repo | | `nvidia_driver_ubuntu_cuda_repo_gpgkey_url` | `"https://developer.download.nvidia.com/compute/cuda/repos/{{ _ubuntu_repo_dir }}/7fa2af80.pub"` | GPG key for the CUDA repo | | `nvidia_driver_ubuntu_cuda_repo_gpgkey_id` | `"7fa2af80"` | GPG key ID for the CUDA repo | From 68cffed89beab7c410ccf3552e63bb509cc9b4c5 Mon Sep 17 00:00:00 2001 From: Adam DeConinck Date: Mon, 16 Nov 2020 18:23:03 +0000 Subject: [PATCH 05/23] Ubuntu: Use NVIDIA server drivers from Canonical - Add support for using the NVIDIA headless server packages from the upstream Canonical repos - Keep support for using the CUDA repository instead, but make non-default --- defaults/main.yml | 21 ++++++++++++++++-- tasks/install-ubuntu-cuda-repo.yml | 35 
++++++++++++++++++++++++++++++ tasks/install-ubuntu.yml | 28 ++++-------------------- tasks/main.yml | 8 +++++-- 4 files changed, 64 insertions(+), 28 deletions(-) create mode 100644 tasks/install-ubuntu-cuda-repo.yml diff --git a/defaults/main.yml b/defaults/main.yml index 528d531..37dc3b8 100644 --- a/defaults/main.yml +++ b/defaults/main.yml @@ -5,13 +5,30 @@ nvidia_driver_skip_reboot: no nvidia_driver_module_file: /etc/modprobe.d/nvidia.conf nvidia_driver_module_params: '' -# RedHat family +############################################################################## +# RedHat family # +############################################################################## nvidia_driver_rhel_epel_repo_baseurl: "https://download.fedoraproject.org/pub/epel/$releasever/$basearch/" nvidia_driver_rhel_epel_repo_gpgkey: "https://epel.mirror.constant.com//RPM-GPG-KEY-EPEL-{{ ansible_distribution_major_version }}" nvidia_driver_rhel_cuda_repo_baseurl: "https://developer.download.nvidia.com/compute/cuda/repos/{{ _rhel_repo_dir }}/" nvidia_driver_rhel_cuda_repo_gpgkey: "https://developer.download.nvidia.com/compute/cuda/repos/{{ _rhel_repo_dir }}/7fa2af80.pub" -# Ubuntu + +############################################################################## +# Ubuntu # +############################################################################## + +# Determine if we should install from CUDA repo instead of Canonical repos +nvidia_driver_ubuntu_install_from_cuda_repo: no + +# Installing with Canonical repositories +nvidia_driver_ubuntu_branch: "450" +nvidia_driver_ubuntu_packages: +- "nvidia-headless-{{ nvidia_driver_ubuntu_branch }}-server" +- "nvidia-utils-{{ nvidia_driver_ubuntu_branch }}-server" + +# Installing with CUDA repositories nvidia_driver_ubuntu_cuda_repo_gpgkey_url: "https://developer.download.nvidia.com/compute/cuda/repos/{{ _ubuntu_repo_dir }}/7fa2af80.pub" nvidia_driver_ubuntu_cuda_repo_gpgkey_id: "7fa2af80" nvidia_driver_ubuntu_cuda_repo_baseurl: 
"http://developer.download.nvidia.com/compute/cuda/repos/{{ _ubuntu_repo_dir }}" +nvidia_driver_ubuntu_cuda_package: "cuda-drivers" diff --git a/tasks/install-ubuntu-cuda-repo.yml b/tasks/install-ubuntu-cuda-repo.yml new file mode 100644 index 0000000..7aab994 --- /dev/null +++ b/tasks/install-ubuntu-cuda-repo.yml @@ -0,0 +1,35 @@ +--- +- name: remove ppa + apt_repository: + repo: ppa:graphics-drivers/ppa + state: absent + +- name: add pin file + copy: + src: "cuda-ubuntu.pin" + dest: "/etc/apt/preferences.d/cuda-repository-pin-600" + owner: "root" + group: "root" + mode: "0644" + +- name: add key + apt_key: + url: "{{ nvidia_driver_ubuntu_cuda_repo_gpgkey_url }}" + id: "{{ nvidia_driver_ubuntu_cuda_repo_gpgkey_id }}" + environment: "{{proxy_env if proxy_env is defined else {}}}" + + +- name: add repo + apt_repository: + repo: "deb {{ nvidia_driver_ubuntu_cuda_repo_baseurl }} /" + update_cache: yes + environment: "{{proxy_env if proxy_env is defined else {}}}" + +- name: install driver packages + apt: + name: "{{ nvidia_driver_package_version | ternary(nvidia_driver_ubuntu_cuda_package+'='+nvidia_driver_package_version, nvidia_driver_ubuntu_cuda_package) }}" + state: "{{ nvidia_driver_package_state }}" + autoremove: "{{ nvidia_driver_package_state == 'absent' }}" + purge: "{{ nvidia_driver_package_state == 'absent' }}" + register: install_driver + environment: "{{proxy_env if proxy_env is defined else {}}}" diff --git a/tasks/install-ubuntu.yml b/tasks/install-ubuntu.yml index 7bc40b4..7aa876c 100644 --- a/tasks/install-ubuntu.yml +++ b/tasks/install-ubuntu.yml @@ -4,32 +4,12 @@ repo: ppa:graphics-drivers/ppa state: absent -- name: add pin file - copy: - src: "cuda-ubuntu.pin" - dest: "/etc/apt/preferences.d/cuda-repository-pin-600" - owner: "root" - group: "root" - mode: "0644" - -- name: add key - apt_key: - url: "{{ nvidia_driver_ubuntu_cuda_repo_gpgkey_url }}" - id: "{{ nvidia_driver_ubuntu_cuda_repo_gpgkey_id }}" - environment: "{{proxy_env if proxy_env is 
defined else {}}}" - - -- name: add repo - apt_repository: - repo: "deb {{ nvidia_driver_ubuntu_cuda_repo_baseurl }} /" - update_cache: yes - environment: "{{proxy_env if proxy_env is defined else {}}}" - - name: install driver packages apt: - name: "{{ nvidia_driver_package_version | ternary('cuda-drivers='+nvidia_driver_package_version, 'cuda-drivers') }}" - state: "{{ nvidia_driver_package_state }}" + name: "{{ nvidia_driver_package_version | ternary(item+'='+nvidia_driver_package_version, item) }}" + state: "{{ nvidia_driver_package_version }}" autoremove: "{{ nvidia_driver_package_state == 'absent' }}" purge: "{{ nvidia_driver_package_state == 'absent' }}" + with_items: "{{ nvidia_driver_ubuntu_packages }}" register: install_driver - environment: "{{proxy_env if proxy_env is defined else {}}}" + envrionment: "{{proxy_env if proxy_env is defined else {}}}" diff --git a/tasks/main.yml b/tasks/main.yml index 9ab9c4f..d33f672 100644 --- a/tasks/main.yml +++ b/tasks/main.yml @@ -5,9 +5,13 @@ state: absent ignore_errors: true -- name: ubuntu install tasks +- name: ubuntu install tasks (canonical repos) include_tasks: install-ubuntu.yml - when: ansible_distribution == 'Ubuntu' + when: ansible_distribution == 'Ubuntu' and (not nvidia_driver_ubuntu_install_from_cuda_repo) + +- name: ubuntu install tasks (CUDA repo) + include_tasks: install-ubuntu-cuda-repo.yml + when: ansible_distribution == 'Ubuntu' and nvidia_driver_ubuntu_install_from_cuda_repo - name: redhat family install tasks include_tasks: install-redhat.yml From 4fcec816f5635440646bcc40e4192f2b9b245f15 Mon Sep 17 00:00:00 2001 From: Adam DeConinck Date: Mon, 16 Nov 2020 18:25:54 +0000 Subject: [PATCH 06/23] add a note on distro support --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index adf52b1..f3fcdeb 100644 --- a/README.md +++ b/README.md @@ -64,6 +64,8 @@ $ ansible-galaxy install nvidia.nvidia_driver Currently, this role supports the following Linux distributions: 
* NVIDIA DGX OS 4 +* NVIDIA DGX OS 5 * Ubuntu 18.04 LTS +* Ubuntu 20.04 LTS * CentOS 7 * Red Hat Enterprise Linux 7 From 9496992a2116148a7ed30c1761e3e0c282101877 Mon Sep 17 00:00:00 2001 From: Adam DeConinck Date: Mon, 16 Nov 2020 18:45:02 +0000 Subject: [PATCH 07/23] typo fix --- tasks/install-ubuntu.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/install-ubuntu.yml b/tasks/install-ubuntu.yml index 7aa876c..fb6fc74 100644 --- a/tasks/install-ubuntu.yml +++ b/tasks/install-ubuntu.yml @@ -12,4 +12,4 @@ purge: "{{ nvidia_driver_package_state == 'absent' }}" with_items: "{{ nvidia_driver_ubuntu_packages }}" register: install_driver - envrionment: "{{proxy_env if proxy_env is defined else {}}}" + environment: "{{proxy_env if proxy_env is defined else {}}}" From 4138b45d40216468e7df67ae1d2ac0d9d58c5546 Mon Sep 17 00:00:00 2001 From: Adam DeConinck Date: Mon, 16 Nov 2020 18:46:29 +0000 Subject: [PATCH 08/23] use correct var --- tasks/install-ubuntu.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/install-ubuntu.yml b/tasks/install-ubuntu.yml index fb6fc74..bcd0a39 100644 --- a/tasks/install-ubuntu.yml +++ b/tasks/install-ubuntu.yml @@ -7,7 +7,7 @@ - name: install driver packages apt: name: "{{ nvidia_driver_package_version | ternary(item+'='+nvidia_driver_package_version, item) }}" - state: "{{ nvidia_driver_package_version }}" + state: "{{ nvidia_driver_package_state }}" autoremove: "{{ nvidia_driver_package_state == 'absent' }}" purge: "{{ nvidia_driver_package_state == 'absent' }}" with_items: "{{ nvidia_driver_ubuntu_packages }}" From a7ea9206a5e7b07550bc54ea1c625215fec9e22d Mon Sep 17 00:00:00 2001 From: Adam DeConinck Date: Mon, 16 Nov 2020 18:54:36 +0000 Subject: [PATCH 09/23] Add notes on install process to README --- README.md | 7 +++++++ defaults/.main.yml.swp | Bin 0 -> 12288 bytes 2 files changed, 7 insertions(+) create mode 100644 defaults/.main.yml.swp diff --git a/README.md b/README.md index 
f3fcdeb..8b8279a 100644 --- a/README.md +++ b/README.md @@ -44,12 +44,19 @@ $ ansible-galaxy install nvidia.nvidia_driver ### Ubuntu specific variables +For Ubuntu installs, you have the choice of installing from the Canonical repositories or the NVIDIA CUDA repositories. + +By default, the Canonical repositories will be used, and the driver installed will be the headless server driver. | Variable | Default value | Description | | -------- | ------------- | ----------- | +| `nvidia_driver_ubuntu_install_from_cuda_repo` | `no` | Flag whether to use the CUDA repo | +| `nvidia_driver_ubuntu_branch` | `450` | Driver branch to use for the install | +| `nvidia_driver_ubuntu_packages` | `["nvidia-headless-450-server", "nvidia-utils-450-server"]` | Package names to install from Canonical repo | | `nvidia_driver_ubuntu_cuda_repo_baseurl` | `"http://developer.download.nvidia.com/compute/cuda/repos/{{ _ubuntu_repo_dir }}"` | Base URL to use for CUDA repo | | `nvidia_driver_ubuntu_cuda_repo_gpgkey_url` | `"https://developer.download.nvidia.com/compute/cuda/repos/{{ _ubuntu_repo_dir }}/7fa2af80.pub"` | GPG key for the CUDA repo | | `nvidia_driver_ubuntu_cuda_repo_gpgkey_id` | `"7fa2af80"` | GPG key ID for the CUDA repo | +| `nvidia_driver_ubuntu_cuda_package` | `"cuda-drivers"` | Package name to install from CUDA repo | ## Example playbook diff --git a/defaults/.main.yml.swp b/defaults/.main.yml.swp new file mode 100644 index 0000000000000000000000000000000000000000..291af5d1fe031a161bc0d2d3a446f83600afebf3 GIT binary patch literal 12288 zcmeI2&x;&I6vrzH{)n!Mf~N@OY#^uU+N=@7fQOK*n-~L2V8xu8>aN!_wNqWyR#oq^ zge3ohc=zsEPhLe31W)2g{X0ZYzWy<6rk!1X%_Z^*zSHzn*Q-~bdas&cUbgk&tvmc` zvc<5y!r0GWf06$1%==GoJz`8M$eq>Y5g-CY;QvM-R_7T0)Mroi3mf&>Y5g-CY;0Yuk6=UB%%h>!mbRPfzU;O_6>1D>gLVb#gsCQ9spe~@ke+mD~ zM+ww78;o5;efJ_`H&MH&_fWsSz}V-gd#I048>nB-V|@YYOVlT*k5N}pzdz5|Z>S$p zw^6U7K0|*RpAH(os1X4oKm>>Y5g-CjDuJ%FFYQYE(mJbctkb+!GOc7jms6PVQFGfD 
z|Ai4deT0X~rI`#+dxOJfF;)|S<${M`Tm?Q+?#LP^RnnPAa#speRYwrb9HKE0#Qpnx z;Ye#yY0n=#_{)i>)pQQWsa81kJB8eq#nr99JLtsWus)B)!i%J;vk}|mw`_>g7;UHg z2&*jl_5B;yc)MmDo!1Z;8}m_LY*|Nb!Wdbsk8# zbMyfaCabr+a{*yrc^f&eSe>+3ru8(n_Kpqq&CkG`uuZ#s>GSko zhka}Gu{0e7{qklHJXk&|*5|@7>)UCe!qvBq{%PS&S5j*4oyRk&i9{RQ!%5uTyEES1 z+a2HDxi{X~+xei$$G$Q;RXRkkGt5(G(^4KdpJE~gyjyWv4P02U^2OO$fO1||-W@<5 z6X&M_PY=j*n0sq&7}a+{-IS3Rvef4Izs2_Yqk1`{N~dZA z__Fe{3=@9o(je&j{z4mklL?5qz;BRl28j}#sbG00jJ6U literal 0 HcmV?d00001 From 6cb674908f6bac6ae43736a06167ff437653595f Mon Sep 17 00:00:00 2001 From: Adam DeConinck Date: Mon, 16 Nov 2020 19:50:14 +0000 Subject: [PATCH 10/23] include galaxy metadata for 20.04 support --- defaults/.main.yml.swp | Bin 12288 -> 0 bytes meta/main.yml | 1 + 2 files changed, 1 insertion(+) delete mode 100644 defaults/.main.yml.swp diff --git a/defaults/.main.yml.swp b/defaults/.main.yml.swp deleted file mode 100644 index 291af5d1fe031a161bc0d2d3a446f83600afebf3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12288 zcmeI2&x;&I6vrzH{)n!Mf~N@OY#^uU+N=@7fQOK*n-~L2V8xu8>aN!_wNqWyR#oq^ zge3ohc=zsEPhLe31W)2g{X0ZYzWy<6rk!1X%_Z^*zSHzn*Q-~bdas&cUbgk&tvmc` zvc<5y!r0GWf06$1%==GoJz`8M$eq>Y5g-CY;QvM-R_7T0)Mroi3mf&>Y5g-CY;0Yuk6=UB%%h>!mbRPfzU;O_6>1D>gLVb#gsCQ9spe~@ke+mD~ zM+ww78;o5;efJ_`H&MH&_fWsSz}V-gd#I048>nB-V|@YYOVlT*k5N}pzdz5|Z>S$p zw^6U7K0|*RpAH(os1X4oKm>>Y5g-CjDuJ%FFYQYE(mJbctkb+!GOc7jms6PVQFGfD z|Ai4deT0X~rI`#+dxOJfF;)|S<${M`Tm?Q+?#LP^RnnPAa#speRYwrb9HKE0#Qpnx z;Ye#yY0n=#_{)i>)pQQWsa81kJB8eq#nr99JLtsWus)B)!i%J;vk}|mw`_>g7;UHg z2&*jl_5B;yc)MmDo!1Z;8}m_LY*|Nb!Wdbsk8# zbMyfaCabr+a{*yrc^f&eSe>+3ru8(n_Kpqq&CkG`uuZ#s>GSko zhka}Gu{0e7{qklHJXk&|*5|@7>)UCe!qvBq{%PS&S5j*4oyRk&i9{RQ!%5uTyEES1 z+a2HDxi{X~+xei$$G$Q;RXRkkGt5(G(^4KdpJE~gyjyWv4P02U^2OO$fO1||-W@<5 z6X&M_PY=j*n0sq&7}a+{-IS3Rvef4Izs2_Yqk1`{N~dZA z__Fe{3=@9o(je&j{z4mklL?5qz;BRl28j}#sbG00jJ6U diff --git a/meta/main.yml b/meta/main.yml index 3a9017a..da7c9f0 100644 --- a/meta/main.yml +++ b/meta/main.yml @@ -11,6 
+11,7 @@ galaxy_info: versions: - 'xenial' - 'bionic' + - 'focal' - name: EL versions: - '7' From 961feadb30854ceb8ab06442ea18dddae199a8e4 Mon Sep 17 00:00:00 2001 From: nvhans <35655609+nvhans@users.noreply.github.com> Date: Wed, 25 Nov 2020 19:43:38 -0500 Subject: [PATCH 11/23] Update install-redhat.yml modify "add epel repo" to work with RHEL/CentOS 7 & 8 --- tasks/install-redhat.yml | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tasks/install-redhat.yml b/tasks/install-redhat.yml index 6f0225a..6c94b9d 100644 --- a/tasks/install-redhat.yml +++ b/tasks/install-redhat.yml @@ -30,11 +30,9 @@ reboot: - name: add epel repo - yum_repository: - name: epel - description: EPEL YUM repo - baseurl: "{{ nvidia_driver_rhel_epel_repo_baseurl }}" - gpgkey: "{{ nvidia_driver_rhel_epel_repo_gpgkey }}" + yum: + - name: "epel-release" + state: latest environment: "{{proxy_env if proxy_env is defined else {}}}" - name: install dependencies From e351331026f0ed64b44ce8f4183079699c5ae95b Mon Sep 17 00:00:00 2001 From: nvhans <35655609+nvhans@users.noreply.github.com> Date: Wed, 25 Nov 2020 19:45:19 -0500 Subject: [PATCH 12/23] Update main.yml remove static location for epel repo for more flexible 'yum epel repo install' with RHEL/CentOS 7 & 8 --- defaults/main.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/defaults/main.yml b/defaults/main.yml index 528d531..7c27ba1 100644 --- a/defaults/main.yml +++ b/defaults/main.yml @@ -6,8 +6,6 @@ nvidia_driver_module_file: /etc/modprobe.d/nvidia.conf nvidia_driver_module_params: '' # RedHat family -nvidia_driver_rhel_epel_repo_baseurl: "https://download.fedoraproject.org/pub/epel/$releasever/$basearch/" -nvidia_driver_rhel_epel_repo_gpgkey: "https://epel.mirror.constant.com//RPM-GPG-KEY-EPEL-{{ ansible_distribution_major_version }}" nvidia_driver_rhel_cuda_repo_baseurl: "https://developer.download.nvidia.com/compute/cuda/repos/{{ _rhel_repo_dir }}/" nvidia_driver_rhel_cuda_repo_gpgkey: 
"https://developer.download.nvidia.com/compute/cuda/repos/{{ _rhel_repo_dir }}/7fa2af80.pub" From 9bb6978b57625c768128b0661a26d49b18b2e236 Mon Sep 17 00:00:00 2001 From: nvhans <35655609+nvhans@users.noreply.github.com> Date: Wed, 25 Nov 2020 20:50:39 -0500 Subject: [PATCH 13/23] Update install-redhat.yml --- tasks/install-redhat.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tasks/install-redhat.yml b/tasks/install-redhat.yml index 6c94b9d..b844aa9 100644 --- a/tasks/install-redhat.yml +++ b/tasks/install-redhat.yml @@ -30,9 +30,10 @@ reboot: - name: add epel repo + become: true yum: - name: "epel-release" - state: latest + state: latest environment: "{{proxy_env if proxy_env is defined else {}}}" - name: install dependencies From 1bd1672c27bede9b73d0def9628d53d85b733568 Mon Sep 17 00:00:00 2001 From: nvhans <35655609+nvhans@users.noreply.github.com> Date: Wed, 25 Nov 2020 20:56:51 -0500 Subject: [PATCH 14/23] Update install-redhat.yml --- tasks/install-redhat.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tasks/install-redhat.yml b/tasks/install-redhat.yml index b844aa9..5bf7a68 100644 --- a/tasks/install-redhat.yml +++ b/tasks/install-redhat.yml @@ -32,7 +32,8 @@ - name: add epel repo become: true yum: - - name: "epel-release" + name: + - "epel-release" state: latest environment: "{{proxy_env if proxy_env is defined else {}}}" From b32aadd3fe470e5e02ac6912a7912db012746621 Mon Sep 17 00:00:00 2001 From: Jonathan Sherry Date: Wed, 30 Dec 2020 15:45:14 -0500 Subject: [PATCH 15/23] Invoking yum once while using a loop via squash_actions is deprecated. 
Signed-off-by: Jonathan Sherry --- tasks/install-redhat.yml | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/tasks/install-redhat.yml b/tasks/install-redhat.yml index 5bf7a68..69904e9 100644 --- a/tasks/install-redhat.yml +++ b/tasks/install-redhat.yml @@ -5,14 +5,13 @@ block: - name: attempt to install kernel support packages for current version yum: - name: "{{ item }}-{{ ansible_kernel }}" + name: + - "kernel-headers-{{ ansible_kernel }} + - "kernel-tools-{{ ansible_kernel }}" + - "kernel-tools-libs-{{ ansible_kernel }}" + - "kernel-devel-{{ ansible_kernel }}" + - "kernel-debug-devel-{{ ansible_kernel }}" state: present - with_items: - - "kernel-headers" - - "kernel-tools" - - "kernel-tools-libs" - - "kernel-devel" - - "kernel-debug-devel" environment: "{{proxy_env if proxy_env is defined else {}}}" rescue: - name: update the kernel to latest version so we have a supported version From dcaeb4c5dbff7af56c82fcb07214507795be17b0 Mon Sep 17 00:00:00 2001 From: f4nha Date: Wed, 17 Feb 2021 21:39:42 +0000 Subject: [PATCH 16/23] typo missing " --- tasks/install-redhat.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tasks/install-redhat.yml b/tasks/install-redhat.yml index 69904e9..d3cdba1 100644 --- a/tasks/install-redhat.yml +++ b/tasks/install-redhat.yml @@ -6,7 +6,7 @@ - name: attempt to install kernel support packages for current version yum: name: - - "kernel-headers-{{ ansible_kernel }} + - "kernel-headers-{{ ansible_kernel }}" - "kernel-tools-{{ ansible_kernel }}" - "kernel-tools-libs-{{ ansible_kernel }}" - "kernel-devel-{{ ansible_kernel }}" From 29a839bbf26513e70aaf97184f82963d59725e91 Mon Sep 17 00:00:00 2001 From: Adam DeConinck Date: Tue, 23 Feb 2021 22:35:32 +0000 Subject: [PATCH 17/23] Use Fedora EPEL package to enable EPEL --- README.md | 3 +-- defaults/main.yml | 1 + tasks/install-redhat.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 
8b8279a..6625ede 100644 --- a/README.md +++ b/README.md @@ -37,8 +37,7 @@ $ ansible-galaxy install nvidia.nvidia_driver | Variable | Default value | Description | | -------- | ------------- | ----------- | -| `nvidia_driver_rhel_epel_repo_baseurl` | `"https://download.fedoraproject.org/pub/epel/$releasever/$basearch/"` | Base URL to use for EPEL repo | -| `nvidia_driver_rhel_epel_repo_gpgkey` | `"https://epel.mirror.constant.com//RPM-GPG-KEY-EPEL-{{ ansible_distribution_major_version }}"` | GPG key for the EPEL repo | +| `epel_package` | `"https://dl.fedoraproject.org/pub/epel/epel-release-latest-{{ ansible_distribution_major_version }}.noarch.rpm"` | Package to install to enable EPEL | | `nvidia_driver_rhel_cuda_repo_baseurl` | `"https://developer.download.nvidia.com/compute/cuda/repos/{{ _rhel_repo_dir }}/"` | Base URL to use for CUDA repo | | `nvidia_driver_rhel_cuda_repo_gpgkey` | `"https://developer.download.nvidia.com/compute/cuda/repos/{{ _rhel_repo_dir }}/7fa2af80.pub"` | GPG key for the CUDA repo | diff --git a/defaults/main.yml b/defaults/main.yml index e049dd8..5a32273 100644 --- a/defaults/main.yml +++ b/defaults/main.yml @@ -9,6 +9,7 @@ nvidia_driver_module_params: '' ############################################################################## # RedHat family # ############################################################################## +epel_package: "https://dl.fedoraproject.org/pub/epel/epel-release-latest-{{ ansible_distribution_major_version }}.noarch.rpm" nvidia_driver_rhel_cuda_repo_baseurl: "https://developer.download.nvidia.com/compute/cuda/repos/{{ _rhel_repo_dir }}/" nvidia_driver_rhel_cuda_repo_gpgkey: "https://developer.download.nvidia.com/compute/cuda/repos/{{ _rhel_repo_dir }}/7fa2af80.pub" diff --git a/tasks/install-redhat.yml b/tasks/install-redhat.yml index d3cdba1..7870178 100644 --- a/tasks/install-redhat.yml +++ b/tasks/install-redhat.yml @@ -32,7 +32,7 @@ become: true yum: name: - - "epel-release" + - "{{ epel_package }}" 
state: latest environment: "{{proxy_env if proxy_env is defined else {}}}" From 9eb9c14725cdbc622a0bcddceae8d19919ac32a5 Mon Sep 17 00:00:00 2001 From: Adam DeConinck Date: Thu, 18 Mar 2021 21:49:35 +0000 Subject: [PATCH 18/23] add nvidia-headless-no-dkms to package list --- defaults/main.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/defaults/main.yml b/defaults/main.yml index 5a32273..f0e4df5 100644 --- a/defaults/main.yml +++ b/defaults/main.yml @@ -26,6 +26,7 @@ nvidia_driver_ubuntu_branch: "450" nvidia_driver_ubuntu_packages: - "nvidia-headless-{{ nvidia_driver_ubuntu_branch }}-server" - "nvidia-utils-{{ nvidia_driver_ubuntu_branch }}-server" +- "nvidia-headless-no-dkms-{{ nvidia_driver_ubuntu_branch }}-server" # Installing with CUDA repositories nvidia_driver_ubuntu_cuda_repo_gpgkey_url: "https://developer.download.nvidia.com/compute/cuda/repos/{{ _ubuntu_repo_dir }}/7fa2af80.pub" From 17c7cb657609c6efb337987493e82b4b2c67086d Mon Sep 17 00:00:00 2001 From: Adam DeConinck Date: Thu, 25 Mar 2021 17:35:03 +0000 Subject: [PATCH 19/23] add kernel-source package as well --- defaults/main.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/defaults/main.yml b/defaults/main.yml index f0e4df5..2d8b0e2 100644 --- a/defaults/main.yml +++ b/defaults/main.yml @@ -27,6 +27,7 @@ nvidia_driver_ubuntu_packages: - "nvidia-headless-{{ nvidia_driver_ubuntu_branch }}-server" - "nvidia-utils-{{ nvidia_driver_ubuntu_branch }}-server" - "nvidia-headless-no-dkms-{{ nvidia_driver_ubuntu_branch }}-server" +- "nvidia-kernel-source-{{ nvidia_driver_ubuntu_branch }}-server" # Installing with CUDA repositories nvidia_driver_ubuntu_cuda_repo_gpgkey_url: "https://developer.download.nvidia.com/compute/cuda/repos/{{ _ubuntu_repo_dir }}/7fa2af80.pub" From 165b6a8381e2b1b62c32ccdfc917772840c8e6c0 Mon Sep 17 00:00:00 2001 From: Douglas Holt Date: Wed, 9 Jun 2021 19:48:43 +0000 Subject: [PATCH 20/23] Add support for RHEL/CentOS 8 --- tasks/install-redhat.yml | 11 ++++++++++- 1 file 
changed, 10 insertions(+), 1 deletion(-) diff --git a/tasks/install-redhat.yml b/tasks/install-redhat.yml index 7870178..4808f62 100644 --- a/tasks/install-redhat.yml +++ b/tasks/install-redhat.yml @@ -47,11 +47,20 @@ gpgkey: "{{ nvidia_driver_rhel_cuda_repo_gpgkey }}" environment: "{{proxy_env if proxy_env is defined else {}}}" -- name: install driver packages +- name: install driver packages RHEL/CentOS 7 and older yum: name: "{{ nvidia_driver_package_version | ternary('nvidia-driver-latest-dkms-'+nvidia_driver_package_version, 'nvidia-driver-latest-dkms') }}" state: "{{ nvidia_driver_package_state }}" autoremove: "{{ nvidia_driver_package_state == 'absent' }}" register: install_driver environment: "{{proxy_env if proxy_env is defined else {}}}" + when: ansible_distribution_major_version < '8' +- name: install driver packages RHEL/CentOS 8 and newer + dnf: + name: "{{ nvidia_driver_package_version | ternary('@nvidia-driver:'+nvidia_driver_package_version, '@nvidia-driver:latest-dkms') }}" + state: "{{ nvidia_driver_package_state }}" + autoremove: "{{ nvidia_driver_package_state == 'absent' }}" + register: install_driver + environment: "{{proxy_env if proxy_env is defined else {}}}" + when: ansible_distribution_major_version > '7' From 1593a2fd28b4771aabe872ffec05ee23f025634f Mon Sep 17 00:00:00 2001 From: Adam Tetelman Date: Thu, 10 Jun 2021 10:39:00 -0700 Subject: [PATCH 21/23] split rhel7/rhel8 install_driver register to avoid overwriting the value when skipped --- tasks/install-redhat.yml | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tasks/install-redhat.yml b/tasks/install-redhat.yml index 4808f62..6df75be 100644 --- a/tasks/install-redhat.yml +++ b/tasks/install-redhat.yml @@ -52,7 +52,7 @@ name: "{{ nvidia_driver_package_version | ternary('nvidia-driver-latest-dkms-'+nvidia_driver_package_version, 'nvidia-driver-latest-dkms') }}" state: "{{ nvidia_driver_package_state }}" autoremove: "{{ nvidia_driver_package_state == 
'absent' }}" - register: install_driver + register: install_driver_rhel7 environment: "{{proxy_env if proxy_env is defined else {}}}" when: ansible_distribution_major_version < '8' @@ -61,6 +61,13 @@ name: "{{ nvidia_driver_package_version | ternary('@nvidia-driver:'+nvidia_driver_package_version, '@nvidia-driver:latest-dkms') }}" state: "{{ nvidia_driver_package_state }}" autoremove: "{{ nvidia_driver_package_state == 'absent' }}" - register: install_driver + register: install_driver_rhel8 environment: "{{proxy_env if proxy_env is defined else {}}}" when: ansible_distribution_major_version > '7' + +- name: Set install_driver.changed var for RHEL 7/8 + debug: + msg: Driver installed for RHEL + when: install_driver_rhel7.changed or install_driver_rhel8.changed + register: install_driver + changed_when: install_driver_rhel7.changed or install_driver_rhel8.changed From d5d80987fa3481fd5b31b490fe3a812c832c055d Mon Sep 17 00:00:00 2001 From: Shuichi Ohsawa Date: Mon, 30 Aug 2021 14:40:16 +0900 Subject: [PATCH 22/23] Fix ubuntu's cuda repo baseurl Signed-off-by: Shuichi Ohsawa --- defaults/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/defaults/main.yml b/defaults/main.yml index 2d8b0e2..b0160cc 100644 --- a/defaults/main.yml +++ b/defaults/main.yml @@ -32,5 +32,5 @@ nvidia_driver_ubuntu_packages: # Installing with CUDA repositories nvidia_driver_ubuntu_cuda_repo_gpgkey_url: "https://developer.download.nvidia.com/compute/cuda/repos/{{ _ubuntu_repo_dir }}/7fa2af80.pub" nvidia_driver_ubuntu_cuda_repo_gpgkey_id: "7fa2af80" -nvidia_driver_ubuntu_cuda_repo_baseurl: "http://developer.download.nvidia.com/compute/cuda/repos/{{ _ubuntu_repo_dir }}" +nvidia_driver_ubuntu_cuda_repo_baseurl: "https://developer.download.nvidia.com/compute/cuda/repos/{{ _ubuntu_repo_dir }}" nvidia_driver_ubuntu_cuda_package: "cuda-drivers" From 7c1f7d7c48e959ac2b057e2db6e06a1be86e3389 Mon Sep 17 00:00:00 2001 From: Douglas Holt Date: Wed, 29 Sep 2021 07:42:39 -0700 Subject: 
[PATCH 23/23] Add EPEL repo GPG key --- defaults/main.yml | 1 + tasks/install-redhat.yml | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/defaults/main.yml b/defaults/main.yml index 2d8b0e2..add1576 100644 --- a/defaults/main.yml +++ b/defaults/main.yml @@ -10,6 +10,7 @@ nvidia_driver_module_params: '' # RedHat family # ############################################################################## epel_package: "https://dl.fedoraproject.org/pub/epel/epel-release-latest-{{ ansible_distribution_major_version }}.noarch.rpm" +epel_repo_key: "https://dl.fedoraproject.org/pub/epel/RPM-GPG-KEY-EPEL-{{ ansible_distribution_major_version }}" nvidia_driver_rhel_cuda_repo_baseurl: "https://developer.download.nvidia.com/compute/cuda/repos/{{ _rhel_repo_dir }}/" nvidia_driver_rhel_cuda_repo_gpgkey: "https://developer.download.nvidia.com/compute/cuda/repos/{{ _rhel_repo_dir }}/7fa2af80.pub" diff --git a/tasks/install-redhat.yml b/tasks/install-redhat.yml index 7870178..6d9efc5 100644 --- a/tasks/install-redhat.yml +++ b/tasks/install-redhat.yml @@ -28,6 +28,11 @@ - name: reboot to pick up the new kernel reboot: +- name: add epel repo gpg key + rpm_key: + key: "{{ epel_repo_key }}" + state: present + - name: add epel repo become: true yum: