undo latest commits
This commit is contained in:
parent
07739ff434
commit
8f53c057af
55
README.md
55
README.md
@ -2,6 +2,15 @@
|
|||||||
|
|
||||||
An Ansible role to install the NVIDIA driver from the NVIDIA CUDA repositories.
|
An Ansible role to install the NVIDIA driver from the NVIDIA CUDA repositories.
|
||||||
|
|
||||||
|
## Requirements
|
||||||
|
|
||||||
|
In the process of installing the NVIDIA driver, this role will reboot the nodes where it runs.
|
||||||
|
Because of this, we strongly recommend that you run `ansible-playbook` from a node other than the GPU nodes where you are installing the driver.
|
||||||
|
|
||||||
|
If you attempt to run Ansible on the same node where you are installing the driver, this role will either:
|
||||||
|
|
||||||
|
* Refuse to proceed with an error like `Running reboot with local connection would reboot the control node` (if running with the `local` connection)
|
||||||
|
* Reboot the node you're running on, interrupting the playbook execution! (if running with an `ssh` connection against localhost)
|
||||||
|
|
||||||
## Installing
|
## Installing
|
||||||
|
|
||||||
@ -11,12 +20,58 @@ This role can be installed using [Ansible Galaxy](https://galaxy.ansible.com/nvi
|
|||||||
$ ansible-galaxy install nvidia.nvidia_driver
|
$ ansible-galaxy install nvidia.nvidia_driver
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Role variables
|
||||||
|
|
||||||
|
|
||||||
|
| Variable | Default value | Description |
|
||||||
|
| -------- | ------------- | ----------- |
|
||||||
|
| `nvidia_driver_package_state` | `"present"` | Package state for NVIDIA driver packages |
|
||||||
|
| `nvidia_driver_package_version` | `""` | Package version to install. Note that this should match the actual version of the deb or RPM package to be installed. |
|
||||||
|
| `nvidia_driver_persistence_mode_on` | `yes` | Whether to enable persistence mode (boolean) |
|
||||||
|
| `nvidia_driver_skip_reboot` | `no` | Whether to skip rebooting the node during the install |
|
||||||
|
| `nvidia_driver_module_file` | `"/etc/modprobe.d/nvidia.conf"` | Filename to use for NVIDIA driver parameters |
|
||||||
|
| `nvidia_driver_module_params` | `""` | Parameters to pass to the NVIDIA driver |
|
||||||
|
|
||||||
|
### Red Hat specific variables
|
||||||
|
|
||||||
|
|
||||||
|
| Variable | Default value | Description |
|
||||||
|
| -------- | ------------- | ----------- |
|
||||||
|
| `epel_package` | `"https://dl.fedoraproject.org/pub/epel/epel-release-latest-{{ ansible_distribution_major_version }}.noarch.rpm"` | Package to install to enable EPEL |
|
||||||
|
| `nvidia_driver_rhel_cuda_repo_baseurl` | `"https://developer.download.nvidia.com/compute/cuda/repos/{{ _rhel_repo_dir }}/"` | Base URL to use for CUDA repo |
|
||||||
|
| `nvidia_driver_rhel_cuda_repo_gpgkey` | `"https://developer.download.nvidia.com/compute/cuda/repos/{{ _rhel_repo_dir }}/7fa2af80.pub"` | GPG key for the CUDA repo |
|
||||||
|
|
||||||
|
### Ubuntu specific variables
|
||||||
|
|
||||||
|
For Ubuntu installs, you have the choice of installing from either the Canonical repositories or the NVIDIA CUDA repositories.
|
||||||
|
|
||||||
|
By default, the Canonical repositories will be used, and the driver installed will be the headless server driver.
|
||||||
|
|
||||||
|
| Variable | Default value | Description |
|
||||||
|
| -------- | ------------- | ----------- |
|
||||||
|
| `nvidia_driver_ubuntu_install_from_cuda_repo` | `no` | Flag whether to use the CUDA repo |
|
||||||
|
| `nvidia_driver_ubuntu_branch` | `450` | Driver branch to use for the install |
|
||||||
|
| `nvidia_driver_ubuntu_packages` | `["nvidia-headless-450-server", "nvidia-utils-450-server", "nvidia-headless-no-dkms-450-server", "nvidia-kernel-source-450-server"]` | Package names to install from Canonical repo |
|
||||||
|
| `nvidia_driver_ubuntu_cuda_repo_baseurl` | `"https://developer.download.nvidia.com/compute/cuda/repos/{{ _ubuntu_repo_dir }}"` | Base URL to use for CUDA repo |
|
||||||
|
| `nvidia_driver_ubuntu_cuda_repo_gpgkey_url` | `"https://developer.download.nvidia.com/compute/cuda/repos/{{ _ubuntu_repo_dir }}/7fa2af80.pub"` | GPG key for the CUDA repo |
|
||||||
|
| `nvidia_driver_ubuntu_cuda_repo_gpgkey_id` | `"7fa2af80"` | GPG key ID for the CUDA repo |
|
||||||
|
| `nvidia_driver_ubuntu_cuda_package` | `"cuda-drivers"` | Package name to install from CUDA repo |
|
||||||
|
|
||||||
|
## Example playbook
|
||||||
|
|
||||||
|
```
|
||||||
|
- hosts: gpu_nodes
|
||||||
|
roles:
|
||||||
|
- nvidia.nvidia_driver
|
||||||
|
```
|
||||||
|
|
||||||
## Supported distributions
|
## Supported distributions
|
||||||
|
|
||||||
Currently, this role supports the following Linux distributions:
|
Currently, this role supports the following Linux distributions:
|
||||||
|
|
||||||
* NVIDIA DGX OS 4
|
* NVIDIA DGX OS 4
|
||||||
|
* NVIDIA DGX OS 5
|
||||||
* Ubuntu 18.04 LTS
|
* Ubuntu 18.04 LTS
|
||||||
|
* Ubuntu 20.04 LTS
|
||||||
* CentOS 7
|
* CentOS 7
|
||||||
* Red Hat Enterprise Linux 7
|
* Red Hat Enterprise Linux 7
|
||||||
|
@ -5,13 +5,33 @@ nvidia_driver_skip_reboot: no
|
|||||||
nvidia_driver_module_file: /etc/modprobe.d/nvidia.conf
|
nvidia_driver_module_file: /etc/modprobe.d/nvidia.conf
|
||||||
nvidia_driver_module_params: ''
|
nvidia_driver_module_params: ''
|
||||||
|
|
||||||
# RedHat family
|
|
||||||
nvidia_driver_rhel_epel_repo_baseurl: "https://download.fedoraproject.org/pub/epel/$releasever/$basearch/"
|
##############################################################################
|
||||||
nvidia_driver_rhel_epel_repo_gpgkey: "https://epel.mirror.constant.com//RPM-GPG-KEY-EPEL-{{ ansible_distribution_major_version }}"
|
# RedHat family #
|
||||||
|
##############################################################################
|
||||||
|
epel_package: "https://dl.fedoraproject.org/pub/epel/epel-release-latest-{{ ansible_distribution_major_version }}.noarch.rpm"
|
||||||
|
epel_repo_key: "https://dl.fedoraproject.org/pub/epel/RPM-GPG-KEY-EPEL-{{ ansible_distribution_major_version }}"
|
||||||
nvidia_driver_rhel_cuda_repo_baseurl: "https://developer.download.nvidia.com/compute/cuda/repos/{{ _rhel_repo_dir }}/"
|
nvidia_driver_rhel_cuda_repo_baseurl: "https://developer.download.nvidia.com/compute/cuda/repos/{{ _rhel_repo_dir }}/"
|
||||||
nvidia_driver_rhel_cuda_repo_gpgkey: "https://developer.download.nvidia.com/compute/cuda/repos/{{ _rhel_repo_dir }}/7fa2af80.pub"
|
nvidia_driver_rhel_cuda_repo_gpgkey: "https://developer.download.nvidia.com/compute/cuda/repos/{{ _rhel_repo_dir }}/7fa2af80.pub"
|
||||||
|
|
||||||
# Ubuntu
|
|
||||||
|
##############################################################################
|
||||||
|
# Ubuntu #
|
||||||
|
##############################################################################
|
||||||
|
|
||||||
|
# Determine if we should install from CUDA repo instead of Canonical repos
|
||||||
|
nvidia_driver_ubuntu_install_from_cuda_repo: no
|
||||||
|
|
||||||
|
# Installing with Canonical repositories
|
||||||
|
nvidia_driver_ubuntu_branch: "450"
|
||||||
|
nvidia_driver_ubuntu_packages:
|
||||||
|
- "nvidia-headless-{{ nvidia_driver_ubuntu_branch }}-server"
|
||||||
|
- "nvidia-utils-{{ nvidia_driver_ubuntu_branch }}-server"
|
||||||
|
- "nvidia-headless-no-dkms-{{ nvidia_driver_ubuntu_branch }}-server"
|
||||||
|
- "nvidia-kernel-source-{{ nvidia_driver_ubuntu_branch }}-server"
|
||||||
|
|
||||||
|
# Installing with CUDA repositories
|
||||||
nvidia_driver_ubuntu_cuda_repo_gpgkey_url: "https://developer.download.nvidia.com/compute/cuda/repos/{{ _ubuntu_repo_dir }}/7fa2af80.pub"
|
nvidia_driver_ubuntu_cuda_repo_gpgkey_url: "https://developer.download.nvidia.com/compute/cuda/repos/{{ _ubuntu_repo_dir }}/7fa2af80.pub"
|
||||||
nvidia_driver_ubuntu_cuda_repo_gpgkey_id: "7fa2af80"
|
nvidia_driver_ubuntu_cuda_repo_gpgkey_id: "7fa2af80"
|
||||||
nvidia_driver_ubuntu_cuda_repo_baseurl: "http://developer.download.nvidia.com/compute/cuda/repos/{{ _ubuntu_repo_dir }}"
|
nvidia_driver_ubuntu_cuda_repo_baseurl: "https://developer.download.nvidia.com/compute/cuda/repos/{{ _ubuntu_repo_dir }}"
|
||||||
|
nvidia_driver_ubuntu_cuda_package: "cuda-drivers"
|
||||||
|
@ -11,6 +11,7 @@ galaxy_info:
|
|||||||
versions:
|
versions:
|
||||||
- 'xenial'
|
- 'xenial'
|
||||||
- 'bionic'
|
- 'bionic'
|
||||||
|
- 'focal'
|
||||||
- name: EL
|
- name: EL
|
||||||
versions:
|
versions:
|
||||||
- '7'
|
- '7'
|
||||||
|
@ -1,10 +1,44 @@
|
|||||||
---
|
---
|
||||||
|
# We have to do this because the CentOS mirrors don't keep kernel-headers, etc
|
||||||
|
# for older kernels.
|
||||||
|
- name: ensure we have kernel-headers installed for the current kernel
|
||||||
|
block:
|
||||||
|
- name: attempt to install kernel support packages for current version
|
||||||
|
yum:
|
||||||
|
name:
|
||||||
|
- "kernel-headers-{{ ansible_kernel }}"
|
||||||
|
- "kernel-tools-{{ ansible_kernel }}"
|
||||||
|
- "kernel-tools-libs-{{ ansible_kernel }}"
|
||||||
|
- "kernel-devel-{{ ansible_kernel }}"
|
||||||
|
- "kernel-debug-devel-{{ ansible_kernel }}"
|
||||||
|
state: present
|
||||||
|
environment: "{{proxy_env if proxy_env is defined else {}}}"
|
||||||
|
rescue:
|
||||||
|
- name: update the kernel to latest version so we have a supported version
|
||||||
|
yum:
|
||||||
|
name:
|
||||||
|
- "kernel"
|
||||||
|
- "kernel-headers"
|
||||||
|
- "kernel-tools"
|
||||||
|
- "kernel-tools-libs"
|
||||||
|
- "kernel-devel"
|
||||||
|
- "kernel-debug-devel"
|
||||||
|
state: latest
|
||||||
|
environment: "{{proxy_env if proxy_env is defined else {}}}"
|
||||||
|
- name: reboot to pick up the new kernel
|
||||||
|
reboot:
|
||||||
|
|
||||||
|
- name: add epel repo gpg key
|
||||||
|
rpm_key:
|
||||||
|
key: "{{ epel_repo_key }}"
|
||||||
|
state: present
|
||||||
|
|
||||||
- name: add epel repo
|
- name: add epel repo
|
||||||
yum_repository:
|
become: true
|
||||||
name: epel
|
yum:
|
||||||
description: EPEL YUM repo
|
name:
|
||||||
baseurl: "{{ nvidia_driver_rhel_epel_repo_baseurl }}"
|
- "{{ epel_package }}"
|
||||||
gpgkey: "{{ nvidia_driver_rhel_epel_repo_gpgkey }}"
|
state: latest
|
||||||
environment: "{{proxy_env if proxy_env is defined else {}}}"
|
environment: "{{proxy_env if proxy_env is defined else {}}}"
|
||||||
|
|
||||||
- name: install dependencies
|
- name: install dependencies
|
||||||
@ -18,47 +52,27 @@
|
|||||||
gpgkey: "{{ nvidia_driver_rhel_cuda_repo_gpgkey }}"
|
gpgkey: "{{ nvidia_driver_rhel_cuda_repo_gpgkey }}"
|
||||||
environment: "{{proxy_env if proxy_env is defined else {}}}"
|
environment: "{{proxy_env if proxy_env is defined else {}}}"
|
||||||
|
|
||||||
- name: install driver packages
|
- name: install driver packages RHEL/CentOS 7 and older
|
||||||
yum:
|
yum:
|
||||||
name: "{{ nvidia_driver_package_version | ternary('nvidia-driver-latest-dkms-'+nvidia_driver_package_version, 'nvidia-driver-latest-dkms') }}"
|
name: "{{ nvidia_driver_package_version | ternary('nvidia-driver-latest-dkms-'+nvidia_driver_package_version, 'nvidia-driver-latest-dkms') }}"
|
||||||
state: "{{ nvidia_driver_package_state }}"
|
state: "{{ nvidia_driver_package_state }}"
|
||||||
autoremove: "{{ nvidia_driver_package_state == 'absent' }}"
|
autoremove: "{{ nvidia_driver_package_state == 'absent' }}"
|
||||||
|
register: install_driver_rhel7
|
||||||
|
environment: "{{proxy_env if proxy_env is defined else {}}}"
|
||||||
|
when: ansible_distribution_major_version < '8'
|
||||||
|
|
||||||
|
- name: install driver packages RHEL/CentOS 8 and newer
|
||||||
|
dnf:
|
||||||
|
name: "{{ nvidia_driver_package_version | ternary('@nvidia-driver:'+nvidia_driver_package_version, '@nvidia-driver:latest-dkms') }}"
|
||||||
|
state: "{{ nvidia_driver_package_state }}"
|
||||||
|
autoremove: "{{ nvidia_driver_package_state == 'absent' }}"
|
||||||
|
register: install_driver_rhel8
|
||||||
|
environment: "{{proxy_env if proxy_env is defined else {}}}"
|
||||||
|
when: ansible_distribution_major_version > '7'
|
||||||
|
|
||||||
|
- name: Set install_driver.changed var for RHEL 7/8
|
||||||
|
debug:
|
||||||
|
msg: Driver installed for RHEL
|
||||||
|
when: install_driver_rhel7.changed or install_driver_rhel8.changed
|
||||||
register: install_driver
|
register: install_driver
|
||||||
environment: "{{proxy_env if proxy_env is defined else {}}}"
|
changed_when: install_driver_rhel7.changed or install_driver_rhel8.changed
|
||||||
|
|
||||||
# The driver package pulls in the latest kernel-headers package, but not the
|
|
||||||
# latest kernel. Check to see if there is a mismatch.
|
|
||||||
|
|
||||||
- name: check kernel versions
|
|
||||||
yum:
|
|
||||||
list: kernel
|
|
||||||
register: yum_list
|
|
||||||
|
|
||||||
- name: register installed kernel version
|
|
||||||
debug:
|
|
||||||
msg: "{{ yum_list.results | selectattr('yumstate', 'equalto', 'installed') | list }}"
|
|
||||||
register: kernel_version
|
|
||||||
|
|
||||||
- name: check kernel-headers versions
|
|
||||||
yum:
|
|
||||||
list: kernel-headers
|
|
||||||
register: yum_list
|
|
||||||
|
|
||||||
- name: register installed kernel-headers version
|
|
||||||
debug:
|
|
||||||
msg: "{{ yum_list.results | selectattr('yumstate', 'equalto', 'installed') | list }}"
|
|
||||||
register: kernel_headers_version
|
|
||||||
|
|
||||||
- name: update kernel if headers don't match
|
|
||||||
yum:
|
|
||||||
name:
|
|
||||||
- kernel
|
|
||||||
- kernel-tools
|
|
||||||
- kernel-tools-libs
|
|
||||||
- kernel-devel
|
|
||||||
- kernel-debug-devel
|
|
||||||
- kernel-headers
|
|
||||||
state: latest
|
|
||||||
register: kernel_update
|
|
||||||
when: kernel_version.msg[0].release != kernel_headers_version.msg[0].release
|
|
||||||
environment: "{{proxy_env if proxy_env is defined else {}}}"
|
|
||||||
|
35
tasks/install-ubuntu-cuda-repo.yml
Normal file
35
tasks/install-ubuntu-cuda-repo.yml
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
---
|
||||||
|
- name: remove ppa
|
||||||
|
apt_repository:
|
||||||
|
repo: ppa:graphics-drivers/ppa
|
||||||
|
state: absent
|
||||||
|
|
||||||
|
- name: add pin file
|
||||||
|
copy:
|
||||||
|
src: "cuda-ubuntu.pin"
|
||||||
|
dest: "/etc/apt/preferences.d/cuda-repository-pin-600"
|
||||||
|
owner: "root"
|
||||||
|
group: "root"
|
||||||
|
mode: "0644"
|
||||||
|
|
||||||
|
- name: add key
|
||||||
|
apt_key:
|
||||||
|
url: "{{ nvidia_driver_ubuntu_cuda_repo_gpgkey_url }}"
|
||||||
|
id: "{{ nvidia_driver_ubuntu_cuda_repo_gpgkey_id }}"
|
||||||
|
environment: "{{proxy_env if proxy_env is defined else {}}}"
|
||||||
|
|
||||||
|
|
||||||
|
- name: add repo
|
||||||
|
apt_repository:
|
||||||
|
repo: "deb {{ nvidia_driver_ubuntu_cuda_repo_baseurl }} /"
|
||||||
|
update_cache: yes
|
||||||
|
environment: "{{proxy_env if proxy_env is defined else {}}}"
|
||||||
|
|
||||||
|
- name: install driver packages
|
||||||
|
apt:
|
||||||
|
name: "{{ nvidia_driver_package_version | ternary(nvidia_driver_ubuntu_cuda_package+'='+nvidia_driver_package_version, nvidia_driver_ubuntu_cuda_package) }}"
|
||||||
|
state: "{{ nvidia_driver_package_state }}"
|
||||||
|
autoremove: "{{ nvidia_driver_package_state == 'absent' }}"
|
||||||
|
purge: "{{ nvidia_driver_package_state == 'absent' }}"
|
||||||
|
register: install_driver
|
||||||
|
environment: "{{proxy_env if proxy_env is defined else {}}}"
|
@ -4,32 +4,12 @@
|
|||||||
repo: ppa:graphics-drivers/ppa
|
repo: ppa:graphics-drivers/ppa
|
||||||
state: absent
|
state: absent
|
||||||
|
|
||||||
- name: add pin file
|
|
||||||
copy:
|
|
||||||
src: "cuda-ubuntu.pin"
|
|
||||||
dest: "/etc/apt/preferences.d/cuda-repository-pin-600"
|
|
||||||
owner: "root"
|
|
||||||
group: "root"
|
|
||||||
mode: "0644"
|
|
||||||
|
|
||||||
- name: add key
|
|
||||||
apt_key:
|
|
||||||
url: "{{ nvidia_driver_ubuntu_cuda_repo_gpgkey_url }}"
|
|
||||||
id: "{{ nvidia_driver_ubuntu_cuda_repo_gpgkey_id }}"
|
|
||||||
environment: "{{proxy_env if proxy_env is defined else {}}}"
|
|
||||||
|
|
||||||
|
|
||||||
- name: add repo
|
|
||||||
apt_repository:
|
|
||||||
repo: "deb {{ nvidia_driver_ubuntu_cuda_repo_baseurl }} /"
|
|
||||||
update_cache: yes
|
|
||||||
environment: "{{proxy_env if proxy_env is defined else {}}}"
|
|
||||||
|
|
||||||
- name: install driver packages
|
- name: install driver packages
|
||||||
apt:
|
apt:
|
||||||
name: "{{ nvidia_driver_package_version | ternary('cuda-drivers='+nvidia_driver_package_version, 'cuda-drivers') }}"
|
name: "{{ nvidia_driver_package_version | ternary(item+'='+nvidia_driver_package_version, item) }}"
|
||||||
state: "{{ nvidia_driver_package_state }}"
|
state: "{{ nvidia_driver_package_state }}"
|
||||||
autoremove: "{{ nvidia_driver_package_state == 'absent' }}"
|
autoremove: "{{ nvidia_driver_package_state == 'absent' }}"
|
||||||
purge: "{{ nvidia_driver_package_state == 'absent' }}"
|
purge: "{{ nvidia_driver_package_state == 'absent' }}"
|
||||||
|
with_items: "{{ nvidia_driver_ubuntu_packages }}"
|
||||||
register: install_driver
|
register: install_driver
|
||||||
environment: "{{proxy_env if proxy_env is defined else {}}}"
|
environment: "{{proxy_env if proxy_env is defined else {}}}"
|
||||||
|
@ -5,9 +5,18 @@
|
|||||||
state: absent
|
state: absent
|
||||||
ignore_errors: true
|
ignore_errors: true
|
||||||
|
|
||||||
- name: ubuntu install tasks
|
- name: Blacklist the nouveau driver module
|
||||||
|
community.general.kernel_blacklist:
|
||||||
|
name: nouveau
|
||||||
|
state: present
|
||||||
|
|
||||||
|
- name: ubuntu install tasks (canonical repos)
|
||||||
include_tasks: install-ubuntu.yml
|
include_tasks: install-ubuntu.yml
|
||||||
when: ansible_distribution == 'Ubuntu'
|
when: ansible_distribution == 'Ubuntu' and (not nvidia_driver_ubuntu_install_from_cuda_repo)
|
||||||
|
|
||||||
|
- name: ubuntu install tasks (CUDA repo)
|
||||||
|
include_tasks: install-ubuntu-cuda-repo.yml
|
||||||
|
when: ansible_distribution == 'Ubuntu' and nvidia_driver_ubuntu_install_from_cuda_repo
|
||||||
|
|
||||||
- name: redhat family install tasks
|
- name: redhat family install tasks
|
||||||
include_tasks: install-redhat.yml
|
include_tasks: install-redhat.yml
|
||||||
|
Loading…
Reference in New Issue
Block a user