This commit is contained in:
2024-07-12 00:07:31 -04:00
parent 6948b9baa8
commit abd86e10e6
91 changed files with 5207 additions and 206 deletions

View File

@@ -0,0 +1,89 @@
---
# The CentOS mirrors don't keep kernel-headers, etc. for older kernels, so the
# version-pinned install in `block` can fail. When it does, `rescue` upgrades
# the kernel and its support packages to the latest version and reboots.
- name: ensure we have kernel-headers installed for the current kernel
  block:
    - name: attempt to install kernel support packages for current version
      yum:
        name:
          - "kernel-headers-{{ ansible_kernel }}"
          - "kernel-tools-{{ ansible_kernel }}"
          - "kernel-tools-libs-{{ ansible_kernel }}"
          - "kernel-devel-{{ ansible_kernel }}"
          - "kernel-debug-devel-{{ ansible_kernel }}"
        state: present
      environment: "{{ proxy_env | default({}) }}"
  rescue:
    - name: update the kernel to latest version so we have a supported version
      yum:
        name:
          - "kernel"
          - "kernel-headers"
          - "kernel-tools"
          - "kernel-tools-libs"
          - "kernel-devel"
          - "kernel-debug-devel"
        state: latest
      environment: "{{ proxy_env | default({}) }}"
    - name: reboot to pick up the new kernel
      reboot:
      # `| bool` so that a string value such as "false" (e.g. passed with -e)
      # is not evaluated as a truthy non-empty string.
      when: not nvidia_driver_skip_reboot | bool
# Import the EPEL GPG key; runs only when the role manages repositories.
- name: add epel repo gpg key
  rpm_key:
    key: "{{ epel_repo_key }}"
    state: present
  # NOTE(review): presumably epel_repo_key can be a URL, in which case the
  # fetch needs the same proxy settings the other network tasks pass — confirm.
  environment: "{{ proxy_env | default({}) }}"
  when: nvidia_driver_add_repos | bool
# Install (or upgrade to the latest) EPEL release package with elevated
# privileges; skipped unless the role is asked to manage repositories.
- name: add epel repo
  when: nvidia_driver_add_repos | bool
  become: true
  environment: "{{ proxy_env | default({}) }}"
  yum:
    state: latest
    name:
      - "{{ epel_package }}"
# Install dkms. Like the other yum tasks in this file, the install goes
# through the optional proxy_env so it also works behind a proxy.
- name: install dependencies
  yum:
    name: dkms
    state: present
  environment: "{{ proxy_env | default({}) }}"
# Add the nouveau kernel module to the module blacklist.
- name: blacklist nouveau
  kernel_blacklist:
    state: present
    name: "nouveau"
# Define the `cuda` yum repository from the role's base URL and GPG key
# variables; skipped unless the role is asked to manage repositories.
- name: add repo
  when: nvidia_driver_add_repos | bool
  environment: "{{ proxy_env | default({}) }}"
  yum_repository:
    name: "cuda"
    description: "NVIDIA CUDA YUM Repo"
    gpgkey: "{{ nvidia_driver_rhel_cuda_repo_gpgkey }}"
    baseurl: "{{ nvidia_driver_rhel_cuda_repo_baseurl }}"
# Exactly one of the two install tasks below runs, selected by the major
# release. The release fact is a string, so it is cast with `| int` before
# comparing: a plain string comparison sorts "10" before "8" and would send
# RHEL 10 down the RHEL 7 path.

# RHEL/CentOS 7 and older: install via yum. A pinned version selects the
# `nvidia-driver-latest-dkms-<version>` package, otherwise the branch package.
- name: install driver packages RHEL/CentOS 7 and older
  yum:
    name: "{{ nvidia_driver_package_version | ternary('nvidia-driver-latest-dkms-'+nvidia_driver_package_version, 'nvidia-driver-branch-'+nvidia_driver_rhel_branch) }}"
    state: "{{ nvidia_driver_package_state }}"
    # Only clean up unneeded dependencies when the driver is being removed.
    autoremove: "{{ nvidia_driver_package_state == 'absent' }}"
  register: install_driver_rhel7
  environment: "{{ proxy_env | default({}) }}"
  when: ansible_distribution_major_version | int < 8

# RHEL/CentOS 8 and newer: install the `nvidia-driver` module stream via dnf.
- name: install driver packages RHEL/CentOS 8 and newer
  dnf:
    name: "{{ nvidia_driver_package_version | ternary('@nvidia-driver:'+nvidia_driver_package_version, '@nvidia-driver:'+nvidia_driver_rhel_branch+'-dkms') }}"
    state: "{{ nvidia_driver_package_state }}"
    # Only clean up unneeded dependencies when the driver is being removed.
    autoremove: "{{ nvidia_driver_package_state == 'absent' }}"
  register: install_driver_rhel8
  environment: "{{ proxy_env | default({}) }}"
  when: ansible_distribution_major_version | int >= 8
# Fold the two per-release results into a single registered `install_driver`
# variable: the task runs, and reports "changed", only when one of the
# RHEL install tasks changed something.
- name: Set install_driver.changed var for RHEL 7/8
  when: install_driver_rhel7.changed or install_driver_rhel8.changed
  debug:
    msg: "Driver installed for RHEL"
  changed_when: install_driver_rhel7.changed or install_driver_rhel8.changed
  register: install_driver

View File

@@ -0,0 +1,43 @@
---
# Make sure the graphics-drivers PPA is not configured as an apt source.
- name: remove ppa
  apt_repository:
    state: absent
    repo: "ppa:graphics-drivers/ppa"
# Delete the previous CUDA repo signing key from apt's keyring; skipped
# unless the role is asked to manage repositories.
- name: remove old signing key
  when: nvidia_driver_add_repos | bool
  environment: "{{ proxy_env | default({}) }}"
  apt_key:
    state: absent
    id: "{{ old_nvidia_driver_ubuntu_cuda_repo_gpgkey_id }}"
# Install the CUDA keyring .deb from the configured URL; skipped unless the
# role is asked to manage repositories.
- name: add CUDA keyring
  when: nvidia_driver_add_repos | bool
  environment: "{{ proxy_env | default({}) }}"
  apt:
    state: present
    deb: "{{ nvidia_driver_ubuntu_cuda_keyring_url }}"
# Refresh the apt package index. Never reported as a change, so a plain cache
# refresh does not mark the play as changed.
- name: force an apt update
  apt:
    update_cache: true
  changed_when: false
  # The index refresh goes over the network, so it gets the same optional
  # proxy settings as the other apt tasks in this file.
  environment: "{{ proxy_env | default({}) }}"
# Make sure the kmod package is present.
- name: ensure kmod is installed
  apt:
    name: "kmod"
    state: "present"
  # Package download may need the proxy, like the other apt installs here.
  environment: "{{ proxy_env | default({}) }}"
# Add the nouveau kernel module to the module blacklist.
- name: blacklist nouveau
  kernel_blacklist:
    state: present
    name: "nouveau"
# Install (or remove, per nvidia_driver_package_state) the CUDA-repo driver
# package. A non-empty nvidia_driver_package_version pins it as
# `<package>=<version>`; otherwise the unpinned package name is used.
# The result is registered as `install_driver`.
- name: install driver packages
  environment: "{{ proxy_env | default({}) }}"
  register: install_driver
  apt:
    state: "{{ nvidia_driver_package_state }}"
    name: "{{ nvidia_driver_package_version | ternary(nvidia_driver_ubuntu_cuda_package+'='+nvidia_driver_package_version, nvidia_driver_ubuntu_cuda_package) }}"
    # Purge and autoremove apply only when the package is being removed.
    purge: "{{ nvidia_driver_package_state == 'absent' }}"
    autoremove: "{{ nvidia_driver_package_state == 'absent' }}"

View File

@@ -0,0 +1,15 @@
---
# Make sure the graphics-drivers PPA is not configured as an apt source.
- name: remove ppa
  apt_repository:
    state: absent
    repo: "ppa:graphics-drivers/ppa"
# Install (or remove, per nvidia_driver_package_state) every package listed in
# nvidia_driver_ubuntu_packages, one loop iteration per package. A non-empty
# nvidia_driver_package_version pins each one as `<package>=<version>`.
# The combined loop result is registered as `install_driver`.
- name: install driver packages
  loop: "{{ nvidia_driver_ubuntu_packages }}"
  environment: "{{ proxy_env | default({}) }}"
  register: install_driver
  apt:
    state: "{{ nvidia_driver_package_state }}"
    name: "{{ nvidia_driver_package_version | ternary(item+'='+nvidia_driver_package_version, item) }}"
    # Purge and autoremove apply only when the packages are being removed.
    purge: "{{ nvidia_driver_package_state == 'absent' }}"
    autoremove: "{{ nvidia_driver_package_state == 'absent' }}"

View File

@@ -0,0 +1,52 @@
---
# Try to unload the nouveau module from the running kernel; a failure here is
# tolerated and the play carries on.
- name: unload nouveau
  ignore_errors: true
  modprobe:
    state: absent
    name: "nouveau"
# On Ubuntu, exactly one of the two install task files is included, chosen by
# nvidia_driver_ubuntu_install_from_cuda_repo. The flag is cast with `| bool`
# so a string value such as "false" (e.g. passed with -e) is not treated as
# truthy, which would otherwise select the CUDA-repo path.
- name: ubuntu install tasks (canonical repos)
  include_tasks: install-ubuntu.yml
  when:
    - ansible_distribution == 'Ubuntu'
    - not nvidia_driver_ubuntu_install_from_cuda_repo | bool

- name: ubuntu install tasks (CUDA repo)
  include_tasks: install-ubuntu-cuda-repo.yml
  when:
    - ansible_distribution == 'Ubuntu'
    - nvidia_driver_ubuntu_install_from_cuda_repo | bool
# Hosts in the RedHat OS family get their install steps from install-redhat.yml.
- name: redhat family install tasks
  when: ansible_os_family == "RedHat"
  include_tasks: "install-redhat.yml"
# Create the systemd drop-in directory for nvidia-persistenced.
# `state: directory` already creates missing parent directories; `recurse` was
# dropped because no owner/group/mode is set here, so the recursive attribute
# walk had nothing to apply.
- name: create persistenced override dir
  file:
    path: /etc/systemd/system/nvidia-persistenced.service.d/
    state: directory
# Exactly one of the two tasks below runs, chosen by
# nvidia_driver_persistence_mode_on: the override.conf drop-in is either
# installed or removed. The flag is cast with `| bool` so a string value such
# as "false" (e.g. passed with -e) is not treated as truthy.
- name: configure persistenced service to turn on persistence mode
  copy:
    src: nvidia-persistenced-override.conf
    dest: /etc/systemd/system/nvidia-persistenced.service.d/override.conf
    # Explicit mode, matching the module-parameters template task in this file.
    mode: '0644'
  when: nvidia_driver_persistence_mode_on | bool

- name: remove persistenced service override
  file:
    path: /etc/systemd/system/nvidia-persistenced.service.d/override.conf
    state: absent
  when: not nvidia_driver_persistence_mode_on | bool
# Enable the nvidia-persistenced unit, unless the driver package is being
# removed.
- name: enable persistenced
  when: nvidia_driver_package_state != "absent"
  systemd:
    enabled: true
    name: "nvidia-persistenced"
# Render nvidia.conf.j2 to the path in nvidia_driver_module_file with
# mode 0644.
- name: set module parameters
  template:
    dest: "{{ nvidia_driver_module_file }}"
    src: "nvidia.conf.j2"
    mode: "0644"
# Reboot only when a driver install task reported a change and reboots are
# not suppressed.
- name: reboot after driver install
  reboot:
  when:
    # install_driver is registered only by the Ubuntu/RedHat task files; on
    # any other platform none of them is included and it stays undefined.
    - install_driver is defined
    - install_driver.changed | default(false) | bool
    # `| bool` so a string value such as "false" is not treated as truthy.
    - not nvidia_driver_skip_reboot | bool