Add NVIDIA driver installation to the role: defaults, tasks, vars and the
nvidia-persistenced systemd override.

The "index" blob hashes are carried over from the original patch and were
not recomputed for hunks whose content was revised.

diff --git a/defaults/main.yml b/defaults/main.yml
index 2859a60..7f87e8c 100644
--- a/defaults/main.yml
+++ b/defaults/main.yml
@@ -1,2 +1,17 @@
 ---
-# defaults file for ansible-role-nvidia-driver
\ No newline at end of file
+# Exact cuda-drivers version to pin; empty leaves the version unpinned.
+nvidia_driver_package_version: ''
+# true installs the systemd override that runs nvidia-persistenced with
+# --persistence-mode; false removes that override.
+nvidia_driver_persistence_mode_on: true
+# true suppresses the reboot that otherwise follows a driver install.
+nvidia_driver_skip_reboot: false
+epel_baseurl: "https://download.fedoraproject.org/pub/epel/$releasever/$basearch/"
+epel_gpgkey: "https://epel.mirror.constant.com/RPM-GPG-KEY-EPEL-{{ ansible_distribution_major_version }}"
+rhel_cuda_baseurl: "https://developer.download.nvidia.com/compute/cuda/repos/{{ rhel_repo_dir }}/"
+# NOTE(review): NVIDIA has rotated its CUDA repository signing keys since
+# 7fa2af80 was published -- confirm the key file and id below are current.
+rhel_cuda_gpgkey: "https://developer.download.nvidia.com/compute/cuda/repos/{{ rhel_repo_dir }}/7fa2af80.pub"
+ubuntu_cuda_gpgkey: "https://developer.download.nvidia.com/compute/cuda/repos/{{ ubuntu_repo_dir }}/7fa2af80.pub"
+ubuntu_cuda_apt_key: "7fa2af80"
+ubuntu_cuda_baseurl: "https://developer.download.nvidia.com/compute/cuda/repos/{{ ubuntu_repo_dir }}"
diff --git a/files/nvidia-persistenced-override.conf b/files/nvidia-persistenced-override.conf
new file mode 100644
index 0000000..52b552d
--- /dev/null
+++ b/files/nvidia-persistenced-override.conf
@@ -0,0 +1,3 @@
+[Service]
+ExecStart=
+ExecStart=/usr/bin/nvidia-persistenced --user root --persistence-mode --verbose
diff --git a/handlers/main.yml b/handlers/main.yml
deleted file mode 100644
index 178c567..0000000
--- a/handlers/main.yml
+++ /dev/null
@@ -1,2 +0,0 @@
----
-# handlers file for ansible-role-nvidia-driver
\ No newline at end of file
diff --git a/tasks/main.yml b/tasks/main.yml
index 14a251f..0cf06a8 100644
--- a/tasks/main.yml
+++ b/tasks/main.yml
@@ -1,2 +1,64 @@
 ---
-# tasks file for ansible-role-nvidia-driver
\ No newline at end of file
+# Best effort: a failure to unload the module does not abort the play.
+- name: unload nouveau
+  modprobe:
+    name: nouveau
+    state: absent
+  ignore_errors: true
+
+- name: ubuntu pre-install tasks
+  include_tasks: ubuntu-pre-install.yml
+  when: ansible_distribution == 'Ubuntu'
+
+- name: redhat family pre-install tasks
+  include_tasks: redhat-pre-install.yml
+  when: ansible_os_family == 'RedHat'
+
+# apt pins a version as "name=version" while yum/dnf expects "name-version",
+# so the separator has to follow the OS family.
+- name: install driver packages
+  vars:
+    version_separator: "{{ (ansible_os_family == 'RedHat') | ternary('-', '=') }}"
+  package:
+    name: "{{ nvidia_driver_package_version | ternary('cuda-drivers' ~ version_separator ~ nvidia_driver_package_version, 'cuda-drivers') }}"
+    state: present
+  register: install_driver
+
+- name: redhat family post-install tasks
+  include_tasks: redhat-post-install.yml
+  when: ansible_os_family == 'RedHat'
+
+- name: create persistenced override dir
+  file:
+    path: /etc/systemd/system/nvidia-persistenced.service.d/
+    state: directory
+    mode: "0755"
+
+- name: configure persistenced service to turn on persistence mode
+  copy:
+    src: nvidia-persistenced-override.conf
+    dest: /etc/systemd/system/nvidia-persistenced.service.d/override.conf
+    mode: "0644"
+  when: nvidia_driver_persistence_mode_on | bool
+
+- name: remove persistenced service override
+  file:
+    path: /etc/systemd/system/nvidia-persistenced.service.d/override.conf
+    state: absent
+  when: not (nvidia_driver_persistence_mode_on | bool)
+
+# daemon_reload makes systemd re-read unit files so that an added or removed
+# override.conf is picked up.
+- name: enable persistenced
+  systemd:
+    name: nvidia-persistenced
+    enabled: true
+    daemon_reload: true
+
+# A kernel update from redhat-post-install.yml also needs a reboot;
+# kernel_update is undefined on hosts that skipped that file.
+- name: reboot after driver install
+  reboot:
+  when:
+    - not (nvidia_driver_skip_reboot | bool)
+    - install_driver.changed or (kernel_update is defined and kernel_update.changed)
diff --git a/tasks/redhat-post-install.yml b/tasks/redhat-post-install.yml
new file mode 100644
index 0000000..65fe713
--- /dev/null
+++ b/tasks/redhat-post-install.yml
@@ -0,0 +1,31 @@
+---
+# The driver package pulls in the latest kernel-headers package, but not the
+# latest kernel. Check to see if there is a mismatch.
+
+- name: check kernel versions
+  yum:
+    list: kernel
+  register: kernel_packages
+
+- name: check kernel-headers versions
+  yum:
+    list: kernel-headers
+  register: kernel_headers_packages
+
+# Update when an installed kernel-headers release has no installed kernel of
+# the same release. Comparing whole lists avoids indexing an empty result.
+- name: update kernel if headers don't match
+  vars:
+    installed_kernels: "{{ kernel_packages.results | selectattr('yumstate', 'equalto', 'installed') | map(attribute='release') | list }}"
+    installed_headers: "{{ kernel_headers_packages.results | selectattr('yumstate', 'equalto', 'installed') | map(attribute='release') | list }}"
+  yum:
+    name:
+      - kernel
+      - kernel-tools
+      - kernel-tools-libs
+      - kernel-devel
+      - kernel-debug-devel
+      - kernel-headers
+    state: latest
+  register: kernel_update
+  when: installed_headers | difference(installed_kernels) | length > 0
diff --git a/tasks/redhat-pre-install.yml b/tasks/redhat-pre-install.yml
new file mode 100644
index 0000000..dd2e0b1
--- /dev/null
+++ b/tasks/redhat-pre-install.yml
@@ -0,0 +1,19 @@
+---
+- name: add epel repo
+  yum_repository:
+    name: epel
+    description: EPEL YUM repo
+    baseurl: "{{ epel_baseurl }}"
+    gpgkey: "{{ epel_gpgkey }}"
+
+- name: install dependencies
+  yum:
+    name: dkms
+    state: latest
+
+- name: add repo
+  yum_repository:
+    name: cuda
+    description: NVIDIA CUDA YUM Repo
+    gpgkey: "{{ rhel_cuda_gpgkey }}"
+    baseurl: "{{ rhel_cuda_baseurl }}"
diff --git a/tasks/ubuntu-pre-install.yml b/tasks/ubuntu-pre-install.yml
new file mode 100644
index 0000000..4907bb5
--- /dev/null
+++ b/tasks/ubuntu-pre-install.yml
@@ -0,0 +1,15 @@
+---
+- name: remove ppa
+  apt_repository:
+    repo: ppa:graphics-drivers/ppa
+    state: absent
+
+- name: add key
+  apt_key:
+    url: "{{ ubuntu_cuda_gpgkey }}"
+    id: "{{ ubuntu_cuda_apt_key }}"
+
+- name: add repo
+  apt_repository:
+    repo: "deb {{ ubuntu_cuda_baseurl }} /"
+    update_cache: true
diff --git a/vars/main.yml b/vars/main.yml
index 629da4d..6a48d95 100644
--- a/vars/main.yml
+++ b/vars/main.yml
@@ -1,2 +1,5 @@
 ---
-# vars file for ansible-role-nvidia-driver
\ No newline at end of file
+# Per-distribution directory under the CUDA repository root, built from
+# gathered facts (for example ubuntu1804/x86_64 or rhel7/x86_64).
+ubuntu_repo_dir: "{{ ansible_distribution | lower }}{{ ansible_distribution_version | replace('.', '') }}/{{ ansible_architecture }}"
+rhel_repo_dir: "rhel{{ ansible_distribution_major_version }}/{{ ansible_architecture }}"