This commit is contained in:
Eric Meehan 2024-07-04 19:04:55 +00:00
parent 5ac7e482ea
commit 4dae39b470
15 changed files with 156 additions and 6 deletions

3
group_vars/all.yml Normal file
View File

@ -0,0 +1,3 @@
---
# Global defaults
# Nvidia support is off by default. A host opts in by overriding this with
# `nvidia: true`; plays and tasks compare against this flag before doing any
# Nvidia driver work.
nvidia: false

View File

@ -19,3 +19,4 @@ allow-hotplug eno2np1
iface eno2np1 inet dhcp
"
nvidia: true

View File

@ -8,8 +8,8 @@ all:
control_plane:
  hosts:
    alpha-control-plane:
      # Connection address for this host. Ansible only recognises the
      # underscore spelling `ansible_host`; a hyphenated key is not a
      # connection variable, so the address would be ignored.
      ansible_host: 192.168.1.103
workers:
  hosts:
    alpha-worker-0:
      ansible_host: 192.168.1.102

25
nvidia_test.yml Normal file
View File

@ -0,0 +1,25 @@
---
# Test playbook: prepares apt and the kernel headers on alpha-worker-0, then
# applies the nvidia.nvidia_driver role with the overrides listed below.
- name: Install Nvidia drivers
  hosts: alpha-worker-0
  become: true
  pre_tasks:
    # The negative lookahead skips "deb" lines that already mention
    # " contrib", so a second run does not append the components again.
    - name: Add contrib & non-free repository
      replace:
        dest: /etc/apt/sources.list
        regexp: '^(deb(?!.* contrib).*)'
        replace: '\1 contrib non-free'
    # Headers must match the running kernel, hence the ansible_kernel fact.
    - name: Install Linux headers
      apt:
        name: "linux-headers-{{ ansible_kernel }}"
        state: present
  roles:
    - role: nvidia.nvidia_driver  # should run after cluster install
      vars:
        # See https://github.com/NVIDIA/ansible-role-nvidia-driver#role-variables
        nvidia_driver_skip_reboot: false
        nvidia_driver_branch: "555"
        # enforced 'debian11'
        nvidia_driver_ubuntu_cuda_repo_baseurl: 'https://developer.download.nvidia.com/compute/cuda/repos/debian12/x86_64'
        nvidia_driver_ubuntu_cuda_keyring_package: "cuda-keyring_1.1-1_all.deb"
        nvidia_driver_ubuntu_install_from_cuda_repo: true
        nvidia_driver_persistence_mode_on: true
        # Overrides the gathered fact for this role only.
        ansible_distribution: Ubuntu  # forcing in to the ubuntu part of the role

View File

@ -3,7 +3,7 @@
- name: Copy nftables configuration template
template:
src: "nftables.conf.j2"
dest: /etc/nftables.con
dest: /etc/nftables.conf
- name: Enable nftables
service:

View File

@ -5,9 +5,9 @@ flush ruleset
table inet filter {
chain input {
type filter hook input priority filter;
{% for port in open_ports %}
iifname "{{ port.interface }}" {{ port.protocol }} dport {{ port.port }} accept
{% endfor %}
{% for port in open_ports %}
iifname "{{ port.interface }}" {{ port.protocol }} dport {{ port.port }} accept;
{% endfor %}
}
chain forward {
type filter hook forward priority filter;

View File

@ -0,0 +1,38 @@
Role Name
=========
A brief description of the role goes here.
Requirements
------------
Any pre-requisites that may not be covered by Ansible itself or the role should be mentioned here. For instance, if the role uses the EC2 module, it may be a good idea to mention in this section that the boto package is required.
Role Variables
--------------
A description of the settable variables for this role should go here, including any variables that are in defaults/main.yml, vars/main.yml, and any variables that can/should be set via parameters to the role. Any variables that are read from other roles and/or the global scope (i.e., hostvars, group vars, etc.) should be mentioned here as well.
Dependencies
------------
A list of other roles hosted on Galaxy should go here, plus any details regarding parameters that may need to be set for other roles, or variables that are used from other roles.
Example Playbook
----------------
Including an example of how to use your role (for instance, with variables passed in as parameters) is always nice for users too:
- hosts: servers
roles:
- { role: username.rolename, x: 42 }
License
-------
BSD
Author Information
------------------
An optional section for the role authors to include contact information, or a website (HTML is not allowed).

View File

@ -0,0 +1,2 @@
---
# defaults file for ericomeehan.nvidia_driver_debian

View File

@ -0,0 +1,2 @@
---
# handlers file for ericomeehan.nvidia_driver_debian

View File

@ -0,0 +1,34 @@
---
# Galaxy metadata for the ericomeehan.nvidia_driver_debian role.
galaxy_info:
  # TODO: author, company and license still hold the generated template
  # placeholders; replace them with real values before publishing.
  author: your name
  description: Install the Nvidia driver, firmware and CUDA packages on Debian
  company: your company (optional)

  # If the issue tracker for your role is not on github, uncomment the
  # next line and provide a value
  # issue_tracker_url: http://example.com/issue/tracker

  # Choose a valid license ID from https://spdx.org - some suggested licenses:
  # - BSD-3-Clause (default)
  # - MIT
  # - GPL-2.0-or-later
  # - GPL-3.0-only
  # - Apache-2.0
  # - CC-BY-4.0
  license: license (GPL-2.0-or-later, MIT, etc)

  # Quoted so the version stays a string instead of being parsed as a float.
  min_ansible_version: "2.1"

  # If this a Container Enabled role, provide the minimum Ansible Container version.
  # min_ansible_container_version:

  galaxy_tags: []
  # List tags for your role here, one per line. A tag is a keyword that describes
  # and categorizes the role. Users find roles by searching for tags. Be sure to
  # remove the '[]' above, if you add tags to this list.
  #
  # NOTE: A tag is limited to a single word comprised of alphanumeric characters.
  # Maximum 20 tags per role.

dependencies: []
# List your role dependencies here, one per line. Be sure to remove the '[]' above,
# if you add dependencies to this list.

View File

@ -0,0 +1,32 @@
---
# tasks file for ericomeehan.nvidia_driver_debian
#
# Enables the contrib/non-free apt components, refreshes the package cache,
# then installs the kernel headers, the Nvidia driver, firmware and CUDA
# packages, in that order.

# The negative lookahead skips "deb" lines that already mention " contrib",
# so a second run does not append the components again.
- name: Add contrib & non-free repository
  replace:
    dest: /etc/apt/sources.list
    regexp: '^(deb(?!.* contrib).*)'
    replace: '\1 contrib non-free'

# Runs unconditionally: this role's defaults do not define `nvidia`, so a
# per-task `when: nvidia == true` errors on an undefined variable when the
# role is applied on its own, and skipping the refresh would leave the
# installs below without the package index for the components just enabled.
# Whether the role runs at all is for the calling play to decide.
- name: Update apt
  become: true
  apt:
    update_cache: true

# Headers must match the running kernel, hence the ansible_kernel fact.
- name: Install Linux headers
  apt:
    name: "linux-headers-{{ ansible_kernel }}"
    state: present

- name: Install Nvidia driver
  apt:
    name: nvidia-driver
    state: present

- name: Install necessary firmware
  apt:
    name: firmware-misc-nonfree
    state: present

- name: Install CUDA drivers
  apt:
    name: nvidia-cuda-dev
    state: present

- name: Install CUDA toolkit
  apt:
    name: nvidia-cuda-toolkit
    state: present

View File

@ -0,0 +1,2 @@
localhost

View File

@ -0,0 +1,5 @@
---
# Smoke-test play: applies the role to localhost, connecting as root.
- hosts: localhost
  remote_user: root
  roles:
    - role: ericomeehan.nvidia_driver_debian

View File

@ -0,0 +1,2 @@
---
# vars file for ericomeehan.nvidia_driver_debian

View File

@ -32,3 +32,7 @@
roles:
- role: geerlingguy.containerd
- role: geerlingguy.kubernetes
- role: ericomeehan.nvidia_driver_debian
when: nvidia == true and ansible_os_family == 'Debian'
tasks: