---
# Ansible playbook (source listing metadata: 201 lines, 6.4 KiB, YAML)
# Play 1: pre-flight gate.
# Runs the health checks once, on the first host of the `nodes` group, and
# aborts the whole run (any_errors_fatal) when the cluster is not quorate or
# Ceph does not report HEALTH_OK.
- name: Check Proxmox VE cluster health
  hosts: nodes
  any_errors_fatal: true
  become: true
  tasks:

    - name: Verify cluster is ready for updates
      delegate_to: "{{ groups['nodes'][0] }}"
      run_once: true
      block:

        - name: Verify cluster quorum
          ansible.builtin.command: pvecm status
          register: quorum_status
          changed_when: false
          # Fail unless the status output contains "Quorate: Yes".
          failed_when: quorum_status.stdout is not search('Quorate:\\s*Yes')

        - name: Verify Ceph health
          ansible.builtin.command: ceph health
          register: ceph_health
          changed_when: false
          # Anything other than HEALTH_OK (warnings included) blocks the run.
          failed_when: "'HEALTH_OK' not in ceph_health.stdout"

      rescue:

        # Sent from the controller; skipped entirely when ntfy_url is not set.
        # The password is read from the NTFY_PASSWORD environment variable.
        - name: Send not ready notification
          ansible.builtin.uri:
            url: "{{ ntfy_url }}/{{ ntfy_topic }}"
            method: POST
            user: "{{ ntfy_user }}"
            password: "{{ lookup('env', 'NTFY_PASSWORD') }}"
            force_basic_auth: true
            body: No updates have been rolled out
            headers:
              Title: "Proxmox VE Not Ready for Updates"
              Priority: "default"
              Tags: "x"
          delegate_to: localhost
          become: false
          run_once: true
          when: ntfy_url is defined

        # A completed rescue section clears the failure, so fail explicitly
        # to stop the following plays from running.
        - name: Abort the run
          ansible.builtin.fail:
            msg: "Update aborted"

# Play 2: rolling update, one node at a time (serial: 1).
# For each node: refresh the apt cache, dry-run a dist-upgrade to see whether
# anything is pending and, only if so, put the node into HA maintenance, wait
# for running guests to leave, upgrade, reboot, and wait for Ceph to recover.
# Any failure sends a notification and aborts the run for all hosts.
- name: Rolling update of Proxmox VE cluster
  hosts: nodes
  serial: 1
  any_errors_fatal: true
  become: true
  tasks:

    - name: Update node
      block:

        - name: Refresh repositories
          ansible.builtin.apt:
            update_cache: true

        # Forced check mode: nothing is installed here; `changed` only tells
        # us whether a dist-upgrade would do something.
        - name: Check if updates are available
          ansible.builtin.apt:
            upgrade: dist
          check_mode: true
          register: apt_check

        - name: Proceed if updates are available
          when: apt_check.changed
          block:

            # pipefail: without it a failing `pveversion` is hidden by awk's
            # exit status and an empty version would be recorded silently.
            - name: Get version before upgrade
              ansible.builtin.shell:
                cmd: |
                  set -o pipefail
                  pveversion | awk -F'/' '{print $2}'
                executable: /bin/bash
              register: pve_old_version
              changed_when: false

            - name: Enable maintenance mode
              ansible.builtin.command: >
                ha-manager crm-command node-maintenance enable {{ inventory_hostname_short }}

            # Polls every 15 s, up to 60 times, until no container is running.
            # pipefail: without it a failing `pct list` is masked by awk, which
            # prints 0 and exits successfully, so the gate would pass and the
            # node would be rebooted with guests possibly still running.
            - name: Wait for LXCs to leave node
              ansible.builtin.shell:
                cmd: |
                  set -o pipefail
                  pct list | awk 'NR>1 && $2=="running" {count++} END {print count+0}'
                executable: /bin/bash
              register: lxc_count
              changed_when: false
              until: lxc_count.stdout | int == 0
              retries: 60
              delay: 15

            # Same gate for virtual machines; same pipefail reasoning.
            - name: Wait for VMs to leave node
              ansible.builtin.shell:
                cmd: |
                  set -o pipefail
                  qm list | awk 'NR>1 && $3=="running" {count++} END {print count+0}'
                executable: /bin/bash
              register: vm_count
              changed_when: false
              until: vm_count.stdout | int == 0
              retries: 60
              delay: 15

            - name: Update packages
              ansible.builtin.apt:
                upgrade: full
                autoremove: true
                autoclean: true

            # NOTE(review): if a task between here and the matching "unset"
            # fails (e.g. the reboot times out), the rescue section only
            # notifies and aborts; the noout flag and the node's maintenance
            # mode are left in place and must be cleared by hand.
            - name: Disable Ceph rebalancing
              ansible.builtin.command: ceph osd set noout

            - name: Reboot node
              ansible.builtin.reboot:
                reboot_timeout: 900
                post_reboot_delay: 30

            - name: Enable Ceph rebalancing
              ansible.builtin.command: ceph osd unset noout

            - name: Disable maintenance mode
              ansible.builtin.command: >
                ha-manager crm-command node-maintenance disable {{ inventory_hostname_short }}

            - name: Get version after upgrade
              ansible.builtin.shell:
                cmd: |
                  set -o pipefail
                  pveversion | awk -F'/' '{print $2}'
                executable: /bin/bash
              register: pve_new_version
              changed_when: false

            # Stored as a host fact; the final play reads it through hostvars
            # to build the report.
            - name: Save update report
              ansible.builtin.set_fact:
                update_report:
                  old: "{{ pve_old_version.stdout }}"
                  new: "{{ pve_new_version.stdout }}"

            # Do not move on to the next node until Ceph has recovered
            # (polls every 15 s, up to 60 times, on the first node).
            - name: Wait for Ceph to be healthy
              ansible.builtin.command: ceph health
              register: ceph_status
              changed_when: false
              until: "'HEALTH_OK' in ceph_status.stdout"
              retries: 60
              delay: 15
              delegate_to: "{{ groups['nodes'][0] }}"

      rescue:

        # Sent from the controller; skipped when ntfy_url is not set.
        - name: Send failure notification
          ansible.builtin.uri:
            url: "{{ ntfy_url }}/{{ ntfy_topic }}"
            method: POST
            user: "{{ ntfy_user }}"
            password: "{{ lookup('env', 'NTFY_PASSWORD') }}"
            force_basic_auth: true
            body: Update failed on {{ inventory_hostname_short }}
            headers:
              Title: "Proxmox VE Update Failed"
              Priority: "high"
              Tags: "x"
          delegate_to: localhost
          become: false
          run_once: true
          when: ntfy_url is defined

        # A completed rescue section clears the failure, so fail explicitly
        # to stop the remaining nodes from being updated.
        - name: Abort the run
          ansible.builtin.fail:
            msg: "Update aborted"

# Play 3: final report, executed on the controller.
# Collects the `update_report` facts recorded on the nodes and posts a summary
# to ntfy (skipped when ntfy_url is not defined).
- name: Send notification
  hosts: localhost
  tasks:

    # True when at least one host in `nodes` has an update_report fact.
    - name: Determine if updates occurred
      ansible.builtin.set_fact:
        updates_performed: >-
          {{ groups['nodes'] | map('extract', hostvars)
          | selectattr('update_report', 'defined') | list | length > 0 }}

    - name: Send success notification
      when: ntfy_url is defined
      ansible.builtin.uri:
        method: POST
        url: "{{ ntfy_url }}/{{ ntfy_topic }}"
        force_basic_auth: true
        user: "{{ ntfy_user }}"
        password: "{{ lookup('env', 'NTFY_PASSWORD') }}"
        headers:
          Title: 'Proxmox VE Update Report'
          # Lowest priority when there is nothing to report.
          Priority: "{{ 'min' if not updates_performed else 'default' }}"
          Tags: white_check_mark
        # One line per updated node, showing old and new version
        # (or "unchanged" when both are equal).
        body: |
          {% set updated_nodes = [] %}
          {% for node in groups['nodes'] %}
          {% if hostvars[node].update_report is defined %}
          {% set _ = updated_nodes.append(node) %}
          {% endif %}
          {% endfor %}
          {% if not updates_performed %}
          No updates available on the cluster.
          {% else %}
          The following nodes were updated:
          {% for node in updated_nodes %}
          {% if hostvars[node].update_report.old == hostvars[node].update_report.new %}
          - {{ hostvars[node].inventory_hostname_short }}: version {{ hostvars[node].update_report.old }} (unchanged)
          {% else %}
          - {{ hostvars[node].inventory_hostname_short }}: version {{ hostvars[node].update_report.old }} → {{ hostvars[node].update_report.new }}
          {% endif %}
          {% endfor %}
          {% endif %}