Automate Proxmox VE updates #2

Merged
Vezpi merged 2 commits from lab into main 2026-06-09 22:09:06 +02:00
2 changed files with 290 additions and 0 deletions

View File

@@ -0,0 +1,200 @@
---
# Play 1: pre-flight checks. The checks run once, on the first host of the
# "nodes" group. If any of them fails, a notification is sent (when ntfy_url
# is defined) and the play is failed so the following plays do not run.
- name: Check Proxmox VE cluster health
  hosts: nodes
  become: true
  any_errors_fatal: true
  tasks:
    - run_once: true
      delegate_to: "{{ groups['nodes'][0] }}"
      block:
        # Fails unless the output of `pvecm status` contains "Quorate: Yes".
        - name: Verify cluster quorum
          ansible.builtin.command: pvecm status
          changed_when: false
          register: quorum_status
          failed_when: quorum_status.stdout is not search('Quorate:\\s*Yes')

        # Fails unless the output of `ceph health` contains HEALTH_OK.
        - name: Verify Ceph health
          ansible.builtin.command: ceph health
          changed_when: false
          register: ceph_health
          failed_when: "'HEALTH_OK' not in ceph_health.stdout"

      rescue:
        # Sent from the control node, without privilege escalation.
        - name: Send no ready notification
          when: ntfy_url is defined
          run_once: true
          become: false
          delegate_to: localhost
          ansible.builtin.uri:
            url: "{{ ntfy_url }}/{{ ntfy_topic }}"
            method: POST
            force_basic_auth: true
            user: "{{ ntfy_user }}"
            # The password is read from the NTFY_PASSWORD environment variable.
            password: "{{ lookup('env', 'NTFY_PASSWORD') }}"
            headers:
              Title: 'Proxmox VE Not Ready for Updates'
              Priority: 'default'
              Tags: 'x'
            body: 'No updates have been rolled out'

        # Re-raise the failure after the notification attempt.
        - ansible.builtin.fail:
            msg: 'Update aborted'
# Play 2: upgrade the nodes one at a time (serial: 1). For each node:
#   1. refresh the package cache and dry-run the upgrade,
#   2. if the dry-run reports changes: enter HA maintenance mode, wait until
#      no LXC/VM is running on the node, upgrade, reboot, leave maintenance
#      mode, record old/new versions and wait for Ceph to report HEALTH_OK.
# Any failure triggers the rescue section (notification + abort), and
# any_errors_fatal stops the remaining nodes from being processed.
- name: Rolling update of Proxmox VE cluster
  hosts: nodes
  serial: 1
  any_errors_fatal: true
  become: true
  tasks:
    - name: Update node and notify on failure
      block:
        - name: Refresh repositories
          ansible.builtin.apt:
            update_cache: true

        # Dry run only (check_mode): "changed" means packages would be upgraded.
        - name: Check if updates are available
          ansible.builtin.apt:
            upgrade: dist
          check_mode: true
          register: apt_check

        - name: Proceed if updates are available
          when: apt_check.changed
          block:
            # pipefail: a failing pveversion must fail the task instead of
            # letting awk succeed with an empty version string.
            - name: Get version before upgrade
              ansible.builtin.shell:
                cmd: |
                  set -o pipefail
                  pveversion | awk -F'/' '{print $2}'
                executable: /bin/bash
              register: pve_old_version
              changed_when: false

            # NOTE(review): assumes the short inventory hostname equals the
            # Proxmox node name - confirm against the inventory.
            - name: Enable maintenance mode
              ansible.builtin.command: >
                ha-manager crm-command node-maintenance enable {{ inventory_hostname_short }}
              changed_when: true

            # Polls every 15 s, up to 60 times, until no container is running.
            # pipefail: if `pct list` itself fails, awk would still print 0
            # and the node would be treated as empty; fail the task instead.
            - name: Wait for LXCs to leave node
              ansible.builtin.shell:
                cmd: |
                  set -o pipefail
                  pct list | awk 'NR>1 && $2=="running" {count++} END {print count+0}'
                executable: /bin/bash
              register: lxc_count
              changed_when: false
              until: lxc_count.stdout | int == 0
              retries: 60
              delay: 15

            # Same polling and pipefail reasoning as above, for virtual machines.
            - name: Wait for VMs to leave node
              ansible.builtin.shell:
                cmd: |
                  set -o pipefail
                  qm list | awk 'NR>1 && $3=="running" {count++} END {print count+0}'
                executable: /bin/bash
              register: vm_count
              changed_when: false
              until: vm_count.stdout | int == 0
              retries: 60
              delay: 15

            # Same upgrade mode as the dry run above, so the availability
            # check predicts exactly the operation performed here.
            - name: Update packages
              ansible.builtin.apt:
                upgrade: dist
                autoremove: true
                autoclean: true

            # NOTE(review): if the reboot or a later task fails, the rescue
            # section does not unset noout or leave maintenance mode; both
            # then need manual cleanup - confirm this is intended.
            - name: Disable Ceph rebalancing
              ansible.builtin.command: ceph osd set noout
              changed_when: true

            - name: Reboot node
              ansible.builtin.reboot:
                reboot_timeout: 900
                post_reboot_delay: 30

            - name: Enable Ceph rebalancing
              ansible.builtin.command: ceph osd unset noout
              changed_when: true

            - name: Disable maintenance mode
              ansible.builtin.command: >
                ha-manager crm-command node-maintenance disable {{ inventory_hostname_short }}
              changed_when: true

            - name: Get version after upgrade
              ansible.builtin.shell:
                cmd: |
                  set -o pipefail
                  pveversion | awk -F'/' '{print $2}'
                executable: /bin/bash
              register: pve_new_version
              changed_when: false

            # Host fact read by the final notification play through hostvars.
            - name: Save update report
              ansible.builtin.set_fact:
                update_report:
                  old: "{{ pve_old_version.stdout }}"
                  new: "{{ pve_new_version.stdout }}"

            # Checked from the first node of the group; polls every 15 s, up
            # to 60 times, before the play moves on to the next node.
            - name: Wait for Ceph to be healthy
              ansible.builtin.command: ceph health
              register: ceph_status
              changed_when: false
              until: "'HEALTH_OK' in ceph_status.stdout"
              retries: 60
              delay: 15
              delegate_to: "{{ groups['nodes'][0] }}"

      rescue:
        # Sent from the control node, without privilege escalation.
        - name: Send failure notification
          ansible.builtin.uri:
            url: "{{ ntfy_url }}/{{ ntfy_topic }}"
            method: POST
            user: "{{ ntfy_user }}"
            password: "{{ lookup('env', 'NTFY_PASSWORD') }}"
            force_basic_auth: true
            body: "Update failed on {{ inventory_hostname_short }}"
            headers:
              Title: "Proxmox VE Update Failed"
              Priority: "high"
              Tags: "x"
          delegate_to: localhost
          become: false
          run_once: true
          when: ntfy_url is defined

        # Re-raise the failure so any_errors_fatal stops the rollout.
        - name: Abort the rollout
          ansible.builtin.fail:
            msg: "Update aborted"
# Play 3: final report, run on the control node. A node counts as updated
# when it carries an "update_report" fact in hostvars.
- name: Send notification
  hosts: localhost
  tasks:
    # True when at least one host of the "nodes" group has an update_report.
    - name: Determine if updates occurred
      ansible.builtin.set_fact:
        updates_performed: >-
          {{ groups['nodes']
          | map('extract', hostvars)
          | selectattr('update_report', 'defined')
          | list
          | length > 0 }}

    # The body lists each updated node with its old and new version, or a
    # single "no updates" line. Priority is 'min' when nothing was updated.
    # The template lines are kept unindented relative to each other so the
    # rendered message text is unchanged.
    - name: Send success notification
      when: ntfy_url is defined
      ansible.builtin.uri:
        url: "{{ ntfy_url }}/{{ ntfy_topic }}"
        method: POST
        force_basic_auth: true
        user: "{{ ntfy_user }}"
        # The password is read from the NTFY_PASSWORD environment variable.
        password: "{{ lookup('env', 'NTFY_PASSWORD') }}"
        headers:
          Title: 'Proxmox VE Update Report'
          Priority: "{{ 'min' if not updates_performed else 'default' }}"
          Tags: 'white_check_mark'
        body: |
          {% set updated_nodes = [] %}
          {% for node in groups['nodes'] %}
          {% if hostvars[node].update_report is defined %}
          {% set _ = updated_nodes.append(node) %}
          {% endif %}
          {% endfor %}
          {% if not updates_performed %}
          No updates available on the cluster.
          {% else %}
          The following nodes were updated:
          {% for node in updated_nodes %}
          {% if hostvars[node].update_report.old == hostvars[node].update_report.new %}
          - {{ hostvars[node].inventory_hostname_short }}: version {{ hostvars[node].update_report.old }} (unchanged)
          {% else %}
          - {{ hostvars[node].inventory_hostname_short }}: version {{ hostvars[node].update_report.old }} → {{ hostvars[node].update_report.new }}
          {% endif %}
          {% endfor %}
          {% endif %}

90
opnsense/10-wan Normal file
View File

@@ -0,0 +1,90 @@
#!/usr/local/bin/php
<?php
/**
* Author 2025 Etienne Girault <etienne.girault@gmail.com>
* OPNsense CARP event script
* - Enables/disables the WAN interface only when needed
* - Avoids reapplying config when CARP triggers multiple times
*/
require_once("config.inc");
require_once("interfaces.inc");
require_once("util.inc");
require_once("system.inc");
// Read CARP event arguments:
//   $argv[1] - subsystem, split below as "<vhid>@<ifname>"
//   $argv[2] - event type; only MASTER and BACKUP are acted upon
// Missing arguments fall back to an empty string.
$subsystem = !empty($argv[1]) ? $argv[1] : '';
$type = !empty($argv[2]) ? $argv[2] : '';

// Accept only MASTER/BACKUP events (strict string comparison)
if (!in_array($type, ['MASTER', 'BACKUP'], true)) {
    // Ignore CARP INIT, DEMOTED, etc.
    exit(0);
}

// Validate subsystem name format, expected pattern: <vhid>@<ifname>
if (!preg_match('/^[a-z0-9_]+@\S+$/i', $subsystem)) {
    log_error("Malformed subsystem argument: '{$subsystem}'.");
    exit(0);
}

// Only react to the primary VHID. The split is limited to two parts so that
// everything after the first '@' stays in $iface.
list($vhid, $iface) = explode('@', $subsystem, 2);
$primary_vhid = '1';
if ($vhid !== $primary_vhid) {
    exit(0); // ignore events from other VHIDs
}
// Configuration key of the interface toggled by this script
$ifkey = 'wan';

// True when the loaded configuration holds a non-empty 'enable' value for it
$ifkey_enabled = !empty($config['interfaces'][$ifkey]['enable']);

// Lock file used to prevent interface flapping
$lock_file = '/tmp/carp_wan_disable_lock';
// Seconds the BACKUP handler keeps the lock file before removing it
$lock_default_age = 5;
// A lock at least this many seconds old is treated as stale by the MASTER handler
$lock_max_age = 10;
// MASTER event: enable the interface, unless it is already enabled.
if ($type === "MASTER") {
    // Enable WAN only if it's currently disabled
    if (!$ifkey_enabled) {
        // A lock file is written by the BACKUP branch below right after it
        // disabled the interface; it holds the creation time as a Unix
        // timestamp.
        if (file_exists($lock_file)) {
            $lock_age = time() - (int)file_get_contents($lock_file);
            if ($lock_age < $lock_max_age) {
                log_msg("CARP event: WAN disable lock present ({$lock_age}s old), waiting...");
                // Wait (in 0.5 s steps) for the lock to disappear, at most as
                // long as the BACKUP handler holds it.
                $max_wait_ms = $lock_default_age * 1000;
                $elapsed = 0;
                while (file_exists($lock_file) && $elapsed < $max_wait_ms) {
                    usleep(500000);
                    $elapsed += 500;
                    // The lock is removed by another process; drop any cached
                    // file status so the next check sees the current state.
                    clearstatcache(true, $lock_file);
                }
            } else {
                log_msg("CARP event: removing stale WAN disable lock.");
                @unlink($lock_file);
            }
        }
        log_msg("CARP event: switching to '$type', enabling interface '$ifkey'.", LOG_WARNING);
        $config['interfaces'][$ifkey]['enable'] = '1';
        write_config("enable interface '$ifkey' due CARP event '$type'", false);
        interface_configure(false, $ifkey, false, false);
    } else {
        log_msg("CARP event: already 'MASTER' for interface '$ifkey', nothing to do.");
    }
// BACKUP event: disable the interface, unless it is already disabled.
} else {
    // Disable WAN only if it's currently enabled
    if ($ifkey_enabled) {
        log_msg("CARP event: switching to '$type', disabling interface '$ifkey'.", LOG_WARNING);
        unset($config['interfaces'][$ifkey]['enable']);
        write_config("disable interface '$ifkey' due CARP event '$type'", false);
        interface_configure(false, $ifkey, false, false);
        // Create lock file, keep it for $lock_default_age seconds, then remove it.
        // NOTE(review): the lock only exists after the interface has been
        // reconfigured, so a MASTER event arriving during the reconfiguration
        // does not see it - confirm this window is acceptable.
        file_put_contents($lock_file, time());
        sleep($lock_default_age);
        @unlink($lock_file);
    } else {
        log_msg("CARP event: already '$type' for interface '$ifkey', nothing to do.");
    }
}