Compare commits

7a5de897b0...preview (212 commits)

@@ -4,6 +4,8 @@ on:
   push:
     branches:
       - preview
+  schedule:
+    - cron: '0 3 * * 5'
 
 env:
   DOCKER_IMAGE: vezpi-blog
@@ -20,9 +22,10 @@ jobs:
       newer_version_available: ${{ steps.compare.outputs.version }}
       current_docker_image: ${{ steps.current_docker.outputs.image }}
       docker_folder_changed: ${{ steps.docker_folder.outputs.changed }}
+      dev_lock_present: ${{ steps.check_dev_lock.outputs.locked }}
     steps:
       - name: Checkout Repository
-        run: git clone --branch preview https://${{ secrets.REPO_TOKEN }}@git.vezpi.me/Vezpi/blog.git .
+        run: git clone --branch preview https://${{ secrets.REPO_TOKEN }}@git.vezpi.com/Vezpi/blog.git .
 
       - name: Check Latest Hugo Version
         id: get_latest
@@ -68,6 +71,15 @@ jobs:
           fi
           echo "changed=$docker_folder_changed" | tee -a $GITEA_OUTPUT
 
+      - name: Check for .dev-lock file
+        id: check_dev_lock
+        run: |
+          if [ -f .dev-lock ]; then
+            echo "locked=true" | tee -a $GITEA_OUTPUT
+          else
+            echo "locked=false" | tee -a $GITEA_OUTPUT
+          fi
+
   Build:
     needs: Check-Rebuild
     if: needs.Check-Rebuild.outputs.newer_version_available == 'true' || needs.Check-Rebuild.outputs.docker_folder_changed == 'true'
@@ -77,7 +89,7 @@ jobs:
         shell: sh
     steps:
       - name: Checkout Repository
-        run: git clone --branch preview https://${{ secrets.REPO_TOKEN }}@git.vezpi.me/Vezpi/blog.git .
+        run: git clone --branch preview https://${{ secrets.REPO_TOKEN }}@git.vezpi.com/Vezpi/blog.git .
 
       - name: Build Docker Image
         run: |
@@ -89,9 +101,7 @@ jobs:
          docker tag ${DOCKER_IMAGE}:${{ needs.Check-Rebuild.outputs.latest_hugo_version }} ${DOCKER_IMAGE}:latest
 
   Deploy-Staging:
-    needs:
-      - Check-Rebuild
-      - Build
+    needs: [Check-Rebuild, Build]
     if: always() && needs.Check-Rebuild.result == 'success' && (needs.Build.result == 'skipped' || needs.Build.result == 'success')
     runs-on: docker
     container:
@@ -131,7 +141,8 @@ jobs:
           fi
 
   Merge:
-    needs: Test-Staging
+    needs: [Check-Rebuild, Test-Staging]
+    if: needs.Test-Staging.result == 'success' && needs.Check-Rebuild.outputs.dev_lock_present == 'false'
     runs-on: ubuntu
     steps:
       - name: Checkout Repository
@@ -185,10 +196,7 @@ jobs:
           fi
 
   Clean:
-    needs:
-      - Check-Rebuild
-      - Build
-      - Test-Production
+    needs: [Check-Rebuild, Build, Test-Production]
     runs-on: docker
     defaults:
       run:
@@ -198,3 +206,40 @@ jobs:
         run: |
          docker image rm ${{ needs.Check-Rebuild.outputs.current_docker_image }} --force
+
+  Notify:
+    needs: [Check-Rebuild, Build, Deploy-Staging, Test-Staging, Merge, Deploy-Production, Test-Production, Clean]
+    runs-on: ubuntu
+    if: always() && needs.Check-Rebuild.outputs.dev_lock_present == 'false'
+    env:
+      NTFY_URL: https://ntfy.vezpi.com
+      NTFY_TOPIC: blog
+      NTFY_TOKEN: ${{ secrets.NTFY_CREDENTIALS }}
+    steps:
+      - name: Notify Workflow Result
+        run: |
+          if [[
+            "${{ needs.Check-Rebuild.result }}" == "success" &&
+            ("${{ needs.Build.result }}" == "success" || "${{ needs.Build.result }}" == "skipped") &&
+            "${{ needs.Deploy-Staging.result }}" == "success" &&
+            "${{ needs.Test-Staging.result }}" == "success" &&
+            "${{ needs.Merge.result }}" == "success" &&
+            "${{ needs.Deploy-Production.result }}" == "success" &&
+            "${{ needs.Test-Production.result }}" == "success" &&
+            ("${{ needs.Clean.result }}" == "success" || "${{ needs.Clean.result }}" == "skipped")
+          ]]; then
+            curl -H "Priority: min" \
+              -H "Tags: white_check_mark" \
+              -d "Blog workflow completed successfully." \
+              -u ${NTFY_TOKEN} \
+              ${NTFY_URL}/${NTFY_TOPIC}
+          else
+            curl -H "Priority: high" \
+              -H "Tags: x" \
+              -H "Actions: view, View Run, ${{ gitea.server_url }}/${{ gitea.repository }}/actions/runs/${{ gitea.run_number }}, clear=true; \
+                view, Verify Blog, https://blog.vezpi.com, clear=true" \
+              -d "Blog workflow failed!" \
+              -u ${NTFY_TOKEN} \
+              ${NTFY_URL}/${NTFY_TOPIC}
+          fi
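
With the new `dev_lock_present` output, the Merge and Notify jobs only run when no `.dev-lock` file exists on the `preview` branch, so pausing automatic merges to production is just a matter of committing that file (illustrative usage):

```bash
# Pause automatic merges while work is in progress on preview
touch .dev-lock
git add .dev-lock
git commit -m "Lock preview: work in progress"
git push origin preview

# Resume the normal pipeline
git rm .dev-lock
git commit -m "Unlock preview"
git push origin preview
```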

Deleted file (SVG icon, 732 B):
@@ -1 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="icon icon-tabler icons-tabler-outline icon-tabler-brand-git"><path stroke="none" d="M0 0h24v24H0z" fill="none"/><path d="M16 12m-1 0a1 1 0 1 0 2 0a1 1 0 1 0 -2 0" /><path d="M12 8m-1 0a1 1 0 1 0 2 0a1 1 0 1 0 -2 0" /><path d="M12 16m-1 0a1 1 0 1 0 2 0a1 1 0 1 0 -2 0" /><path d="M12 15v-6" /><path d="M15 11l-2 -2" /><path d="M11 7l-1.9 -1.9" /><path d="M13.446 2.6l7.955 7.954a2.045 2.045 0 0 1 0 2.892l-7.955 7.955a2.045 2.045 0 0 1 -2.892 0l-7.955 -7.955a2.045 2.045 0 0 1 0 -2.892l7.955 -7.955a2.045 2.045 0 0 1 2.892 0z" /></svg>

Deleted file (SVG icon, 1.2 KiB):
@@ -1 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="currentColor" class="icon icon-tabler icons-tabler-filled icon-tabler-brightness-up"><path stroke="none" d="M0 0h24v24H0z" fill="none"/><path d="M12 8a4 4 0 1 1 -3.995 4.2l-.005 -.2l.005 -.2a4 4 0 0 1 3.995 -3.8z" /><path d="M12 2a1 1 0 0 1 .993 .883l.007 .117v2a1 1 0 0 1 -1.993 .117l-.007 -.117v-2a1 1 0 0 1 1 -1z" /><path d="M17.693 4.893a1 1 0 0 1 1.497 1.32l-.083 .094l-1.4 1.4a1 1 0 0 1 -1.497 -1.32l.083 -.094l1.4 -1.4z" /><path d="M21 11a1 1 0 0 1 .117 1.993l-.117 .007h-2a1 1 0 0 1 -.117 -1.993l.117 -.007h2z" /><path d="M16.293 16.293a1 1 0 0 1 1.32 -.083l.094 .083l1.4 1.4a1 1 0 0 1 -1.32 1.497l-.094 -.083l-1.4 -1.4a1 1 0 0 1 0 -1.414z" /><path d="M12 18a1 1 0 0 1 .993 .883l.007 .117v2a1 1 0 0 1 -1.993 .117l-.007 -.117v-2a1 1 0 0 1 1 -1z" /><path d="M6.293 16.293a1 1 0 0 1 1.497 1.32l-.083 .094l-1.4 1.4a1 1 0 0 1 -1.497 -1.32l.083 -.094l1.4 -1.4z" /><path d="M6 11a1 1 0 0 1 .117 1.993l-.117 .007h-2a1 1 0 0 1 -.117 -1.993l.117 -.007h2z" /><path d="M4.893 4.893a1 1 0 0 1 1.32 -.083l.094 .083l1.4 1.4a1 1 0 0 1 -1.32 1.497l-.094 -.083l-1.4 -1.4a1 1 0 0 1 0 -1.414z" /></svg>

assets/icons/message-language.svg (new file, 462 B)
@@ -0,0 +1 @@
<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="icon icon-tabler icons-tabler-outline icon-tabler-message-language"><path stroke="none" d="M0 0h24v24H0z" fill="none"/><path d="M4 21v-13a3 3 0 0 1 3 -3h10a3 3 0 0 1 3 3v6a3 3 0 0 1 -3 3h-9l-4 4" /><path d="M10 14v-4a2 2 0 1 1 4 0v4" /><path d="M14 12h-4" /></svg>

assets/icons/toggle_to_en.svg (new file, 90 KiB)

assets/icons/toggle_to_fr.svg (new file, 90 KiB)

assets/scss/custom.scss (new file, 11 lines)
@@ -0,0 +1,11 @@
.homepage-header {
  text-align: center;
}

.lang-toggle-icon {
  margin-left: auto;
  svg {
    width: 64px;
    height: 24px;
  }
}

content/_index.fr.md (new file, 6 lines)
@@ -0,0 +1,6 @@
---
title: Bienvenue sur Vezpi Lab
description: Ici les derniers articles
---
Ce blog partage mes projets et expériences dans mon homelab.
Vous trouverez ci-dessous les derniers articles.

content/_index.md (new file, 6 lines)
@@ -0,0 +1,6 @@
---
title: Welcome to Vezpi Lab
description: Here are the latest articles
---
This blog shares projects and experiments from my homelab.
Below you'll find the latest articles.

@@ -9,12 +9,12 @@ menu:
 params:
   icon: user
 ---
-Salut ! Moi c'est Etienne, j'adore l'**automatisation** et je suis un amateur de projets **homelab**. Je suis un expert Linux et travaille comme **Senior Cloud DevOps Engineer** chez Capgemini.
+Salut ! Moi c'est Etienne, j'adore l'**automatisation** et je suis un amateur de projets **homelab**. Je suis un expert Linux et je travaille comme **Senior Cloud DevOps Engineer** chez Capgemini.
 
-Motivé par la passion, j'aime explorer de nouvelles technologie, comprendre comment elles fonctionnement et les expérimenter chez moi, juste pour le plaisir. Mon lab est passé d'un simple espace de bidouille à un véritable terrain de jeu pour expérimenter la virtualisation, l'orchestration de conteneurs, le réseau, et bien plus encore.
+Motivé par la passion, j'aime explorer de nouvelles technologies, comprendre comment elles fonctionnent et les expérimenter chez moi, juste pour le plaisir. Mon lab est passé d'un simple espace de bidouille à un véritable terrain de jeu pour expérimenter la virtualisation, l'orchestration de conteneurs, le réseau, et bien plus encore.
 
 Ce blog est ma façon de documenter ce que je construis, casse (volontairement !), répare et surtout, ce que j'**apprends**. C'est une référence personnelle, mais aussi un moyen de partager avec la communauté, de m'open source, au cas où quelqu'un d'autre suivrait le même chemin et trouverait mon expérience utile.
 
-Même si je suis français, J'écris mes notes personnelles en anglais pour perfectionner l'utilisation de cette langue, mais j'essaye tout de même de les traduire dans ma langue maternelle.
+Même si je suis français, j'écris mes notes personnelles en anglais pour perfectionner l'utilisation de cette langue, mais j'essaie tout de même de les traduire dans ma langue maternelle.
 
 Si vous travaillez sur un projet intéressant, si vous avez des questions ou si vous souhaitez proposer de nouvelles idées, **n'hésitez pas à me contacter** !

content/post/10-opnsense-crash-disk-panic.fr.md (new file, 224 lines)
@@ -0,0 +1,224 @@
---
slug: opnsense-crash-disk-panic
title: Mon Routeur OPNsense Crash, de la Panique à la Renaissance
description: L'histoire de comment j'ai survécu à un crash OPNsense causé par un disque défaillant, et pourquoi un fichier XML a tout changé.
date: 2025-08-24
draft: false
tags:
  - opnsense
categories:
  - homelab
---
## Intro

Cette semaine, j'ai vécu mon premier vrai problème dans mon homelab, qui a fait tomber tout mon réseau à la maison.

Mon routeur OPNsense a crashé et, après plusieurs tentatives de récupération ratées, j'ai finalement dû le réinstaller from scratch. Heureusement, presque toute la configuration est revenue grâce à un simple fichier XML. Dans cette histoire, je vais raconter ce qui s'est passé, ce que j'ai fait pour m'en sortir, et aussi ce que je n'aurais pas dû faire.

Ce genre d'exercice est la pire chose que vous souhaitez voir arriver, parce que ce n'est jamais amusant de voir tout exploser. Mais c'est de loin la meilleure façon d'apprendre.

## Le Calme Avant la Tempête

Ma box OPNsense tournait parfaitement depuis des mois. Routeur, pare-feu, DNS, DHCP, VLANs, VPN, reverse proxy et même contrôleur UniFi : toutes les pièces de mon homelab passent par elle. Mais pas seulement, elle fournit aussi Internet à la maison.



Cette box est le cœur de mon réseau : sans elle, je ne peux quasiment rien faire. J'ai détaillé son fonctionnement dans ma section [Homelab]({{< ref "page/homelab" >}}). Tout « fonctionnait juste », et je ne m'en inquiétais pas. J'étais confiant, sa sauvegarde vivait uniquement à l'intérieur de la machine…

Peut-être trop confiant.

## Le Redémarrage Inattendu

Sans prévenir, la box a redémarré toute seule, juste avant minuit. Par chance, je passais à côté de mon rack en allant me coucher. J'ai su qu'elle avait redémarré car j'ai entendu son petit bip de démarrage.

Je me suis demandé pourquoi le routeur avait redémarré sans mon accord. Dans mon lit, j'ai rapidement vérifié si Internet fonctionnait : oui. Mais aucun de mes services n'était disponible, ni la domotique, ni ce blog. J'étais fatigué, je réglerais ça le lendemain…

Au matin, en regardant les logs, j'ai trouvé le coupable :
```
panic: double fault
```

Un kernel panic. Mon routeur avait littéralement planté au niveau matériel.

## Premières Tentatives de Dépannage

Au début, l'impact semblait mineur. Un seul service ne redémarrait pas : Caddy, mon reverse proxy. Ce qui expliquait pourquoi mes services n'étaient pas accessibles.

En fouillant dans les logs, j'ai trouvé l'erreur :
```
caching certificate: decoding certificate metadata: unexpected end of JSON input
```

Un des certificats mis en cache avait été corrompu pendant le crash. En supprimant son dossier de cache, Caddy est reparti et, d'un coup, tous mes services HTTPS étaient de retour.

Je pensais avoir esquivé la balle. Je n'ai pas cherché plus loin la cause réelle : les logs du kernel étaient pollués par une interface qui « flappait », j'ai cru à un simple bug. À la place, je me suis lancé dans une mise à jour, ma première erreur.

Mon instance OPNsense était en version 25.1, et la 25.7 venait de sortir. Allons-y gaiement !

La mise à jour s'est déroulée correctement, mais quelque chose clochait. En cherchant de nouvelles updates, j'ai vu une corruption dans `pkg`, la base de données du gestionnaire de paquets :
```
pkg: sqlite error while executing iterator in file pkgdb_iterator.c:1110: database disk image is malformed
```

🚨 Mon alarme interne s'est déclenchée. J'ai pensé aux sauvegardes et j'ai immédiatement téléchargé la dernière :


En cliquant sur le bouton `Download configuration`, j'ai récupéré le `config.xml` en cours d'utilisation. Je pensais que ça suffirait.

## Corruption du Système de Fichiers

J'ai tenté de réparer la base `pkg` de la pire façon possible : j'ai sauvegardé le dossier `/var/db/pkg` puis essayé de refaire un `bootstrap` :
```bash
cp -a /var/db/pkg /var/db/pkg.bak
pkg bootstrap -f
```
```
The package management tool is not yet installed on your system.
Do you want to fetch and install it now? [y/N]: y
Bootstrapping pkg from https://pkg.opnsense.org/FreeBSD:14:amd64/25.7/latest, please wait...
[...]
pkg-static: Fail to extract /usr/local/lib/libpkg.a from package: Write error
Failed to install the following 1 package(s): /tmp//pkg.pkg.scQnQs
[...]
A pre-built version of pkg could not be found for your system.
```

J'ai vu un `Write error`. Je soupçonnais un problème disque. J'ai lancé `fsck` et reçu un flot d'incohérences :
```bash
fsck -n
```
```
[...]
INCORRECT BLOCK COUNT I=13221121 (208384 should be 208192)
INCORRECT BLOCK COUNT I=20112491 (8 should be 0)
INCORRECT BLOCK COUNT I=20352874 (570432 should be 569856)
[...]
FREE BLK COUNT(S) WRONG IN SUPERBLK
[...]
SUMMARY INFORMATION BAD
[...]
BLK(S) MISSING IN BIT MAPS
[...]
***** FILE SYSTEM IS LEFT MARKED AS DIRTY *****
```

Le système de fichiers root était en mauvais état.

N'ayant que SSH et pas de console, j'ai forcé un `fsck` au prochain redémarrage :
```bash
sysrc fsck_y_enable="YES"
sysrc background_fsck="NO"
reboot
```

Au redémarrage, le système a été réparé suffisamment pour relancer `pkg bootstrap`. Mais la moitié des paquets système avaient disparu. Ma mise à jour précédente sur un disque corrompu m'avait laissé avec un système bancal, à moitié installé, à moitié manquant.

## Quand ça empire

J'ai découvert l'utilitaire `opnsense-bootstrap`, censé remettre le système à plat :
- Suppression de tous les paquets installés
- Téléchargement et installation d'un nouveau noyau/base 25.7
- Réinstallation des paquets standards

Parfait !
```
opnsense-bootstrap
```
```
This utility will attempt to turn this installation into the latest OPNsense 25.7 release. All packages will be deleted, the base system and kernel will be replaced, and if all went well the system will automatically reboot. Proceed with this action? [y/N]:
```

J'ai dit `y`. Ça avait bien commencé, puis… plus rien. Plus de signal. Plus d'Internet. Je croyais que ce bootstrap allait me sauver. En fait, il m'a enterré.

🙈 Oups.

Après un moment, j'ai tenté de le redémarrer, mais impossible de me reconnecter en SSH. Pas le choix, j'ai dû sortir le routeur du rack, le poser sur mon bureau, brancher écran et clavier et voir ce qui se passait.

## Repartir de zéro

C'était mauvais signe :
```
Fatal error: Uncaught Error: Class "OPNsense\Core\Config" not found
in /usr/local/etc/inc/config.inc:143
```

Et les logs du bootstrap étaient pires :
```
bad dir ino … mangled entry
Input/output error
```

Le disque n'était pas en forme. Je ne pouvais plus rien sauver. Il était temps de repartir de zéro. Heureusement, j'avais une sauvegarde… non ?

J'ai téléchargé l'ISO OPNsense 25.7, créé une clé USB bootable, et réinstallé par-dessus, en laissant les paramètres par défaut.

## Le sauveur : `config.xml`

OPNsense garde toute sa configuration dans un seul fichier : `/conf/config.xml`. Ce fichier a été ma bouée de sauvetage.

J'ai copié le `config.xml` sauvegardé plus tôt sur ma clé USB. Une fois celle-ci connectée sur la machine fraîchement installée, j'ai remplacé le fichier :
```bash
mount -t msdosfs /dev/da0s1 /mnt
cp /mnt/config.xml /conf/config.xml
```

J'ai remis le routeur dans le rack, croisé les doigts… *bip !* 🎉

Le DHCP m'a donné une adresse, bon signe. Je pouvais accéder à l'interface web, super. Ma configuration était là, à peu près tout sauf les plugins, comme prévu. Je ne pouvais pas les installer immédiatement, car ils nécessitaient une autre mise à jour. Mettons à jour !

Ce fichier XML à lui seul m'a permis de reconstruire mon routeur sans perdre la raison.

Sans DNS (AdGuard non installé), j'ai temporairement pointé le DNS du système vers `1.1.1.1`.

## Le Dernier Souffle

Lors de la mise à jour suivante, rebelote : erreurs, reboot, crash. La machine de nouveau plus accessible...

Je pouvais officiellement déclarer mon disque NVMe mort.

🪦 Repose en paix, merci pour tes loyaux services.

Par chance, j'avais un NVMe Kingston 512 Go encore neuf, livré avec cette machine. Je ne l'avais jamais utilisé car j'avais préféré réutiliser celui à l'intérieur de mon serveur *Vertex*.

J'ai refait l'installation d'OPNsense dessus, et cette fois tout a fonctionné : passage en 25.7.1 et réinstallation des plugins officiels que j'utilisais.

Pour les plugins custom (AdGuard Home et UniFi), il a fallu ajouter le repo tiers dans `/usr/local/etc/pkg/repos/mimugmail.conf` (documentation [ici](https://www.routerperformance.net/opnsense-repo/)) :
```json
mimugmail: {
  url: "https://opn-repo.routerperformance.net/repo/${ABI}",
  priority: 5,
  enabled: yes
}
```

Après un dernier reboot, le routeur était presque prêt, mais je n'avais toujours pas de DNS. C'était à cause d'AdGuard Home qui n'était pas configuré.

⚠️ La configuration des plugins tiers n'est pas sauvegardée dans `config.xml`.

Reconfigurer AdGuard Home n'était pas bien compliqué ; finalement mon DNS fonctionnait et tout était revenu à la normale… sauf le contrôleur UniFi.

## Leçons Apprises à la Dure

- **Les sauvegardes comptent** : je me retrouve toujours à penser que les sauvegardes ne sont pas fondamentales... jusqu'à ce qu'on ait besoin de restaurer et qu'il soit trop tard.
- **Gardez les sauvegardes hors de la machine** : j'ai eu de la chance de récupérer le `config.xml` avant que mon disque me lâche. J'aurais vraiment passé un mauvais moment à tout restaurer entièrement.
- **Vérifier la santé après un crash** : ne pas ignorer un kernel panic.
- **Erreurs I/O = alerte rouge** : j'ai perdu des heures à batailler avec un disque condamné.
- **Les plugins non officiels ne sont pas sauvegardés** : la configuration d'OPNsense et de ses plugins officiels est sauvegardée, ce n'est pas le cas pour les autres.
- **Mon routeur est un SPOF** (*un point de défaillance unique*) : dans mon homelab, je voulais avoir le maximum d'éléments hautement disponibles, il me faut trouver une meilleure solution.

## Aller de l'Avant

Je dois sérieusement repenser ma stratégie de sauvegarde. J'ai toujours repoussé, jusqu'à ce qu'il soit trop tard. Ça faisait longtemps que je n'avais pas subi une panne matérielle. Quand ça arrive, ça pique.
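
En attendant, une piste simple consiste à tirer régulièrement le `config.xml` depuis une autre machine. Esquisse indicative : l'adresse, l'utilisateur et les chemins sont des exemples à adapter, et elle suppose un accès SSH par clé au routeur :
```bash
#!/bin/sh
# Exemple hypothétique : copie nocturne du config.xml d'OPNsense vers un hôte de sauvegarde
set -eu
DEST=/srv/backups/opnsense
mkdir -p "$DEST"
scp root@192.168.1.1:/conf/config.xml "$DEST/config-$(date +%F).xml"
# Ne conserver que les 30 copies les plus récentes
ls -1t "$DEST"/config-*.xml | tail -n +31 | xargs -r rm -f
```
À lancer par exemple via cron sur l'hôte de sauvegarde : `0 2 * * * /usr/local/bin/backup-opnsense.sh`.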

Au départ, je pensais qu'un routeur sur son propre hardware était plus sûr. J'avais tort. Je vais réfléchir à une virtualisation sous Proxmox pour l'avoir en haute dispo. Un beau projet en perspective !

## Conclusion

Mon routeur OPNsense est passé d'un simple redémarrage aléatoire à un disque mort, avec un vrai rollercoaster de dépannage. Au final, je suis presque content que ça soit arrivé : j'ai appris bien plus qu'avec une mise à jour sans accroc.

Si vous utilisez OPNsense (ou n'importe quel routeur), retenez ça :
**Gardez une sauvegarde hors de la machine.**

Parce que quand ça casse, et ça finira par casser, c'est ce petit fichier XML qui peut sauver tout votre homelab.

Restez safe, faites des sauvegardes.

content/post/10-opnsense-crash-disk-panic.md (new file, 225 lines)
@@ -0,0 +1,225 @@
---
slug: opnsense-crash-disk-panic
title: My OPNsense Router Crash, from Panic to Reborn
description: The story of how I survived an OPNsense crash with a failing disk and why one backup XML made all the difference.
date: 2025-08-24
draft: false
tags:
  - opnsense
categories:
  - homelab
---
## Intro

This week, I experienced my first real problem in my homelab, one that took my whole home network down.

My OPNsense router crashed and, after several failed recovery attempts, I finally had to reinstall it from scratch. Luckily, almost all of the configuration came back thanks to a single XML file. In this story, I will tell you what happened, what I did to recover, and what I shouldn't have done.

This kind of exercise is the worst thing you want to happen, because it's never fun to watch everything go boom, but it is by far the best way to learn.

## The Calm Before the Storm

My OPNsense box had been running smoothly for months. Router, firewall, DNS, DHCP, VLANs, VPN, reverse proxy and even UniFi controller: all the pieces of my homelab run through it. And not only that, it also serves internet for the whole home.



This box is the heart of my network; without it, I can hardly do anything. I have detailed how it works in my [Homelab]({{< ref "page/homelab" >}}) section. It was "just working," and I wasn't worried about it. I felt confident, even though its backup lived only inside the machine...

Maybe too confident.

## The Unexpected Reboot

Out of nowhere, the box rebooted by itself just before midnight. By chance, I was passing by my rack on my way to bed, and I knew it had rebooted because I heard its little startup beep.

I wondered why the router had restarted without my consent. From my bed, I quickly checked whether internet was working, and it was. But none of my services were available, not my home automation, not even this blog. I was tired, I would fix that the next day...

In the morning, looking at the logs, I found the culprit:
```
panic: double fault
```

A kernel panic. My router had literally crashed at the hardware level.

## First Troubleshooting Attempts

At first, the impact seemed minor. Only one service wasn't coming back up: Caddy, my reverse proxy. That explained why my services weren't available.

Digging into the logs, I found the error:
```
caching certificate: decoding certificate metadata: unexpected end of JSON input
```

It turned out that one of the cached certificates had been corrupted during the crash. Deleting its cache folder fixed Caddy, and suddenly all my HTTPS services were back online.

I thought I had dodged the bullet. I didn't investigate the root cause much: the kernel logs were polluted by one of the interfaces flapping, so I assumed it was just a bug. Instead, I went ahead and checked for updates, my first mistake.

My OPNsense instance was on version 25.1, and the newer 25.7 was available. Let's upgrade it, yay!

The upgrade rolled out successfully, but something was wrong. When I checked for further updates, I saw corruption in `pkg`, the package manager database:
```
pkg: sqlite error while executing iterator in file pkgdb_iterator.c:1110: database disk image is malformed
```

🚨 My internal alarm went off. I thought about backups and immediately downloaded the latest one:


Clicking the `Download configuration` button, I downloaded the `config.xml` currently in use by the instance. I thought it would be enough.

## Filesystem Corruption

I decided to recover the `pkg` database the worst possible way: I backed up the `/var/db/pkg` folder and tried to `bootstrap` it:
```bash
cp -a /var/db/pkg /var/db/pkg.bak
pkg bootstrap -f
```
```
The package management tool is not yet installed on your system.
Do you want to fetch and install it now? [y/N]: y
Bootstrapping pkg from https://pkg.opnsense.org/FreeBSD:14:amd64/25.7/latest, please wait...
[...]
pkg-static: Fail to extract /usr/local/lib/libpkg.a from package: Write error
Failed to install the following 1 package(s): /tmp//pkg.pkg.scQnQs
[...]
A pre-built version of pkg could not be found for your system.
```

I saw a `Write error` and suspected a filesystem problem. I ran `fsck`, and the output was a flood of inconsistencies:
```bash
fsck -n
```
```
[...]
INCORRECT BLOCK COUNT I=13221121 (208384 should be 208192)
INCORRECT BLOCK COUNT I=20112491 (8 should be 0)
INCORRECT BLOCK COUNT I=20352874 (570432 should be 569856)
[...]
FREE BLK COUNT(S) WRONG IN SUPERBLK
[...]
SUMMARY INFORMATION BAD
[...]
BLK(S) MISSING IN BIT MAPS
[...]
***** FILE SYSTEM IS LEFT MARKED AS DIRTY *****
```

The root filesystem was in bad shape.

Since I only had SSH at this point and no console access, I set up a forced `fsck` for the next reboot:
```bash
sysrc fsck_y_enable="YES"
sysrc background_fsck="NO"
reboot
```

On the next boot, the filesystem was repaired enough to let me bootstrap `pkg` again, but most of the system packages were gone. My earlier upgrade on a dirty disk had left me with a half-installed, half-missing system.

## When Things Got Worse

I discovered the `opnsense-bootstrap` utility, which promises to reinstall all packages and reset the system to a clean release, exactly what I was looking for:
- Remove all installed packages.
- Fresh 25.7 base system and kernel will be downloaded and installed.
- All standard OPNsense packages will be reinstalled.

Wonderful!
```
opnsense-bootstrap
```
```
This utility will attempt to turn this installation into the latest OPNsense 25.7 release. All packages will be deleted, the base system and kernel will be replaced, and if all went well the system will automatically reboot. Proceed with this action? [y/N]:
```

I pressed `y`. It started well, but then... no more signal, no more internet. I thought this bootstrap would save me. Instead, it buried me.

🙈 Oops.

After a while, I tried to reboot, but it was impossible to connect back via SSH. With no other option, I had to pull the router out of the rack, put it on my desk, and plug in a screen and a keyboard to see what was going on.

## Starting Over the Hard Way

This was bad:
```
Fatal error: Uncaught Error: Class "OPNsense\Core\Config" not found
in /usr/local/etc/inc/config.inc:143
```

Checking the bootstrap logs, it was even worse:
```
bad dir ino … mangled entry
Input/output error
```

The disk was in bad shape; at this point I couldn't save the install anymore. Time to start from scratch. Luckily, I had a backup… right?

I downloaded the latest OPNsense ISO (v25.7) and wrote it to a USB stick, then reinstalled OPNsense over the existing installation, keeping all the defaults.

## The Lifesaver: `config.xml`

OPNsense keeps the whole configuration in a single file: `/conf/config.xml`. That file was my lifeline.

I copied the `config.xml` file saved earlier onto the USB stick. Once it was plugged into the fresh OPNsense box, I overwrote the file:
```bash
mount -t msdosfs /dev/da0s1 /mnt
cp /mnt/config.xml /conf/config.xml
```

I placed the router back in the rack, powered it on and crossed my fingers... *beep!* 🎉

The DHCP gave me an address, a good start. I could reach the web UI, awesome. My configuration was there, almost everything except the plugins, as expected. I couldn't install them right away because they required another update, so let's update!

This single XML file is the reason I could rebuild my router without losing my sanity.

DNS was down because the AdGuard Home plugin wasn't installed, so I temporarily set the system DNS to `1.1.1.1`.

## The Last Breath

During that upgrade, the system threw errors again… and then rebooted itself. Another crash, and this time it wouldn't come back up...

I could officially declare my NVMe drive dead.

🪦 Rest in peace, thank you for your loyal service.

Luckily, I had a spare 512GB Kingston NVMe that came with that box. I had never used it because I preferred to reuse the one inside my *Vertex* server.

I redid the same steps to reinstall OPNsense on that disk, and this time everything worked: I could finally update OPNsense to 25.7.1 and reinstall all the official plugins I was using.

To install the custom plugins (AdGuard Home and UniFi), I had to add the third-party repository in `/usr/local/etc/pkg/repos/mimugmail.conf` (documentation [here](https://www.routerperformance.net/opnsense-repo/)):
```json
mimugmail: {
  url: "https://opn-repo.routerperformance.net/repo/${ABI}",
  priority: 5,
  enabled: yes
}
```

After a final reboot, the router was almost ready, but I still didn't have DNS. This was because AdGuard Home was not configured.

⚠️ Custom plugin configuration is not saved within the `config.xml` backup.

Reconfiguring AdGuard Home was pretty straightforward; finally my DNS was working and everything was back to normal... except the UniFi controller.

## Lessons Learned the Hard Way

- **Backups matter**: I always catch myself thinking backups are not that important... until I need to restore and it's too late.
- **Keep backups off the box**: I was lucky to grab the `config.xml` before my disk died; otherwise I would have had a really hard time fully recovering.
- **Health check after a crash**: do not ignore a kernel panic.
- **I/O errors = red flag**: I should have stopped trying to repair. I lost hours fighting a dead disk.
- **Custom plugin configs aren't included**: the OPNsense configuration and its official plugins are saved in the backup; this is not the case for the others.
- **My router is a SPOF** (*single point of failure*): in my homelab, I want most of my components highly available, so I need to find a better solution.

## Moving Forward

I really need to rethink my backup strategy. I'm too lazy and always put it off for later, until it is too late. It had been a long time since I was last hit by a hardware failure. When it strikes, it hurts.
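
In the meantime, a simple stopgap is to pull `config.xml` from another machine on a schedule. An illustrative sketch: the address, user and paths are placeholders to adapt, and it assumes SSH key access to the router:
```bash
#!/bin/sh
# Hypothetical example: nightly copy of the OPNsense config.xml to a backup host
set -eu
DEST=/srv/backups/opnsense
mkdir -p "$DEST"
scp root@192.168.1.1:/conf/config.xml "$DEST/config-$(date +%F).xml"
# Keep only the 30 most recent copies
ls -1t "$DEST"/config-*.xml | tail -n +31 | xargs -r rm -f
```
Run it from cron on the backup host, e.g. `0 2 * * * /usr/local/bin/backup-opnsense.sh`.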

Initially I wanted my router on its own hardware because I thought it was safer. I was damn wrong. I will look into virtualizing OPNsense on Proxmox to make it highly available, a great project ahead!

## Conclusion

My OPNsense router went from a random reboot to a dead disk, with a rollercoaster of troubleshooting in between. In the end, I'm almost happy it happened: it taught me more than any smooth upgrade ever could.

If you run OPNsense (or any router), remember this:
**Keep a backup off the box.**

Because when things go wrong, and eventually they will, that one little XML file can save your homelab.

Stay safe, make backups.

content/post/11-proxmox-cluster-networking-sdn.fr.md (new file, 148 lines)
@@ -0,0 +1,148 @@
---
slug: proxmox-cluster-networking-sdn
title: Simplifier la gestion des VLAN dans Proxmox VE avec le SDN
description: Découvrez comment centraliser la configuration des VLAN dans Proxmox VE grâce aux zones SDN et aux VNets, pour un réseau plus simple et cohérent.
date: 2025-09-12
draft: false
tags:
  - proxmox
categories:
  - homelab
---

## Intro

Quand j'ai construit mon cluster **Proxmox VE 8** pour la première fois, le réseau n'était pas ma priorité. Je voulais simplement remplacer rapidement un vieux serveur physique, alors j'ai donné la même configuration de base à chacun de mes trois nœuds, créé le cluster et commencé à créer des VM :


Cela a bien fonctionné pendant un moment. Mais comme je prévois de virtualiser mon routeur **OPNsense**, j'ai besoin de quelque chose de plus structuré et cohérent. C'est là que la fonctionnalité **S**oftware-**D**efined **N**etworking (SDN) de Proxmox entre en jeu.

---
## Mon Réseau Homelab

Par défaut, chaque nœud Proxmox dispose de sa propre zone locale, appelée `localnetwork`, qui contient le pont Linux par défaut (`vmbr0`) comme VNet :


C'est suffisant pour des configurations isolées, mais rien n'est coordonné au niveau du cluster.

Mon objectif est simple : déclarer les VLAN que j'utilise déjà dans mon réseau, afin de pouvoir y rattacher des VM facilement depuis n'importe quel nœud.

Voici la liste des VLAN que j'utilise actuellement :

| Nom       | ID   | Usage                          |
| --------- | ---- | ------------------------------ |
| Mgmt      | 1    | Administration                 |
| User      | 13   | Réseau domestique              |
| IoT       | 37   | IoT et équipements non fiables |
| DMZ       | 55   | Services exposés à Internet    |
| Lab       | 66   | Réseau de lab                  |
| Heartbeat | 77   | Heartbeat du cluster Proxmox   |
| Ceph      | 99   | Stockage Ceph                  |
| VPN       | 1337 | Réseau WireGuard               |

---
## Aperçu du SDN Proxmox

Le Software-Defined Networking de Proxmox permet de définir des zones et réseaux virtuels à l'échelle du cluster. Au lieu de répéter la configuration des VLAN sur chaque nœud, le SDN offre une vue centralisée et assure la cohérence.

En interne, Proxmox repose essentiellement sur les fonctionnalités réseau standard de Linux, ce qui évite d'ajouter des dépendances externes et garantit la stabilité.

Les configurations SDN sont stockées dans `/etc/pve/sdn` et répliquées sur l'ensemble du cluster. Les changements sont appliqués de manière atomique (on prépare les modifications puis on clique sur `Apply`), ce qui rend les déploiements plus sûrs.
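
À titre d'illustration, les fichiers générés pour une zone VLAN et un VNet ressemblent approximativement à ceci (extrait indicatif : `homelan` et `vlan55` reprennent les noms utilisés plus bas, et le format exact peut varier selon la version) :
```plaintext
# /etc/pve/sdn/zones.cfg
vlan: homelan
        bridge vmbr0

# /etc/pve/sdn/vnets.cfg
vnet: vlan55
        zone homelan
        tag 55
```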

### Zones

Une **Zone** définit un domaine réseau séparé. Les zones peuvent couvrir certains nœuds et contenir des **VNets**.

Proxmox prend en charge plusieurs types de zones :
- **Simple** : pont isolé (bridge) avec routage L3/NAT
- **VLAN** : segmentation classique via VLAN
- **QinQ** : empilement de VLAN (IEEE 802.1ad)
- **VXLAN** : réseau L2 via encapsulation UDP
- **EVPN** : VXLAN avec BGP pour du routage L3 dynamique

Comme mon réseau domestique utilise déjà des VLAN, j'ai créé une **zone VLAN** appelée `homelan`, en utilisant `vmbr0` comme pont et en l'appliquant à tout le cluster :


### VNets

Un **VNet** est un réseau virtuel à l'intérieur d'une zone. Dans une zone VLAN, chaque VNet correspond à un ID VLAN spécifique.

J'ai commencé par créer `vlan55` dans la zone `homelan` pour mon réseau DMZ :


Puis j'ai ajouté les VNets correspondant à la plupart de mes VLAN, puisque je prévois de les rattacher à une VM OPNsense :


Enfin, j'ai appliqué la configuration dans **Datacenter → SDN** :


---
## Test de la Configuration Réseau

Dans une vieille VM que je n'utilise plus, je remplace l'actuel `vmbr0` avec le VLAN tag 66 par mon nouveau VNet `vlan66` :


Après l'avoir démarrée, la VM obtient une IP du DHCP d'OPNsense sur ce VLAN, ce qui est super. J'essaie également de pinger une autre machine et ça fonctionne :


---
## Mise à jour de Cloud-Init et Terraform

Pour aller plus loin, j'ai mis à jour le pont réseau utilisé dans mon **template cloud-init**, dont j'avais détaillé la création dans [cet article]({{< ref "post/1-proxmox-cloud-init-vm-template" >}}).
Comme avec la VM précédente, j'ai remplacé `vmbr0` et le tag VLAN 66 par le nouveau VNet `vlan66`.

J'ai aussi adapté mon code **Terraform** pour refléter ce changement :


Ensuite, j'ai validé qu'aucune régression n'était introduite en déployant une VM de test :
```bash
terraform apply -var 'vm_name=vm-test-vnet'
```
```plaintext
data.proxmox_virtual_environment_vms.template: Reading...
data.proxmox_virtual_environment_vms.template: Read complete after 0s [id=23b17aea-d9f7-4f28-847f-41bb013262ea]
[...]
Plan: 2 to add, 0 to change, 0 to destroy.

Changes to Outputs:
  + vm_ip = (known after apply)

Do you want to perform these actions?
  Terraform will perform the actions described above.
  Only 'yes' will be accepted to approve.

  Enter a value: yes

proxmox_virtual_environment_file.cloud_config: Creating...
proxmox_virtual_environment_file.cloud_config: Creation complete after 1s [id=local:snippets/vm.cloud-config.yaml]
proxmox_virtual_environment_vm.vm: Creating...
proxmox_virtual_environment_vm.vm: Still creating... [10s elapsed]
[...]
proxmox_virtual_environment_vm.vm: Still creating... [3m0s elapsed]
proxmox_virtual_environment_vm.vm: Creation complete after 3m9s [id=119]

Apply complete! Resources: 2 added, 0 changed, 0 destroyed.

Outputs:

vm_ip = "192.168.66.181"
```

La création s'est déroulée sans problème, tout est bon :


---
## Conclusion

La mise en place du SDN Proxmox avec une **zone VLAN** est simple et très pratique. Au lieu de définir manuellement un tag VLAN sur chaque VM, je sélectionne désormais directement le bon VNet, et tout reste cohérent dans le cluster.

| Étape                | Avant SDN                      | Après SDN                           |
| -------------------- | ------------------------------ | ----------------------------------- |
| Rattacher une VM     | `vmbr0` + tag VLAN manuel      | Sélection du VNet approprié         |
| VLANs sur les nœuds  | Config répétée sur chaque nœud | Centralisé via le SDN du cluster    |
| Gestion des adresses | Manuel ou via DHCP uniquement  | IPAM optionnel via sous-réseaux SDN |

Mon cluster est maintenant prêt à héberger mon **routeur OPNsense**, et cette base ouvre la voie à d'autres expérimentations, comme les overlays VXLAN ou l'EVPN avec BGP.

À suivre pour la prochaine étape !
content/post/11-proxmox-cluster-networking-sdn.md
Normal file
@@ -0,0 +1,148 @@
|
|||||||
|
---
|
||||||
|
slug: proxmox-cluster-networking-sdn
|
||||||
|
title: Simplifying VLAN Management in Proxmox VE with SDN
|
||||||
|
description: Learn how to centralize VLAN configuration in Proxmox VE using SDN zones and VNets, making VM networking easier and more consistent.
|
||||||
|
date: 2025-09-12
|
||||||
|
draft: false
|
||||||
|
tags:
|
||||||
|
- proxmox
|
||||||
|
categories:
|
||||||
|
- homelab
|
||||||
|
---
|
||||||
|
|
||||||
|
## Intro
|
||||||
|
|
||||||
|
When I first built my **Proxmox VE 8** cluster, networking wasn’t my main concern. I just wanted to replace an old physical server quickly, so I gave each of my three nodes the same basic config, created the cluster, and started running VMs:
|
||||||
|

|
||||||
|
|
||||||
|
That worked fine for a while. But as I plan to virtualize my **OPNsense** router, I need something more structured and consistent. This is where Proxmox **S**oftware-**D**efined **N**etworking (SDN) feature comes in.
|
||||||
|
|
||||||
|
---
|
||||||
|
## My Homelab Network
|
||||||
|
|
||||||
|
By default, every Proxmox node comes with its own local zone, called `localnetwork`, which contains the default Linux bridge (`vmbr0`) as a VNet:
|
||||||
|

|
||||||
|
|
||||||
|
That’s fine for isolated setups, but at the cluster level nothing is coordinated.
|
||||||
|
|
||||||
|
What I want is simple: declare the VLANs I already use in my network, so I can attach VMs to them easily from any node.
|
||||||
|
|
||||||
|
Here’s the list of VLANs I use today:
|
||||||
|
|
||||||
|
| Name | ID | Purpose |
|
||||||
|
| --------- | ---- | ---------------------------- |
|
||||||
|
| Mgmt | 1 | Management |
|
||||||
|
| User | 13 | Home network |
|
||||||
|
| IoT | 37 | IoT and untrusted equipments |
|
||||||
|
| DMZ | 55 | Internet facing |
|
||||||
|
| Lab | 66 | Lab network |
|
||||||
|
| Heartbeat | 77 | Proxmox cluster heartbeat |
|
||||||
|
| Ceph | 99 | Ceph storage |
|
||||||
|
| VPN | 1337 | Wireguard network |
|
||||||
|
|
||||||
|
---
|
||||||
|
## Proxmox SDN Overview
|
||||||
|
|
||||||
|
Proxmox Software-Defined Networking makes it possible to define cluster-wide virtual zones and networks. Instead of repeating VLAN configs on every node, SDN gives you a central view and ensures consistency.
|
||||||
|
|
||||||
|
Under the hood, Proxmox mostly uses standard Linux networking, avoiding extra dependencies and keeping things stable.
|
||||||
|
|
||||||
|
SDN configurations are stored in `/etc/pve/sdn`, which is replicated across the cluster. Changes are applied atomically (you prepare them, then hit `Apply` once), making rollouts safer.
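
For illustration, the generated files for a VLAN zone and a VNet look roughly like this (indicative sketch: `homelan` and `vlan55` are the names used further down, and the exact format may vary between versions):
```plaintext
# /etc/pve/sdn/zones.cfg
vlan: homelan
        bridge vmbr0

# /etc/pve/sdn/vnets.cfg
vnet: vlan55
        zone homelan
        tag 55
```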

### Zones

A **Zone** defines a separate networking domain. Zones can span specific nodes and contain **VNets**.

Proxmox supports several zone types:
- **Simple**: Isolated Bridge. A simple layer 3 routing bridge (NAT)
- **VLAN**: Virtual LANs are the classic method of subdividing a LAN
- **QinQ**: Stacked VLAN (IEEE 802.1ad)
- **VXLAN**: Layer 2 VXLAN network via a UDP tunnel
- **EVPN**: VXLAN with BGP to establish Layer 3 routing

Since my home network already relies on VLANs, I created a **VLAN Zone** named `homelan`, using `vmbr0` as the bridge and applying it cluster-wide:


### VNets

A **VNet** is a virtual network inside a zone. In a VLAN zone, each VNet corresponds to a specific VLAN ID.

I started by creating `vlan55` in the `homelan` zone for my DMZ network:


Then I added VNets for most of my VLANs, since I plan to attach them to an OPNsense VM:


Finally, I applied the configuration in **Datacenter → SDN**:


---
## Test the Network Configuration

In an old VM that I don't use anymore, I replaced the current `vmbr0` with VLAN tag 66 with my new VNet `vlan66`:


After starting it, the VM got an IP from the OPNsense DHCP server on that VLAN, which looks good. I also tried to ping another machine and it worked:


---
## Update Cloud-Init Template and Terraform

To go further, I updated the bridge used in my **cloud-init** template, whose creation I detailed in this [post]({{< ref "post/1-proxmox-cloud-init-vm-template" >}}). Much like with the VM above, I replaced the current `vmbr0` with VLAN tag 66 with my new VNet `vlan66`.

I also updated the **Terraform** code to take this change into account:


I quickly checked for regressions and made sure I could still deploy a VM with Terraform:
```bash
terraform apply -var 'vm_name=vm-test-vnet'
```
```plaintext
data.proxmox_virtual_environment_vms.template: Reading...
data.proxmox_virtual_environment_vms.template: Read complete after 0s [id=23b17aea-d9f7-4f28-847f-41bb013262ea]
[...]
Plan: 2 to add, 0 to change, 0 to destroy.

Changes to Outputs:
  + vm_ip = (known after apply)

Do you want to perform these actions?
  Terraform will perform the actions described above.
  Only 'yes' will be accepted to approve.

  Enter a value: yes

proxmox_virtual_environment_file.cloud_config: Creating...
proxmox_virtual_environment_file.cloud_config: Creation complete after 1s [id=local:snippets/vm.cloud-config.yaml]
proxmox_virtual_environment_vm.vm: Creating...
proxmox_virtual_environment_vm.vm: Still creating... [10s elapsed]
[...]
proxmox_virtual_environment_vm.vm: Still creating... [3m0s elapsed]
proxmox_virtual_environment_vm.vm: Creation complete after 3m9s [id=119]

Apply complete! Resources: 2 added, 0 changed, 0 destroyed.

Outputs:

vm_ip = "192.168.66.181"
```

The VM deployed without any issue, everything is OK:


---
## Conclusion

Setting up Proxmox SDN with a **VLAN zone** turned out to be straightforward and very useful. Instead of tagging VLANs manually per VM, I now just pick the right VNet, and everything stays consistent across the cluster.

| Step              | Before SDN                      | After SDN                      |
| ----------------- | ------------------------------- | ------------------------------ |
| Attach VM to VLAN | `vmbr0` + set VLAN tag manually | Select the right VNet directly |
| VLANs on nodes    | Repeated config per node        | Centralized in cluster SDN     |
| IP management     | Manual or DHCP only             | Optional IPAM via SDN subnets  |

This prepares my cluster to host my **OPNsense router**, and it also sets the stage for future experiments, like trying out VXLAN overlays or EVPN with BGP.

See you next time for the next step!
281
content/post/12-opnsense-virtualization-highly-available.fr.md
Normal file
@@ -0,0 +1,281 @@
|
|||||||
|
---
|
||||||
|
slug: opnsense-virtualization-highly-available
|
||||||
|
title: Construire un Cluster OPNsense Hautement Disponible sur Proxmox VE
|
||||||
|
description: Une preuve de concept montrant comment virtualiser OPNsense sur Proxmox VE, configurer la haute disponibilité avec CARP et pfSync, et gérer une seule IP WAN.
|
||||||
|
date: 2025-09-29
|
||||||
|
draft: false
|
||||||
|
tags:
|
||||||
|
- opnsense
|
||||||
|
- proxmox
|
||||||
|
- high-availability
|
||||||
|
categories:
|
||||||
|
- homelab
|
||||||
|
---
|
||||||
|
## Intro
|
||||||
|
|
||||||
|
J’ai récemment rencontré mon premier vrai problème : ma box **OPNsense** physique a planté à cause d’un _kernel panic_. J’ai détaillé ce qui s'est passé dans [cet article]({{< ref "post/10-opnsense-crash-disk-panic" >}}).
|
||||||
|
|
||||||
|
Cette panne m’a fait repenser mon installation. Un seul pare-feu est un point de défaillance unique, donc pour améliorer la résilience j’ai décidé de prendre une nouvelle approche : **virtualiser OPNsense**.
|
||||||
|
|
||||||
|
Évidemment, faire tourner une seule VM ne suffirait pas. Pour obtenir une vraie redondance, il me faut deux instances OPNsense en **Haute Disponibilité**, l’une active et l’autre en attente.
|
||||||
|
|
||||||
|
Avant de déployer ça sur mon réseau, j’ai voulu valider l’idée dans mon homelab. Dans cet article, je vais détailler la preuve de concept : déployer deux VM OPNsense dans un cluster **Proxmox VE** et les configurer pour fournir un pare-feu hautement disponible.
|
||||||
|
|
||||||
|
---
|
||||||
|
## Infrastructure Actuelle
|
||||||
|
|
||||||
|
Au sommet de mon installation, mon modem FAI, une _Freebox_ en mode bridge, relié directement à l’interface `igc0` de ma box OPNsense, servant d’interface **WAN**. Sur `igc1`, le **LAN** est connecté à mon switch principal via un port trunk, avec le VLAN 1 comme VLAN natif pour mon réseau de management.
|
||||||
|
|
||||||
|
Ce switch relie également mes trois nœuds Proxmox, chacun sur un port trunk avec le même VLAN natif. Chaque nœud dispose de deux cartes réseau : une pour le trafic général, et l’autre dédiée au réseau de stockage Ceph, connecté à un switch séparé de 2,5 Gbps.
|
||||||
|
|
||||||
|
Depuis le crash d’OPNsense, j’ai simplifié l’architecture en supprimant le lien LACP, qui n’apportait pas de réelle valeur :
|
||||||
|

|
||||||
|
|
||||||
|
Jusqu’à récemment, le réseau Proxmox de mon cluster était très basique : chaque nœud était configuré individuellement sans véritable logique commune. Cela a changé après la découverte du SDN Proxmox, qui m’a permis de centraliser les définitions de VLAN sur l’ensemble du cluster. J’ai décrit cette étape dans [cet article]({{< ref "post/11-proxmox-cluster-networking-sdn" >}}).
|
||||||
|
|
||||||
|
---
|
||||||
|
## Preuve de Concept
|
||||||
|
|
||||||
|
Place au lab. Voici les étapes principales :
|
||||||
|
1. Ajouter quelques VLANs dans mon homelab
|
||||||
|
2. Créer un faux routeur FAI
|
||||||
|
3. Construire deux VMs OPNsense
|
||||||
|
4. Configurer la haute disponibilité
|
||||||
|
5. Tester la bascule
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
### Ajouter des VLANs dans mon homelab
|
||||||
|
|
||||||
|
Pour cette expérimentation, je crée trois nouveaux VLANs :
|
||||||
|
- **VLAN 101** : _POC WAN_
|
||||||
|
- **VLAN 102** : _POC LAN_
|
||||||
|
- **VLAN 103** : _POC pfSync_
|
||||||
|
|
||||||
|
Dans l’interface Proxmox, je vais dans `Datacenter` > `SDN` > `VNets` et je clique sur `Create` :
|
||||||
|

|
||||||
|
|
||||||
|
Une fois les trois VLANs créés, j’applique la configuration.
|
||||||
|
|
||||||
|
J’ajoute ensuite ces trois VLANs dans mon contrôleur UniFi. Ici, seul l’ID et le nom sont nécessaires, le contrôleur se charge de les propager via les trunks connectés à mes nœuds Proxmox VE.
|
||||||
|
|
||||||
|
### Créer une VM “Fausse Box FAI”
|
||||||
|
|
||||||
|
Pour simuler mon modem FAI actuel, j’ai créé une VM appelée `fake-freebox`. Cette VM route le trafic entre les réseaux _POC WAN_ et _Lab_, et fait tourner un serveur DHCP qui ne délivre qu’un seul bail, exactement comme ma vraie Freebox en mode bridge.
|
||||||
|
|
||||||
|
Cette VM dispose de 2 cartes réseau, que je configure avec Netplan :
|
||||||
|
- `eth0` (_POC WAN_ VLAN 101) : adresse IP statique `10.101.0.254/24`
|
||||||
|
- `enp6s19` (Lab VLAN 66) : adresse IP obtenue en DHCP depuis mon routeur OPNsense actuel, en amont
|
||||||
|
```yaml
|
||||||
|
network:
|
||||||
|
version: 2
|
||||||
|
ethernets:
|
||||||
|
eth0:
|
||||||
|
addresses:
|
||||||
|
- 10.101.0.254/24
|
||||||
|
enp6s19:
|
||||||
|
dhcp4: true
|
||||||
|
```
|
||||||
|
|
||||||
|
J’active ensuite le routage IP pour permettre à cette VM de router le trafic :
|
||||||
|
```bash
|
||||||
|
echo "net.ipv4.ip_forward=1" | sudo tee -a /etc/sysctl.conf
|
||||||
|
sudo sysctl -p
|
||||||
|
```
|
||||||
|
|
||||||
|
Puis je configure du masquage (NAT) afin que les paquets sortant via le réseau Lab ne soient pas rejetés par mon OPNsense actuel :
|
||||||
|
```bash
|
||||||
|
sudo iptables -t nat -A POSTROUTING -o enp6s19 -j MASQUERADE
|
||||||
|
sudo apt install iptables-persistent -y
|
||||||
|
sudo netfilter-persistent save
|
||||||
|
```
|
||||||
|
|
||||||
|
J’installe `dnsmasq` comme serveur DHCP léger :
|
||||||
|
```bash
|
||||||
|
sudo apt install dnsmasq -y
|
||||||
|
```
|
||||||
|
|
||||||
|
Dans `/etc/dnsmasq.conf`, je configure un bail unique (`10.101.0.150`) et je pointe le DNS vers mon OPNsense actuel, sur le VLAN _Lab_ :
|
||||||
|
```
|
||||||
|
interface=eth0
|
||||||
|
bind-interfaces
|
||||||
|
dhcp-range=10.101.0.150,10.101.0.150,255.255.255.0,12h
|
||||||
|
dhcp-option=3,10.101.0.254 # default gateway = this VM
|
||||||
|
dhcp-option=6,192.168.66.1 # DNS server
|
||||||
|
```
|
||||||
|
|
||||||
|
Je redémarre le service `dnsmasq` pour appliquer la configuration :
|
||||||
|
```bash
|
||||||
|
sudo systemctl restart dnsmasq
|
||||||
|
```
|
||||||
|
|
||||||
|
La VM `fake-freebox` est maintenant prête à fournir du DHCP sur le VLAN 101, avec un seul bail disponible.
|
||||||
|
|
||||||
|
### Construire les VMs OPNsense
|
||||||
|
|
||||||
|
Je commence par télécharger l’ISO d’OPNsense et je l’upload sur un de mes nœuds Proxmox :
|
||||||
|

|
||||||
|
|
||||||
|
#### Création de la VM
|
||||||
|
|
||||||
|
Je crée la première VM `poc-opnsense-1` avec les paramètres suivants :
|
||||||
|
- Type d’OS : Linux (même si OPNsense est basé sur FreeBSD)
|
||||||
|
- Type de machine : `q35`
|
||||||
|
- BIOS : `OVMF (UEFI)`, stockage EFI sur mon pool Ceph
|
||||||
|
- Disque : 20 Gio sur Ceph
|
||||||
|
- CPU/RAM : 2 vCPU, 2 Gio de RAM
|
||||||
|
- Cartes réseau :
|
||||||
|
1. VLAN 101 (_POC WAN_)
|
||||||
|
2. VLAN 102 (_POC LAN_)
|
||||||
|
3. VLAN 103 (_POC pfSync_)
|
||||||
|

|
||||||
|
|
||||||
|
ℹ️ Avant de la démarrer, je clone cette VM pour préparer la seconde : `poc-opnsense-2`
|
||||||
|
|
||||||
|
Au premier démarrage, je tombe sur une erreur “access denied”. Pour corriger, j’entre dans le BIOS, **Device Manager > Secure Boot Configuration**, je décoche _Attempt Secure Boot_ et je redémarre :
|
||||||
|

|
||||||
|
|
||||||
|
#### Installation d’OPNsense
|
||||||
|
|
||||||
|
La VM démarre sur l’ISO, je ne touche à rien jusqu’à l’écran de login :
|
||||||
|

|
||||||
|
|
||||||
|
Je me connecte avec `installer` / `opnsense` et je lance l’installateur. Je sélectionne le disque QEMU de 20 Go comme destination et je démarre l’installation :
|
||||||
|

|
||||||
|
|
||||||
|
Une fois terminé, je retire l’ISO du lecteur et je redémarre la machine.
|
||||||
|
|
||||||
|
#### Configuration de Base d’OPNsense
|
||||||
|
|
||||||
|
Au redémarrage, je me connecte avec `root` / `opnsense` et j’arrive au menu CLI :
|
||||||
|

|
||||||
|
|
||||||
|
Avec l’option 1, je réassigne les interfaces :
|
||||||
|

|
||||||
|
|
||||||
|
L’interface WAN récupère bien `10.101.0.150/24` depuis la `fake-freebox`. Je configure le LAN sur `10.102.0.2/24` et j’ajoute un pool DHCP de `10.102.0.10` à `10.102.0.99` :
|
||||||
|

|
||||||
|
|
||||||
|
✅ La première VM est prête, je reproduis l’opération pour la seconde OPNsense `poc-opnsense-2`, qui aura l’IP `10.102.0.3`.
|
||||||
|
|
||||||
|
### Configurer OPNsense en Haute Disponibilité
|
||||||
|
|
||||||
|
Avec les deux VMs OPNsense opérationnelles, il est temps de passer à la configuration via le WebGUI. Pour y accéder, j’ai connecté une VM Windows au VLAN _POC LAN_ et ouvert l’IP de l’OPNsense sur le port 443 :
|
||||||
|

|
||||||
|
|
||||||
|
#### Ajouter l’Interface pfSync
|
||||||
|
|
||||||
|
La troisième carte réseau (`vtnet2`) est assignée à l’interface _pfSync_. Ce réseau dédié permet aux deux firewalls de synchroniser leurs états via le VLAN _POC pfSync_ :
|
||||||
|

|
||||||
|
|
||||||
|
J’active l’interface sur chaque instance et je leur attribue une IP statique :
|
||||||
|
- **poc-opnsense-1** : `10.103.0.2/24`
|
||||||
|
- **poc-opnsense-2** : `10.103.0.3/24`
|
||||||
|
|
||||||
|
Puis, j’ajoute une règle firewall sur chaque nœud pour autoriser tout le trafic provenant de ce réseau sur l’interface _pfSync_ :
|
||||||
|

|
||||||
|
|
||||||
|
#### Configurer la Haute Disponibilité
|
||||||
|
|
||||||
|
Direction `System` > `High Availability` > `Settings`.
|
||||||
|
- Sur le master (`poc-opnsense-1`), je configure les `General Settings` et les `Synchronization Settings`.
|
||||||
|
- Sur le backup (`poc-opnsense-2`), seuls les `General Settings` suffisent (on ne veut pas qu’il écrase la config du master).
|
||||||
|

|
||||||
|
|
||||||
|
Une fois appliqué, je vérifie la synchro dans l’onglet `Status` :
|
||||||
|

|
||||||
|
|
||||||
|
#### Créer une IP Virtuelle
|
||||||
|
|
||||||
|
Pour fournir une passerelle partagée aux clients, je crée une IP virtuelle (VIP) en **CARP** (Common Address Redundancy Protocol) sur l’interface LAN. L’IP est portée par le nœud actif et bascule automatiquement en cas de failover.
|
||||||
|
|
||||||
|
Menu : `Interfaces` > `Virtual IPs` > `Settings` :
|
||||||
|

|
||||||
|
|
||||||
|
Je réplique ensuite la config depuis `System > High Availability > Status` avec le bouton `Synchronize and reconfigure all`.
|
||||||
|
|
||||||
|
Sur `Interfaces > Virtual IPs > Status`, le master affiche la VIP en `MASTER` et le backup en `BACKUP`.
|
||||||
|
|
||||||
|
#### Reconfigurer le DHCP
|
||||||
|
|
||||||
|
Pour la HA, il faut adapter le DHCP. Comme **Dnsmasq** ne supporte pas la synchro des baux, chaque instance doit répondre indépendamment.
|
||||||
|
|
||||||
|
Sur le master :
|
||||||
|
- `Services` > `Dnsmasq DNS & DHCP` > `General` : cocher `Disable HA sync`
|
||||||
|
- `DHCP ranges` : cocher aussi `Disable HA sync`
|
||||||
|
- `DHCP options` : ajouter l’option `router [3]` avec la valeur `10.102.0.1` (VIP LAN)
|
||||||
|
- `DHCP options` : cloner la règle pour `dns-server [6]` vers la même VIP.
|
||||||
|

|
||||||
|
|
||||||
|
Sur le backup :
|
||||||
|
- `Services` > `Dnsmasq DNS & DHCP` > `General` : cocher `Disable HA sync`
|
||||||
|
- Régler `DHCP reply delay` à `5` secondes (laisser la priorité au master)
|
||||||
|
- `DHCP ranges` : définir un autre pool, plus petit (`10.102.0.200 -> 220`).
|
||||||
|
|
||||||
|
Ainsi, seules les **options** DHCP sont synchronisées, les plages restant distinctes.
|
||||||
|
|
||||||
|
#### Interface WAN
|
||||||
|
|
||||||
|
Mon modem FAI n’attribue qu’une seule IP en DHCP, je ne veux pas que mes 2 VMs entrent en compétition. Pour gérer ça :
|
||||||
|
1. Dans Proxmox, je copie l’adresse MAC de `net0` (WAN) de `poc-opnsense-1` et je l’applique à `poc-opnsense-2`. Ainsi, le bail DHCP est partagé.
|
||||||
|
⚠️ Si les deux VMs activent la même MAC en même temps, cela provoque des conflits ARP et peut casser le réseau. Seul le MASTER doit activer son WAN.
|
||||||
|
2. Un hook d’événement CARP permet de lancer des scripts. J’ai déployé ce [script Gist](https://gist.github.com/spali/2da4f23e488219504b2ada12ac59a7dc#file-10-wancarp) dans `/usr/local/etc/rc.syshook.d/carp/10-wan` sur les deux nœuds. Ce script active le WAN uniquement sur le MASTER.
|
||||||
|
```php
|
||||||
|
#!/usr/local/bin/php
|
||||||
|
<?php
|
||||||
|
|
||||||
|
require_once("config.inc");
|
||||||
|
require_once("interfaces.inc");
|
||||||
|
require_once("util.inc");
|
||||||
|
require_once("system.inc");
|
||||||
|
|
||||||
|
$subsystem = !empty($argv[1]) ? $argv[1] : '';
|
||||||
|
$type = !empty($argv[2]) ? $argv[2] : '';
|
||||||
|
|
||||||
|
if ($type != 'MASTER' && $type != 'BACKUP') {
|
||||||
|
log_error("Carp '$type' event unknown from source '{$subsystem}'");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!strstr($subsystem, '@')) {
|
||||||
|
log_error("Carp '$type' event triggered from wrong source '{$subsystem}'");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
$ifkey = 'wan';
|
||||||
|
|
||||||
|
if ($type === "MASTER") {
|
||||||
|
log_error("enable interface '$ifkey' due CARP event '$type'");
|
||||||
|
$config['interfaces'][$ifkey]['enable'] = '1';
|
||||||
|
write_config("enable interface '$ifkey' due CARP event '$type'", false);
|
||||||
|
interface_configure(false, $ifkey, false, false);
|
||||||
|
} else {
|
||||||
|
log_error("disable interface '$ifkey' due CARP event '$type'");
|
||||||
|
unset($config['interfaces'][$ifkey]['enable']);
|
||||||
|
write_config("disable interface '$ifkey' due CARP event '$type'", false);
|
||||||
|
interface_configure(false, $ifkey, false, false);
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Tester le Failover
|
||||||
|
|
||||||
|
Passons aux tests !
|
||||||
|
|
||||||
|
OPNsense propose un _CARP Maintenance Mode_. Avec le master actif, seul lui avait son WAN monté. En activant le mode maintenance, les rôles basculent : le master devient backup, son WAN est désactivé et celui du backup est activé :
|
||||||
|

|
||||||
|
|
||||||
|
Pendant mes pings vers l’extérieur, aucune perte de paquets au moment du basculement.
|
||||||
|
|
||||||
|
Ensuite, j’ai simulé un crash en éteignant le master. Le backup a pris le relais de façon transparente, seulement un paquet perdu, et grâce à la synchro des états, même ma session SSH est restée ouverte. 🎉
|
||||||
|
|
||||||
|
## Conclusion
|
||||||
|
|
||||||
|
Cette preuve de concept démontre qu’il est possible de faire tourner **OPNsense en haute dispo sous Proxmox VE**, même avec une seule IP WAN. Les briques nécessaires :
|
||||||
|
- Segmentation VLAN
|
||||||
|
- Réseau dédié pfSync
|
||||||
|
- IP virtuelle partagée (CARP)
|
||||||
|
- Script pour gérer l’interface WAN
|
||||||
|
|
||||||
|
Le résultat est à la hauteur : failover transparent, synchro des états, et connexions actives qui survivent à un crash. Le point le plus délicat reste la gestion du bail WAN, mais le hook CARP règle ce problème.
|
||||||
|
|
||||||
|
🚀 Prochaine étape : préparer un nouveau cluster OPNsense HA sur Proxmox en vue de remplacer complètement ma box physique actuelle. Restez à l'écoute !
|
283
content/post/12-opnsense-virtualization-highly-available.md
Normal file
@@ -0,0 +1,283 @@
|
|||||||
|
---
|
||||||
|
slug: opnsense-virtualization-highly-available
|
||||||
|
title: Build a Highly Available OPNsense Cluster on Proxmox VE
|
||||||
|
description: A proof of concept showing how to virtualize OPNsense on Proxmox VE, configure high availability with CARP and pfSync and handle a single WAN IP.
|
||||||
|
date: 2025-09-29
|
||||||
|
draft: false
|
||||||
|
tags:
|
||||||
|
- opnsense
|
||||||
|
- proxmox
|
||||||
|
- high-availability
|
||||||
|
categories:
|
||||||
|
- homelab
|
||||||
|
---
|
||||||
|
## Intro
|
||||||
|
|
||||||
|
I recently encountered my first real problem: my physical **OPNsense** box crashed because of a kernel panic. I detailed what happened in that [post]({{< ref "post/10-opnsense-crash-disk-panic" >}}).
|
||||||
|
|
||||||
|
That failure made me rethink my setup. A single firewall is a single point of failure, so to improve resilience I decided to take a new approach: **virtualize OPNsense**.
|
||||||
|
|
||||||
|
Of course, just running one VM wouldn’t be enough. To get real redundancy, I need two OPNsense instances in **High Availability**, with one active and the other standing by.
|
||||||
|
|
||||||
|
Before rolling this out in my network, I wanted to demonstrate the idea in my homelab. In this post, I’ll walk through the proof of concept: deploying two OPNsense VMs inside a **Proxmox VE** cluster and configuring them to provide a highly available firewall.
|
||||||
|
|
||||||
|
---
|
||||||
|
## Current Infrastructure
|
||||||
|
|
||||||
|
On top of my setup, my ISP modem, a *Freebox* in bridge mode, connects directly to the `igc0` interface of my OPNsense box, serving as the **WAN**. On `igc1`, the **LAN** is linked to my main switch using a trunk port, with VLAN 1 as the native VLAN for my management network.
|
||||||
|
|
||||||
|
The switch also connects my three Proxmox nodes, each on trunk ports with the same native VLAN. Every node has two NICs: one for general networking and the other dedicated to the Ceph storage network, which runs through a separate 2.5 Gbps switch.
|
||||||
|
|
||||||
|
Since the OPNsense crash, I’ve simplified things by removing the LACP link, which wasn’t adding real value:
|
||||||
|

|
||||||
|
|
||||||
|
|
||||||
|
Until recently, Proxmox networking on my cluster was very basic: each node was configured individually with no real overlay logic. That changed after I explored Proxmox SDN, where I centralized VLAN definitions across the cluster. I described that step in [this article]({{< ref "post/11-proxmox-cluster-networking-sdn" >}}).
|
||||||
|
|
||||||
|
---
|
||||||
|
## Proof of Concept
|
||||||
|
|
||||||
|
Time to move into the lab. Here are the main steps:
|
||||||
|
1. Add some VLANs in my Homelab
|
||||||
|
2. Create Fake ISP router
|
||||||
|
3. Build two OPNsense VMs
|
||||||
|
4. Configure high availability
|
||||||
|
5. Test failover
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
### Add VLANs in my Homelab
|
||||||
|
|
||||||
|
For this experiment, I create 3 new VLANs:
|
||||||
|
- **VLAN 101**: *POC WAN*
|
||||||
|
- **VLAN 102**: *POC LAN*
|
||||||
|
- **VLAN 103**: *POC pfSync*
|
||||||
|
|
||||||
|
In the Proxmox UI, I navigate to `Datacenter` > `SDN` > `VNets` and I click `Create`:
|
||||||
|

|
||||||
|
|
||||||
|
Once the 3 new VLANs have been created, I apply the configuration.
|
||||||
|
|
||||||
|
Additionally, I add these 3 VLANs in my UniFi Controller. Here only the VLAN ID and name are needed, since the controller will propagate them through the trunks connected to my Proxmox VE nodes.
|
||||||
|
|
||||||
|
### Create “Fake ISP Box” VM
|
||||||
|
|
||||||
|
To simulate my current ISP modem, I built a VM named `fake-freebox`. This VM routes traffic between the *POC WAN* and *Lab* networks and runs a DHCP server that serves only one lease, just like my real Freebox in bridge mode.
|
||||||
|
|
||||||
|
This VM has 2 NICs, which I configure with Netplan:
|
||||||
|
- `eth0` (*POC WAN* VLAN 101): static IP address `10.101.0.254/24`
|
||||||
|
- `enp6s19` (Lab VLAN 66): DHCP address obtained from my current OPNsense router, upstream
|
||||||
|
```yaml
|
||||||
|
network:
|
||||||
|
version: 2
|
||||||
|
ethernets:
|
||||||
|
eth0:
|
||||||
|
addresses:
|
||||||
|
- 10.101.0.254/24
|
||||||
|
enp6s19:
|
||||||
|
dhcp4: true
|
||||||
|
```
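To apply this Netplan configuration (assuming the file sits under `/etc/netplan/`):

```bash
# Validate and apply the Netplan configuration
sudo netplan apply
```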
|
||||||
|
|
||||||
|
I enable packet forwarding to allow this VM to route traffic:
|
||||||
|
```bash
|
||||||
|
echo "net.ipv4.ip_forward=1" | sudo tee -a /etc/sysctl.conf
|
||||||
|
sudo sysctl -p
|
||||||
|
```
|
||||||
|
|
||||||
|
Then I set up masquerading so packets leaving through the lab network wouldn’t be dropped by my current OPNsense:
|
||||||
|
```bash
|
||||||
|
sudo iptables -t nat -A POSTROUTING -o enp6s19 -j MASQUERADE
|
||||||
|
sudo apt install iptables-persistent -y
|
||||||
|
sudo netfilter-persistent save
|
||||||
|
```
|
||||||
|
|
||||||
|
I install `dnsmasq` as a lightweight DHCP server:
|
||||||
|
```bash
|
||||||
|
sudo apt install dnsmasq -y
|
||||||
|
```
|
||||||
|
|
||||||
|
In `/etc/dnsmasq.conf`, I configure it to serve exactly one lease (`10.101.0.150`), with DNS pointing to my current OPNsense router in the *Lab* VLAN:
|
||||||
|
```
|
||||||
|
interface=eth0
|
||||||
|
bind-interfaces
|
||||||
|
dhcp-range=10.101.0.150,10.101.0.150,255.255.255.0,12h
|
||||||
|
dhcp-option=3,10.101.0.254 # default gateway = this VM
|
||||||
|
dhcp-option=6,192.168.66.1 # DNS server
|
||||||
|
```
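Optionally, the file can be syntax-checked before restarting the service:

```bash
# dnsmasq reports the offending line if the configuration file is invalid
sudo dnsmasq --test
```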
|
||||||
|
|
||||||
|
I restart the `dnsmasq` service to apply the configuration:
|
||||||
|
```bash
|
||||||
|
sudo systemctl restart dnsmasq
|
||||||
|
```
|
||||||
|
|
||||||
|
The `fake-freebox` VM is now ready to serve DHCP on VLAN 101, with only one lease available.
|
||||||
|
|
||||||
|
### Build OPNsense VMs
|
||||||
|
|
||||||
|
First I download the OPNsense ISO and upload it to one of my Proxmox nodes:
|
||||||
|

|
||||||
|
|
||||||
|
#### VM Creation
|
||||||
|
|
||||||
|
I create the first VM `poc-opnsense-1`, with the following settings:
|
||||||
|
- OS type: Linux (even though OPNsense is FreeBSD-based)
|
||||||
|
- Machine type: `q35`
|
||||||
|
- BIOS: `OVMF (UEFI)`, EFI storage on my Ceph pool
|
||||||
|
- Disk: 20 GiB also on Ceph
|
||||||
|
- CPU/RAM: 2 vCPU, 2 GiB RAM
|
||||||
|
- NICs:
|
||||||
|
1. VLAN 101 (POC WAN)
|
||||||
|
2. VLAN 102 (POC LAN)
|
||||||
|
3. VLAN 103 (POC pfSync)
|
||||||
|

|
||||||
|
|
||||||
|
ℹ️ Before booting it, I clone this VM to prepare the second one: `poc-opnsense-2`
|
||||||
|
|
||||||
|
On first boot, I hit an “access denied” error. To fix this, I enter the BIOS, go to **Device Manager > Secure Boot Configuration**, uncheck _Attempt Secure Boot_, and restart the VM:
|
||||||
|

|
||||||
|
|
||||||
|
#### OPNsense Installation
|
||||||
|
|
||||||
|
The VM boots on the ISO, I touch nothing until I get into the login screen:
|
||||||
|

|
||||||
|
|
||||||
|
I log in as `installer` / `opnsense` and launch the installer. I select the 20 GB QEMU hard disk as destination and start the installation:
|
||||||
|

|
||||||
|
|
||||||
|
Once the installation is finished, I remove the ISO from the drive and restart the machine.
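Ejecting the ISO can also be done from the node's shell with `qm` (a sketch; `ide2` as the CD-ROM slot and `<vmid>` are assumptions/placeholders):

```bash
# Detach the installation ISO from the VM's CD-ROM drive
qm set <vmid> --ide2 none,media=cdrom
```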
|
||||||
|
|
||||||
|
#### OPNsense Basic Configuration
|
||||||
|
|
||||||
|
After reboot, I log in as `root` / `opnsense` and get into the CLI menu:
|
||||||
|

|
||||||
|
|
||||||
|
Using option 1, I reassign the interfaces:
|
||||||
|

|
||||||
|
|
||||||
|
The WAN interface successfully pulls `10.101.0.150/24` from the `fake-freebox`. I set the LAN interface to `10.102.0.2/24` and configure a DHCP pool from `10.102.0.10` to `10.102.0.99`:
|
||||||
|

|
||||||
|
|
||||||
|
✅ The first VM is ready. I repeat the process for the second OPNsense VM, `poc-opnsense-2`, which will have the IP `10.102.0.3`.
|
||||||
|
|
||||||
|
### Configure OPNsense for High Availability
|
||||||
|
|
||||||
|
With both OPNsense VMs operational, it’s time to configure them from the WebGUI. To access the interface, I connect a Windows VM to the _POC LAN_ VLAN and browse to the OPNsense IP on port 443:
|
||||||
|

|
||||||
|
|
||||||
|
#### Add pfSync Interface
|
||||||
|
|
||||||
|
The third NIC (`vtnet2`) is assigned to the _pfSync_ interface. This dedicated network allows the two firewalls to synchronize states on the VLAN *POC pfSync*:
|
||||||
|

|
||||||
|
|
||||||
|
I enable the interface on each instance and configure it with a static IP address:
|
||||||
|
- **poc-opnsense-1**: `10.103.0.2/24`
|
||||||
|
- **poc-opnsense-2**: `10.103.0.3/24`
|
||||||
|
|
||||||
|
Then, I add a firewall rule on each node to allow all traffic coming from this network on that *pfSync* interface:
|
||||||
|

|
||||||
|
|
||||||
|
#### Setup High Availability
|
||||||
|
|
||||||
|
Next, I go to `System` > `High Availability` > `Settings`.
|
||||||
|
- On the master (`poc-opnsense-1`), I configure both the `General Settings` and the `Synchronization Settings`.
|
||||||
|
- On the backup (`poc-opnsense-2`), only the `General Settings` are needed, since we don't want the backup to overwrite the master config.
|
||||||
|

|
||||||
|
|
||||||
|
Once applied, I verify synchronization on the `Status` page:
|
||||||
|

|
||||||
|
|
||||||
|
#### Create Virtual IP Address
|
||||||
|
|
||||||
|
To provide a shared gateway for clients, I create a **CARP** (Common Address Redundancy Protocol) Virtual IP (VIP) on the LAN interface. This IP is claimed by the active node and automatically fails over to the backup.
|
||||||
|
|
||||||
|
Navigate to `Interfaces` > `Virtual IPs` > `Settings`:
|
||||||
|

|
||||||
|
|
||||||
|
To replicate the config, I go to `System > High Availability > Status` and click the button next to `Synchronize and reconfigure all`.
|
||||||
|
|
||||||
|
On the `Interfaces > Virtual IPs > Status` page, the master shows the VIP as `MASTER`, while the backup reports `BACKUP`.
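The CARP state can also be checked from the OPNsense shell with `ifconfig` (a sketch; `vtnet1` as the LAN interface is an assumption based on the NIC order above):

```bash
# The active node should report "carp: MASTER ...", the standby "carp: BACKUP ..."
ifconfig vtnet1 | grep carp
```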
|
||||||
|
|
||||||
|
#### Reconfigure DHCP
|
||||||
|
|
||||||
|
For HA, I need to adjust the DHCP setup. Since **Dnsmasq** does not support lease synchronization, both instances must serve leases independently.
|
||||||
|
|
||||||
|
On the master:
|
||||||
|
- `Services` > `Dnsmasq DNS & DHCP` > `General`: tick the `Disable HA sync` box.
|
||||||
|
- `DHCP ranges`: also tick the `Disable HA sync` box
|
||||||
|
- `DHCP options`: add the option `router [3]` with the value `10.102.0.1` (LAN VIP)
|
||||||
|
- `DHCP options`: clone the rule for `dns-server [6]` pointing to the same VIP.
|
||||||
|

|
||||||
|
|
||||||
|
On the backup:
|
||||||
|
- `Services` > `Dnsmasq DNS & DHCP` > `General`: also tick the `Disable HA sync` box
|
||||||
|
- Set `DHCP reply delay` to `5` seconds, to give the master priority to answer.
|
||||||
|
- `DHCP ranges`: use a different, smaller pool (`10.102.0.200` -> `10.102.0.220`) and also tick the `Disable HA sync` box.
|
||||||
|
|
||||||
|
|
||||||
|
This way, only DHCP options sync between nodes, while lease ranges stay separate.
|
||||||
|
|
||||||
|
#### WAN Interface
|
||||||
|
|
||||||
|
My ISP modem only provides a single DHCP lease, and I don't want my 2 VMs to compete for it. To handle this:
|
||||||
|
1. In Proxmox, I copy the MAC of the `net0` (WAN) interface from `poc-opnsense-1` and apply it to `poc-opnsense-2`. This way, the DHCP lease can be shared between the nodes (see the sketch after the script below).
|
||||||
|
⚠️ If both VMs bring up the same MAC, it can cause ARP conflicts and break connectivity, only the MASTER should keep its WAN active.
|
||||||
|
2. A CARP event hook makes it possible to run scripts, so I deployed this [Gist script](https://gist.github.com/spali/2da4f23e488219504b2ada12ac59a7dc#file-10-wancarp) in `/usr/local/etc/rc.syshook.d/carp/10-wan` on both nodes. It ensures the WAN is active only on the MASTER, avoiding conflicts.
|
||||||
|
```php
|
||||||
|
#!/usr/local/bin/php
|
||||||
|
<?php
|
||||||
|
|
||||||
|
require_once("config.inc");
|
||||||
|
require_once("interfaces.inc");
|
||||||
|
require_once("util.inc");
|
||||||
|
require_once("system.inc");
|
||||||
|
|
||||||
|
$subsystem = !empty($argv[1]) ? $argv[1] : '';
|
||||||
|
$type = !empty($argv[2]) ? $argv[2] : '';
|
||||||
|
|
||||||
|
if ($type != 'MASTER' && $type != 'BACKUP') {
|
||||||
|
log_error("Carp '$type' event unknown from source '{$subsystem}'");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!strstr($subsystem, '@')) {
|
||||||
|
log_error("Carp '$type' event triggered from wrong source '{$subsystem}'");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
$ifkey = 'wan';
|
||||||
|
|
||||||
|
if ($type === "MASTER") {
|
||||||
|
log_error("enable interface '$ifkey' due CARP event '$type'");
|
||||||
|
$config['interfaces'][$ifkey]['enable'] = '1';
|
||||||
|
write_config("enable interface '$ifkey' due CARP event '$type'", false);
|
||||||
|
interface_configure(false, $ifkey, false, false);
|
||||||
|
} else {
|
||||||
|
log_error("disable interface '$ifkey' due CARP event '$type'");
|
||||||
|
unset($config['interfaces'][$ifkey]['enable']);
|
||||||
|
write_config("disable interface '$ifkey' due CARP event '$type'", false);
|
||||||
|
interface_configure(false, $ifkey, false, false);
|
||||||
|
}
|
||||||
|
```
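As mentioned in step 1, copying the WAN MAC address can also be done with `qm` on the Proxmox side (a sketch; the VM IDs, the MAC and the `vlan101` VNet name are placeholders and assumptions):

```bash
# Read the MAC of net0 on poc-opnsense-1, then reuse it on poc-opnsense-2
qm config <vmid-poc-opnsense-1> | grep net0
qm set <vmid-poc-opnsense-2> --net0 virtio=BC:24:11:XX:XX:XX,bridge=vlan101
```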
|
||||||
|
|
||||||
|
### Test Failover
|
||||||
|
|
||||||
|
Time for the real test!
|
||||||
|
|
||||||
|
OPNsense provides a _CARP Maintenance Mode_. With the master active, WAN was enabled only on that node. Entering maintenance mode flipped the roles: the master became backup, its WAN disabled, while the backup enabled its WAN:
|
||||||
|

|
||||||
|
|
||||||
|
While pinging outside the network, I observed zero packet loss during the failover.
|
||||||
|
|
||||||
|
Finally, I simulated a crash by powering off the master. The backup took over seamlessly, I saw only one dropped packet, and thanks to state synchronization, even my SSH session stayed alive. 🎉
|
||||||
|
|
||||||
|
## Conclusion
|
||||||
|
|
||||||
|
This proof of concept showed that running **OPNsense in high availability on Proxmox VE** is possible, even with a single WAN IP address. To achieve this, I needed these components:
|
||||||
|
- VLAN segmentation.
|
||||||
|
- Dedicated pfSync network.
|
||||||
|
- Shared virtual IP.
|
||||||
|
- Script to manage the WAN interface.
|
||||||
|
|
||||||
|
The setup behaves exactly as expected: seamless failover, synchronized firewall states, and even live sessions surviving a node crash. The most delicate part was handling the WAN lease, since my ISP modem only provides one IP, but the CARP hook script solved that challenge.
|
||||||
|
|
||||||
|
🚀 The next milestone will be to prepare a new OPNsense HA cluster with the aim to completely replace my current physical box. Stay tuned!
|
@@ -0,0 +1,57 @@
|
|||||||
|
---
|
||||||
|
slug:
|
||||||
|
title: Template
|
||||||
|
description:
|
||||||
|
date:
|
||||||
|
draft: true
|
||||||
|
tags:
|
||||||
|
- opnsense
|
||||||
|
- high-availability
|
||||||
|
- proxmox
|
||||||
|
categories:
|
||||||
|
---
|
||||||
|
|
||||||
|
## Intro
|
||||||
|
|
||||||
|
In my previous [post]({{< ref "post/12-opnsense-virtualization-highly-available" >}}), I've set up a PoC to validate the possibility to create a cluster of 2 **OPNsense** VMs in **Proxmox VE** and make the firewall highly available.
|
||||||
|
|
||||||
|
This time, I will cover the creation of my future OPNsense cluster from scratch, plan the cutover and finally migrate away from my current physical box.
|
||||||
|
|
||||||
|
## Build the Foundation
|
||||||
|
|
||||||
|
For the real thing, I'll have to connect the WAN, coming from my ISP box, to my main switch. For that, I have to add a VLAN to carry this traffic to my Proxmox nodes.
|
||||||
|
|
||||||
|
### UniFi
|
||||||
|
|
||||||
|
The first thing I do is configure my Layer 2 network, which is managed by UniFi. There I need to create two VLANs:
|
||||||
|
- *WAN* (20): transports the WAN traffic between my ISP box and my Proxmox nodes.
|
||||||
|
- *pfSync* (44): communication between my OPNsense nodes.
|
||||||
|
|
||||||
|
In the UniFi controller, in `Settings` > `Networks`, I add a `New Virtual Network`. I name it `WAN` and give it the VLAN ID 20:
|
||||||
|

|
||||||
|
|
||||||
|
I do the same thing again for the `pfSync` VLAN with the VLAN ID 44.
|
||||||
|
|
||||||
|
I will plug my ISP box into port 15 of my switch, which is disabled for now. I set it as active, set the native VLAN to the newly created `WAN (20)` and disable trunking:
|
||||||
|

|
||||||
|
|
||||||
|
Once this setting is applied, I make sure that only the ports connected to my Proxmox nodes propagate these VLANs on their trunks.
|
||||||
|
|
||||||
|
We are done with the UniFi configuration.
|
||||||
|
|
||||||
|
### Proxmox SDN
|
||||||
|
|
||||||
|
Now that the VLANs can reach my nodes, I want to handle them in the Proxmox SDN.
|
||||||
|
|
||||||
|
In `Datacenter` > `SDN` > `VNets`, I create a new VNet, name it `vlan20` to follow my own naming convention, give it the *WAN* alias and use the tag (ID) 20:
|
||||||
|

|
||||||
|
|
||||||
|
I also create the `vlan44` for the *pfSync* VLAN, then I apply this configuration and we are done with the SDN.
|
||||||
|
|
||||||
|
## Create the VMs
|
||||||
|
|
||||||
|
Now that the VLAN configuration is done, I can start building my VMs.
|
||||||
|
|
||||||
|
I won't go into much detail about the VM creation; I already covered it in the previous [post]({{< ref "post/12-opnsense-virtualization-highly-available" >}}).
|
||||||
|
|
||||||
|
The first VM is named `cerbere-head1`
|
@@ -53,7 +53,7 @@ L'idée est simple :
|
|||||||
1. J'écris le contenu de mon blog dans mon vault Obsidian, sous un dossier `Blog`.
|
1. J'écris le contenu de mon blog dans mon vault Obsidian, sous un dossier `Blog`.
|
||||||
2. Une fois le fichier modifié, le plugin Git Obsidian effectue automatiquement les commits et les poussent vers le dépôt Gitea.
|
2. Une fois le fichier modifié, le plugin Git Obsidian effectue automatiquement les commits et les poussent vers le dépôt Gitea.
|
||||||
3. Lorsque Gitea reçoit ce push, une première Gitea Action est déclenchée.
|
3. Lorsque Gitea reçoit ce push, une première Gitea Action est déclenchée.
|
||||||
4. La première action synchronise le contenu du blog mis à jour avec un autre dépôt [Git distinct](https://git.vezpi.me/Vezpi/blog) qui héberge le contenu.
|
4. La première action synchronise le contenu du blog mis à jour avec un autre dépôt [Git distinct](https://git.vezpi.com/Vezpi/blog) qui héberge le contenu.
|
||||||
5. Dans ce dépôt, une autre Gitea Action est déclenchée.
|
5. Dans ce dépôt, une autre Gitea Action est déclenchée.
|
||||||
6. La deuxième Gitea Action génère les pages web statiques tout en mettant à jour Hugo si nécessaire.
|
6. La deuxième Gitea Action génère les pages web statiques tout en mettant à jour Hugo si nécessaire.
|
||||||
7. Le blog est maintenant mis à jour (celui que vous lisez).
|
7. Le blog est maintenant mis à jour (celui que vous lisez).
|
||||||
@@ -78,7 +78,7 @@ Le vault Obsidian est un dépôt Git privé self-hosted dans Gitea. J'utilise Do
|
|||||||
container_name: gitea_runner
|
container_name: gitea_runner
|
||||||
restart: on-failure
|
restart: on-failure
|
||||||
environment:
|
environment:
|
||||||
- GITEA_INSTANCE_URL=https://git.vezpi.me
|
- GITEA_INSTANCE_URL=https://git.vezpi.com
|
||||||
- GITEA_RUNNER_REGISTRATION_TOKEN=${GITEA_RUNNER_REGISTRATION_TOKEN}$
|
- GITEA_RUNNER_REGISTRATION_TOKEN=${GITEA_RUNNER_REGISTRATION_TOKEN}$
|
||||||
- GITEA_RUNNER_NAME=self-hosted
|
- GITEA_RUNNER_NAME=self-hosted
|
||||||
- GITEA_RUNNER_LABELS=ubuntu:docker://node:lts,alpine:docker://node:lts-alpine
|
- GITEA_RUNNER_LABELS=ubuntu:docker://node:lts,alpine:docker://node:lts-alpine
|
||||||
@@ -140,7 +140,7 @@ jobs:
|
|||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
- name: Clone the blog repository
|
- name: Clone the blog repository
|
||||||
run: git clone https://${{ secrets.REPO_TOKEN }}@git.vezpi.me/Vezpi/blog.git
|
run: git clone https://${{ secrets.REPO_TOKEN }}@git.vezpi.com/Vezpi/blog.git
|
||||||
|
|
||||||
- name: Transfer blog content from Obsidian
|
- name: Transfer blog content from Obsidian
|
||||||
run: |
|
run: |
|
||||||
|
@@ -52,7 +52,7 @@ The idea is simple:
|
|||||||
1. I write blog content in my Obsidian vault, under a specific `Blog` folder.
|
1. I write blog content in my Obsidian vault, under a specific `Blog` folder.
|
||||||
2. When I'm done editing the file, the Obsidian Git plugin automatically commits and pushes updates to the Gitea repository
|
2. When I'm done editing the file, the Obsidian Git plugin automatically commits and pushes updates to the Gitea repository
|
||||||
3. When Gitea receives that push, a first Gitea Action is triggered.
|
3. When Gitea receives that push, a first Gitea Action is triggered.
|
||||||
4. The first action syncs the updated blog content to another separate [Git repository](https://git.vezpi.me/Vezpi/blog) which hosts my blog content.
|
4. The first action syncs the updated blog content to another separate [Git repository](https://git.vezpi.com/Vezpi/blog) which hosts my blog content.
|
||||||
5. In that blog repository, another Gitea Action is triggered.
|
5. In that blog repository, another Gitea Action is triggered.
|
||||||
6. The second Gitea Action generates the static web pages while upgrading Hugo if needed
|
6. The second Gitea Action generates the static web pages while upgrading Hugo if needed
|
||||||
7. The blog is now updated (the one you are reading).
|
7. The blog is now updated (the one you are reading).
|
||||||
@@ -77,7 +77,7 @@ The Obsidian vault is a private Git repository self-hosted in Gitea. I use docke
|
|||||||
container_name: gitea_runner
|
container_name: gitea_runner
|
||||||
restart: on-failure
|
restart: on-failure
|
||||||
environment:
|
environment:
|
||||||
- GITEA_INSTANCE_URL=https://git.vezpi.me
|
- GITEA_INSTANCE_URL=https://git.vezpi.com
|
||||||
- GITEA_RUNNER_REGISTRATION_TOKEN=${GITEA_RUNNER_REGISTRATION_TOKEN}$
|
- GITEA_RUNNER_REGISTRATION_TOKEN=${GITEA_RUNNER_REGISTRATION_TOKEN}$
|
||||||
- GITEA_RUNNER_NAME=self-hosted
|
- GITEA_RUNNER_NAME=self-hosted
|
||||||
- GITEA_RUNNER_LABELS=ubuntu:docker://node:lts,alpine:docker://node:lts-alpine
|
- GITEA_RUNNER_LABELS=ubuntu:docker://node:lts,alpine:docker://node:lts-alpine
|
||||||
@@ -139,7 +139,7 @@ jobs:
|
|||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
- name: Clone the blog repository
|
- name: Clone the blog repository
|
||||||
run: git clone https://${{ secrets.REPO_TOKEN }}@git.vezpi.me/Vezpi/blog.git
|
run: git clone https://${{ secrets.REPO_TOKEN }}@git.vezpi.com/Vezpi/blog.git
|
||||||
|
|
||||||
- name: Transfer blog content from Obsidian
|
- name: Transfer blog content from Obsidian
|
||||||
run: |
|
run: |
|
||||||
|
@@ -1,7 +1,7 @@
|
|||||||
---
|
---
|
||||||
slug: blog-deployment-ci-cd-pipeline-gitea-actions
|
slug: blog-deployment-ci-cd-pipeline-gitea-actions
|
||||||
title: Pipeline CI/CD du Déploiment du Blog avec Gitea Actions
|
title: Pipeline CI/CD du Déploiment du Blog avec Gitea Actions
|
||||||
description: Comment j'ai sécurisé le déploiement automatisé de mon blog self-hosted construit avec Hugo en mettant en place un pipeline CI/CD à l'aide de Gitea Actions
|
description: Comment j'ai sécurisé le déploiement automatisé de mon blog self-hosted construit avec Hugo en mettant en place un pipeline CI/CD à l'aide de Gitea Actions.
|
||||||
date: 2025-06-05
|
date: 2025-06-05
|
||||||
draft: false
|
draft: false
|
||||||
tags:
|
tags:
|
||||||
@@ -20,7 +20,7 @@ Le blog étant redéployé de façon automatique à chaque modification du conte
|
|||||||
|
|
||||||
## Sécuriser le Déploiement du Blog
|
## Sécuriser le Déploiement du Blog
|
||||||
|
|
||||||
Aujourd'hui mon blog se redéploie automatiquement à chaque modification de la branche `main` du [dépôt Git](https://git.vezpi.me/Vezpi/Blog) de mon instance **Gitea** via une **Gitea Actions**. Chaque modification apportée à mon vault **Obsidian** est poussée automatiquement dans cette branche.
|
Aujourd'hui mon blog se redéploie automatiquement à chaque modification de la branche `main` du [dépôt Git](https://git.vezpi.com/Vezpi/Blog) de mon instance **Gitea** via une **Gitea Actions**. Chaque modification apportée à mon vault **Obsidian** est poussée automatiquement dans cette branche.
|
||||||
|
|
||||||

|

|
||||||
|
|
||||||
@@ -96,7 +96,7 @@ Par défaut, au lancement d'un conteneur `nginx`, il se contente de lancer le se
|
|||||||
set -e
|
set -e
|
||||||
|
|
||||||
# Configuration
|
# Configuration
|
||||||
REPO_URL="${REPO_URL:-https://git.vezpi.me/Vezpi/blog.git}"
|
REPO_URL="${REPO_URL:-https://git.vezpi.com/Vezpi/blog.git}"
|
||||||
URL="${URL:-blog.vezpi.com}"
|
URL="${URL:-blog.vezpi.com}"
|
||||||
BRANCH="${BRANCH:-preview}"
|
BRANCH="${BRANCH:-preview}"
|
||||||
CLONE_DIR="${CLONE_DIR:-/blog}"
|
CLONE_DIR="${CLONE_DIR:-/blog}"
|
||||||
@@ -177,7 +177,7 @@ Voici la nouvelle configuration de mon `runner` dans ma stack Gitea, gérée par
|
|||||||
container_name: gitea_runner
|
container_name: gitea_runner
|
||||||
restart: always
|
restart: always
|
||||||
environment:
|
environment:
|
||||||
- GITEA_INSTANCE_URL=https://git.vezpi.me
|
- GITEA_INSTANCE_URL=https://git.vezpi.com
|
||||||
- GITEA_RUNNER_REGISTRATION_TOKEN=<token>
|
- GITEA_RUNNER_REGISTRATION_TOKEN=<token>
|
||||||
- GITEA_RUNNER_NAME=self-hosted
|
- GITEA_RUNNER_NAME=self-hosted
|
||||||
- GITEA_RUNNER_LABELS=ubuntu:docker://node:lts,alpine:docker://node:lts-alpine,docker:docker://docker:cli
|
- GITEA_RUNNER_LABELS=ubuntu:docker://node:lts,alpine:docker://node:lts-alpine,docker:docker://docker:cli
|
||||||
@@ -241,7 +241,7 @@ jobs:
|
|||||||
docker_folder_changed: ${{ steps.docker_folder.outputs.changed }}
|
docker_folder_changed: ${{ steps.docker_folder.outputs.changed }}
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout Repository
|
- name: Checkout Repository
|
||||||
run: git clone --branch preview https://${{ secrets.REPO_TOKEN }}@git.vezpi.me/Vezpi/blog.git .
|
run: git clone --branch preview https://${{ secrets.REPO_TOKEN }}@git.vezpi.com/Vezpi/blog.git .
|
||||||
|
|
||||||
- name: Check Latest Hugo Version
|
- name: Check Latest Hugo Version
|
||||||
id: get_latest
|
id: get_latest
|
||||||
@@ -296,7 +296,7 @@ jobs:
|
|||||||
shell: sh
|
shell: sh
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout Repository
|
- name: Checkout Repository
|
||||||
run: git clone --branch preview https://${{ secrets.REPO_TOKEN }}@git.vezpi.me/Vezpi/blog.git .
|
run: git clone --branch preview https://${{ secrets.REPO_TOKEN }}@git.vezpi.com/Vezpi/blog.git .
|
||||||
|
|
||||||
- name: Build Docker Image
|
- name: Build Docker Image
|
||||||
run: |
|
run: |
|
||||||
|
@@ -1,7 +1,7 @@
|
|||||||
---
|
---
|
||||||
slug: blog-deployment-ci-cd-pipeline-gitea-actions
|
slug: blog-deployment-ci-cd-pipeline-gitea-actions
|
||||||
title: Blog Deployment CI/CD Pipeline using Gitea Actions
|
title: Blog Deployment CI/CD Pipeline using Gitea Actions
|
||||||
description: How I secured the automated deployment of my self-hosted blog built with Hugo by setting up a CI/CD pipeline using Gitea Actions
|
description: How I secured the automated deployment of my self-hosted blog built with Hugo by setting up a CI/CD pipeline using Gitea Actions.
|
||||||
date: 2025-06-05
|
date: 2025-06-05
|
||||||
draft: false
|
draft: false
|
||||||
tags:
|
tags:
|
||||||
@@ -20,7 +20,7 @@ Since the blog is automatically redeployed every time I modify content in Obsidi
|
|||||||
|
|
||||||
## Securing the Blog Deployment
|
## Securing the Blog Deployment
|
||||||
|
|
||||||
Currently, my blog redeploys automatically on every change to the `main` branch of the [Git repository](https://git.vezpi.me/Vezpi/Blog) hosted on my **Gitea** instance, using a **Gitea Actions** workflow. Every change made in my **Obsidian** vault is automatically pushed to this branch.
|
Currently, my blog redeploys automatically on every change to the `main` branch of the [Git repository](https://git.vezpi.com/Vezpi/Blog) hosted on my **Gitea** instance, using a **Gitea Actions** workflow. Every change made in my **Obsidian** vault is automatically pushed to this branch.
|
||||||
|
|
||||||

|

|
||||||
|
|
||||||
@@ -96,7 +96,7 @@ By default, a `nginx` container simply starts the web server. But here I wanted
|
|||||||
set -e
|
set -e
|
||||||
|
|
||||||
# Configuration
|
# Configuration
|
||||||
REPO_URL="${REPO_URL:-https://git.vezpi.me/Vezpi/blog.git}"
|
REPO_URL="${REPO_URL:-https://git.vezpi.com/Vezpi/blog.git}"
|
||||||
URL="${URL:-blog.vezpi.com}"
|
URL="${URL:-blog.vezpi.com}"
|
||||||
BRANCH="${BRANCH:-preview}"
|
BRANCH="${BRANCH:-preview}"
|
||||||
CLONE_DIR="${CLONE_DIR:-/blog}"
|
CLONE_DIR="${CLONE_DIR:-/blog}"
|
||||||
@@ -177,7 +177,7 @@ Here is the new configuration of my `runner` in my Gitea stack, also managed via
|
|||||||
container_name: gitea_runner
|
container_name: gitea_runner
|
||||||
restart: always
|
restart: always
|
||||||
environment:
|
environment:
|
||||||
- GITEA_INSTANCE_URL=https://git.vezpi.me
|
- GITEA_INSTANCE_URL=https://git.vezpi.com
|
||||||
- GITEA_RUNNER_REGISTRATION_TOKEN=<token>
|
- GITEA_RUNNER_REGISTRATION_TOKEN=<token>
|
||||||
- GITEA_RUNNER_NAME=self-hosted
|
- GITEA_RUNNER_NAME=self-hosted
|
||||||
- GITEA_RUNNER_LABELS=ubuntu:docker://node:lts,alpine:docker://node:lts-alpine,docker:docker://docker:cli
|
- GITEA_RUNNER_LABELS=ubuntu:docker://node:lts,alpine:docker://node:lts-alpine,docker:docker://docker:cli
|
||||||
@@ -241,7 +241,7 @@ jobs:
|
|||||||
docker_folder_changed: ${{ steps.docker_folder.outputs.changed }}
|
docker_folder_changed: ${{ steps.docker_folder.outputs.changed }}
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout Repository
|
- name: Checkout Repository
|
||||||
run: git clone --branch preview https://${{ secrets.REPO_TOKEN }}@git.vezpi.me/Vezpi/blog.git .
|
run: git clone --branch preview https://${{ secrets.REPO_TOKEN }}@git.vezpi.com/Vezpi/blog.git .
|
||||||
|
|
||||||
- name: Check Latest Hugo Version
|
- name: Check Latest Hugo Version
|
||||||
id: get_latest
|
id: get_latest
|
||||||
@@ -296,7 +296,7 @@ jobs:
|
|||||||
shell: sh
|
shell: sh
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout Repository
|
- name: Checkout Repository
|
||||||
run: git clone --branch preview https://${{ secrets.REPO_TOKEN }}@git.vezpi.me/Vezpi/blog.git .
|
run: git clone --branch preview https://${{ secrets.REPO_TOKEN }}@git.vezpi.com/Vezpi/blog.git .
|
||||||
|
|
||||||
- name: Build Docker Image
|
- name: Build Docker Image
|
||||||
run: |
|
run: |
|
||||||
|
380
content/post/5-notification-system-gotify-vs-ntfy.fr.md
Normal file
@@ -0,0 +1,380 @@
|
|||||||
|
---
|
||||||
|
slug: notification-system-gotify-vs-ntfy
|
||||||
|
title: Test de Gotify et Ntfy, un système de notifications self-hosted
|
||||||
|
description: Gotify ou Ntfy ? J'ai testé les deux pour créer un système de notifications fiable et self-hosted pour mon homelab, et intégré à un pipeline CI/CD.
|
||||||
|
date: 2025-06-13
|
||||||
|
draft: false
|
||||||
|
tags:
|
||||||
|
- notification
|
||||||
|
- ntfy
|
||||||
|
- gotify
|
||||||
|
- ci-cd
|
||||||
|
categories:
|
||||||
|
- homelab
|
||||||
|
---
|
||||||
|
## Intro
|
||||||
|
|
||||||
|
Pour savoir ce qui se passe dans mon homelab et être averti quand quelque chose ne va pas, je veux mettre en place un système de notifications où (presque) n'importe quoi pourrait m'envoyer un message que je recevrais sur mon mobile.
|
||||||
|
|
||||||
|
Par le passé, j’utilisais **Pushover**, qui était très bien, mais je veux explorer de nouvelles options, plus modernes et éventuellement self-hosted.
|
||||||
|
|
||||||
|
## Choisir le Bon Système de Notifications
|
||||||
|
|
||||||
|
Les éléments clés pour déterminer le bon système pour moi seraient :
|
||||||
|
- **Application Android** : obligatoire, une interface élégante et intuitive est important.
|
||||||
|
- **Intégration** : je veux que le service soit intégré partout où je veux être notifié.
|
||||||
|
- **Self-hosted** : l’héberger moi-même est toujours mieux pour la confidentialité.
|
||||||
|
|
||||||
|
Après une recherche rapide, les outils les plus adaptés sur le marché sont :
|
||||||
|
- **Ntfy**
|
||||||
|
- **Gotify**
|
||||||
|
|
||||||
|
Étant donné les commentaires sur internet et après avoir testé rapidement les deux applications Android, je ne peux pas vraiment décider. Je pense que Ntfy est la meilleure option, mais je vais installer et tester les deux pour me faire une idée !
|
||||||
|
|
||||||
|
## Gotify
|
||||||
|
|
||||||
|
J’avais entendu parler de Gotify il y a quelque temps, en fait avant même de regarder d'autres alternatives, j'avais celui-ci en tête. J’ai rapidement jeté un œil à sa [documentation](https://gotify.net/docs/) et cela semble assez simple.
|
||||||
|
|
||||||
|
### Installation
|
||||||
|
|
||||||
|
Comme d’habitude, je vais déployer le serveur Gotify avec `docker compose` sur `dockerVM`, une VM hébergeant mes applications sous forme de conteneurs Docker. Je crée un nouveau dossier `gotify` dans `/appli/docker/` et je colle mon template de `docker-compose.yml` dedans.
|
||||||
|
|
||||||
|
`docker-compose.yml`
|
||||||
|
```yaml
|
||||||
|
services:
|
||||||
|
gotify:
|
||||||
|
image: gotify/server
|
||||||
|
container_name: gotify
|
||||||
|
volumes:
|
||||||
|
- /appli/data/gotify/data/:/app/data
|
||||||
|
environment:
|
||||||
|
- TZ=Europe/Paris
|
||||||
|
- GOTIFY_DEFAULTUSER_NAME=${GOTIFY_DEFAULTUSER_NAME}
|
||||||
|
- GOTIFY_DEFAULTUSER_PASS=${GOTIFY_DEFAULTUSER_PASS}
|
||||||
|
networks:
|
||||||
|
- web
|
||||||
|
labels:
|
||||||
|
- traefik.enable=true
|
||||||
|
- traefik.http.routers.gotify.rule=Host(`gotify.vezpi.me`)
|
||||||
|
- traefik.http.routers.gotify.entrypoints=https
|
||||||
|
- traefik.http.routers.gotify.tls.certresolver=letsencrypt
|
||||||
|
- traefik.http.services.gotify.loadbalancer.server.port=80
|
||||||
|
restart: always
|
||||||
|
|
||||||
|
networks:
|
||||||
|
web:
|
||||||
|
external: true
|
||||||
|
```
|
||||||
|
|
||||||
|
`.env`
|
||||||
|
```
|
||||||
|
GOTIFY_DEFAULTUSER_NAME=vez
|
||||||
|
GOTIFY_DEFAULTUSER_PASS=<password>
|
||||||
|
```
|
||||||
|
|
||||||
|
Dans la [documentation](https://gotify.net/docs/config), je vois que plusieurs moteurs de base de données peuvent être utilisés, par défaut c’est **sqlite3** qui est utilisé, ce qui ira très bien pour le test. Passer à **PostgreSQL** pourrait être une option si je décide de garder Gotify. Sur cette même page, je vois les différentes variables d’environnement que je peux utiliser pour configurer le serveur depuis le fichier `docker-compose.yml`.
|
||||||
|
|
||||||
|
Quand mes fichiers de configuration sont prêts, je crée une nouvelle entrée dans mon plugin Caddy sur OPNsense pour rediriger ma nouvelle URL Gotify : [https://gotify.vezpi.me](https://gotify.vezpi.me).
|
||||||
|
|
||||||
|
Je crée également le dossier `/appli/data/gotify/data/` dans `dockerVM` pour le monter comme volume et stocker les données :
|
||||||
|
```bash
|
||||||
|
mkdir -p /appli/data/gotify/data/
|
||||||
|
```
|
||||||
|
|
||||||
|
Enfin, je lance la stack docker :
|
||||||
|
```bash
|
||||||
|
$ docker compose up -d
|
||||||
|
[+] Running 5/5
|
||||||
|
✔ gotify Pulled
|
||||||
|
✔ 63ce8e957633 Pull complete
|
||||||
|
✔ e7def9680541 Pull complete
|
||||||
|
✔ 9a1821c438b4 Pull complete
|
||||||
|
✔ ad316556c9ff Pull complete
|
||||||
|
[+] Running 1/1
|
||||||
|
✔ Container gotify Started
|
||||||
|
```
|
||||||
|
|
||||||
|
✅ Atteindre l’URL [https://gotify.vezpi.me](https://gotify.vezpi.me) m’affiche la page de connexion Gotify :
|
||||||
|

|
||||||
|
|
||||||
|
Après connexion, j’accède au tableau de bord, sans messages évidemment :
|
||||||
|

|
||||||
|
|
||||||
|
### Créer une Application
|
||||||
|
|
||||||
|
Pour permettre l’envoi de messages, je dois d’abord créer une application pour laquelle les messages seront regroupés. Cela peut se faire de deux manières :
|
||||||
|
- **WebUI**
|
||||||
|
- **REST-API**
|
||||||
|
|
||||||
|
Pour le test, j’utiliserai la WebUI, je clique sur le bouton `APPS` en haut puis `CREATE APPLICATION`. Je choisis un magnifique nom d'application et une description.
|
||||||
|

|
||||||
|
|
||||||
|
Une fois mon application créée, un token est généré pour celle-ci. Je peux modifier l’application pour changer quoi que ce soit, je peux aussi uploader une icône.
|
||||||
|

|
||||||
|
|
||||||
|
### Tests
|
||||||
|
|
||||||
|
Mon application est maintenant visible dans la barre latérale, testons maintenant l’envoi d’un message. Pour l’envoyer, je peux utiliser `curl` et j’ai besoin du token de l’application.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl "https://gotify.vezpi.me/message?token=<apptoken>" -F "title=Cooked!" -F "message=The potoaries are ready!" -F "priority=5"
|
||||||
|
```
|
||||||
|
Je reçois instantanément la notification sur mon mobile et dans mon navigateur.
|
||||||
|
|
||||||
|
Je renvoie un autre message mais avec une priorité plus basse : `-2`. Je ne reçois pas de notification dans mon navigateur, je remarque une légère différence entre les deux messages. Sur mon mobile, seule ma montre la reçoit, je ne la vois pas sur l’écran, mais je la retrouve dans le centre de notifications.
|
||||||
|

|
||||||
|
|
||||||
|
### Application Android
|
||||||
|
|
||||||
|
Voici quelques captures d’écran depuis mon appareil Android :
|
||||||
|

|
||||||
|
|
||||||
|
Pour une raison inconnue, une notification apparaît aléatoirement pour me dire que je suis connecté à Gotify :
|
||||||
|

|
||||||
|
|
||||||
|
### Conclusion
|
||||||
|
|
||||||
|
Dans la [documentation](https://gotify.net/docs/msgextras), j’ai trouvé quelques fonctionnalités supplémentaires, comme l’ajout d’images ou d’actions cliquables. En résumé, ça fait le job, c’est tout. Le processus d’installation est simple, l’utilisation n’est pas compliquée, mais je dois créer une application pour obtenir un token, puis ajouter ce token à chaque fois que je veux envoyer un message.
|
||||||
|
|
||||||
|
## Ntfy
|
||||||
|
|
||||||
|
Ntfy semble très propre, installons-le et voyons ce qu’il propose !
|
||||||
|
|
||||||
|
### Installation
|
||||||
|
|
||||||
|
Même histoire ici avec `docker compose` sur `dockerVM`. Je crée un nouveau dossier `ntfy` dans `/appli/docker/` et je colle le template de `docker-compose.yml`.
|
||||||
|
|
||||||
|
`docker-compose.yml`
|
||||||
|
```yaml
|
||||||
|
services:
|
||||||
|
ntfy:
|
||||||
|
image: binwiederhier/ntfy
|
||||||
|
container_name: ntfy
|
||||||
|
command:
|
||||||
|
- serve
|
||||||
|
volumes:
|
||||||
|
- /appli/data/ntfy/data:/var/cache/ntfy
|
||||||
|
environment:
|
||||||
|
- TZ=Europe/Paris
|
||||||
|
- NTFY_BASE_URL=https://ntfy.vezpi.me
|
||||||
|
- NTFY_CACHE_FILE=/var/cache/ntfy/cache.db
|
||||||
|
- NTFY_AUTH_FILE=/var/cache/ntfy/auth.db
|
||||||
|
- NTFY_ATTACHMENT_CACHE_DIR=/var/cache/ntfy/attachments
|
||||||
|
- NTFY_AUTH_DEFAULT_ACCESS=deny-all
|
||||||
|
- NTFY_BEHIND_PROXY=true
|
||||||
|
- NTFY_ENABLE_LOGIN=true
|
||||||
|
user: 1000:1000
|
||||||
|
networks:
|
||||||
|
- web
|
||||||
|
labels:
|
||||||
|
- traefik.enable=true
|
||||||
|
- traefik.http.routers.ntfy.rule=Host(`ntfy.vezpi.me`)
|
||||||
|
- traefik.http.routers.ntfy.entrypoints=https
|
||||||
|
- traefik.http.routers.ntfy.tls.certresolver=letsencrypt
|
||||||
|
- traefik.http.services.ntfy.loadbalancer.server.port=80
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD-SHELL", "wget -q --tries=1 http://ntfy:80/v1/health -O - | grep -Eo '\"healthy\"\\s*:\\s*true' || exit 1"]
|
||||||
|
interval: 60s
|
||||||
|
timeout: 10s
|
||||||
|
retries: 3
|
||||||
|
start_period: 40s
|
||||||
|
restart: unless-stopped
|
||||||
|
|
||||||
|
networks:
|
||||||
|
web:
|
||||||
|
external: true
|
||||||
|
```
|
||||||
|
|
||||||
|
Je crée aussi le dossier de volume persistant `/appli/data/ntfy/data/` dans `dockerVM` :
|
||||||
|
```bash
|
||||||
|
mkdir -p /appli/data/ntfy/data/
|
||||||
|
```
|
||||||
|
|
||||||
|
La [documentation](https://docs.ntfy.sh/config/) est impressionnante, j’ai essayé de rassembler la config pour un démarrage rapide. Je devrais être bon pour lancer le serveur.
|
||||||
|
|
||||||
|
Encore une fois ici, je crée un nouveau domaine pour mon proxy inverse Caddy sur OPNsense avec l’URL [https://ntfy.vezpi.me](https://ntfy.vezpi.me).
|
||||||
|
```bash
|
||||||
|
$ docker compose up -d
|
||||||
|
[+] Running 4/4
|
||||||
|
✔ ntfy Pulled
|
||||||
|
✔ f18232174bc9 Already exists
|
||||||
|
✔ f5bf7a328fac Pull complete
|
||||||
|
✔ 572c745ef6c3 Pull complete
|
||||||
|
[+] Running 1/1
|
||||||
|
✔ Container ntfy Started
|
||||||
|
```
|
||||||
|
|
||||||
|
✅ L’URL [https://ntfy.vezpi.me](https://ntfy.vezpi.me) me donne accès au tableau de bord Ntfy :
|
||||||
|

|
||||||
|
|
||||||
|
Au départ je n’ai aucun utilisateur et aucun n’est créé par défaut. Comme j’ai interdit tout accès anonyme dans la config, je dois en créer un.
|
||||||
|
|
||||||
|
Pour lister les utilisateurs, je peux utiliser cette commande :
|
||||||
|
```bash
|
||||||
|
$ docker exec -it ntfy ntfy user list
|
||||||
|
user * (role: anonymous, tier: none)
|
||||||
|
- no topic-specific permissions
|
||||||
|
- no access to any (other) topics (server config)
|
||||||
|
```
|
||||||
|
|
||||||
|
Je crée un utilisateur avec les privilèges d’administration :
|
||||||
|
```bash
|
||||||
|
$ docker exec -it ntfy ntfy user add --role=admin vez
|
||||||
|
user vez added with role admin
|
||||||
|
```
|
||||||
|
|
||||||
|
Je peux maintenant me connecter à l’interface Web, et passer en mode sombre, mes yeux me remercient.
|
||||||
|
|
||||||
|
### Topics
|
||||||
|
|
||||||
|
Dans Ntfy, il n’y a pas d’applications à créer, mais les messages sont regroupés dans des topics, plus lisibles qu’un token lors de l’envoi. Une fois le topic créé, je peux changer le nom d’affichage ou envoyer des messages de test. Sur l’interface Web, cependant, je ne trouve aucune option pour changer l’icône, alors que c’est possible depuis l’application Android, ce qui n’est pas très pratique.
|
||||||
|

|
||||||
|
### Tests
|
||||||
|
|
||||||
|
Envoyer un message est en fait plus difficile que prévu. Comme j’ai activé l’authentification, je dois aussi m’authentifier pour envoyer des messages :
|
||||||
|
```bash
|
||||||
|
curl \
|
||||||
|
-H "Title: Cooked!" \
|
||||||
|
-H "Priority: high" \
|
||||||
|
-d "The potatoes are ready!" \
|
||||||
|
-u "vez:<password>" \
|
||||||
|
https://ntfy.vezpi.me/potato
|
||||||
|
```
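
Plutôt que de passer le mot de passe dans la commande, un token d'accès devrait aussi faire l'affaire, quelque chose dans ce genre (valeur du token fictive, simple esquisse) :

```bash
# Esquisse : génération d'un token d'accès pour l'utilisateur, puis publication avec ce token
docker exec -it ntfy ntfy token add vez
curl \
  -H "Authorization: Bearer tk_<token>" \
  -H "Title: Cooked!" \
  -d "The potatoes are ready!" \
  https://ntfy.vezpi.me/potato
```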
|
||||||
|
|
||||||
|
### Application Android
|
||||||
|
|
||||||
|
Voici quelques captures de l’application Android Ntfy :
|
||||||
|

|
||||||
|
|
||||||
|
### Conclusion
|
||||||
|
|
||||||
|
Ntfy est une belle application avec une [documentation](https://docs.ntfy.sh/) vraiment solide. Les possibilités sont infinies et la liste des intégrations est impressionnante. L’installation n’était pas difficile mais demandait un peu plus de configuration. Le besoin d’utiliser la CLI pour configurer les utilisateurs et les permissions n’est pas très pratique.
|
||||||
|
|
||||||
|
Sur l’application Android, je regrette qu’il n’y ait pas une vue pour voir tous les messages des différents topics. En revanche, sur l’interface Web, j’aurais aimé pouvoir définir les icônes des topics. Ce que j’ai trouvé intéressant, c’est la possibilité d’avoir des topics depuis différents serveurs.
|
||||||
|
|
||||||
|
## Comparaison
|
||||||
|
|
||||||
|
**Gotify** est simple, tous les utilisateurs auront accès à toutes les applications. Pas besoin d'identifiant utilisateur pour envoyer des messages, seulement le token de l’application. L’application Android est efficace, mais personnellement, même si l’icône est amusante, je ne l’aime pas trop.
|
||||||
|
|
||||||
|
**Ntfy** semble plus avancé et complet, avec des permissions plus précises. L’interface est élégante tout en restant simple, les possibilités sont infinies.
|
||||||
|
|
||||||
|
Dans l’ensemble, seuls de petits détails me font préférer Ntfy à Gotify, par exemple, avoir accès à des topics de différents serveurs, les ACL ou la possibilité d’ajouter des émojis aux messages, mais les deux applications remplissent bien leur rôle.
|
||||||
|
|
||||||
|
## Implémentation de Notifications Réelles
|
||||||
|
|
||||||
|
Pendant que je mettais en place mon pipeline CI/CD pour le déploiement de mon blog, je voulais être averti chaque fois que quelque chose se passe, voyons comment je peux l’implémenter avec Ntfy.
|
||||||
|
|
||||||
|
### Contrôle d’Accès
|
||||||
|
|
||||||
|
Je pourrais utiliser mon utilisateur `admin` pour envoyer les messages depuis le pipeline et les recevoir sur mon appareil Android, même si c’est plus simple à configurer, je veux appliquer le principe de moindre privilège, ce que Ntfy permet. Je vais donc créer un utilisateur dédié pour mon pipeline CI/CD et un autre pour mon appareil Android.
|
||||||
|
|
||||||
|
#### Utilisateur Pipeline
|
||||||
|
|
||||||
|
Celui-ci ne pourra qu'envoyer des messages sur le topic `blog`, je l’appelle `gitea_blog`.
|
||||||
|
```bash
|
||||||
|
$ ntfy user add gitea_blog
|
||||||
|
user gitea_blog added with role user
|
||||||
|
$ ntfy access gitea_blog blog wo
|
||||||
|
granted write-only access to topic blog
|
||||||
|
|
||||||
|
user gitea_blog (role: user, tier: none)
|
||||||
|
- write-only access to topic blog
|
||||||
|
```
|
||||||
|
|
||||||
|
Je teste rapidement l’envoi d’un message sur ce topic :
|
||||||
|
```bash
|
||||||
|
$ curl -u gitea_blog:<password> -d "Message test from gitea_blog!" https://ntfy.vezpi.me/blog
|
||||||
|
{"id":"xIgwz9dr1w9Z","time":1749587681,"expires":1749630881,"event":"message","topic":"blog","message":"Message test from gitea_blog!"}
|
||||||
|
```
|
||||||
|
|
||||||
|

|
||||||
|
✅ Message reçu !
|
||||||
|
|
||||||
|
Je tente aussi un envoi sur mon topic de test :
|
||||||
|
```bash
|
||||||
|
$ curl -u gitea_blog:<password> -d "Message test from gitea_blog!" https://ntfy.vezpi.me/potato
|
||||||
|
{"code":40301,"http":403,"error":"forbidden","link":"https://ntfy.sh/docs/publish/#authentication"}
|
||||||
|
```
|
||||||
|
❌ Refusé comme attendu.
|
||||||
|
|
||||||
|
#### Utilisateur Android
|
||||||
|
|
||||||
|
Depuis mon appareil Android, je veux uniquement recevoir les messages, mais sur tous les topics. Je crée l’utilisateur `android_s25u` :
|
||||||
|
```bash
|
||||||
|
$ ntfy user add android_s25u
|
||||||
|
user android_s25u added with role user
|
||||||
|
$ ntfy access android_s25u "*" ro
|
||||||
|
granted read-only access to topic *
|
||||||
|
|
||||||
|
user android_s25u (role: user, tier: none)
|
||||||
|
- read-only access to topic *
|
||||||
|
```
|
||||||
|
|
||||||
|
✅ Après avoir configuré l’utilisateur dans l’application Android Ntfy, je peux lire mes messages sur `https://ntfy.vezpi.me/blog` et aussi sur le topic de test.
|
||||||
|
|
||||||
|
### Implémentation
|
||||||
|
|
||||||
|
Maintenant que mes utilisateurs sont prêts, je veux ajouter un job `Notify` dans mon pipeline CI/CD pour le déploiement du blog dans **Gitea**, vous pouvez retrouver le workflow complet dans [cet article]({{< ref "post/4-blog-deployment-ci-cd-pipeline-gitea-actions" >}}).
|
||||||
|
|
||||||
|
#### Créer un Secret
|
||||||
|
|
||||||
|
Pour permettre à mon Gitea Runner d’utiliser l’utilisateur `gitea_blog` dans ses jobs, je veux créer un secret. J’explore le dépôt Gitea `Blog` dans `Settings`, puis `Actions` > `Secrets` > `Add Secret`. J’y mets la valeur du secret au format `<utilisateur>:<password>` :
|
||||||
|

|
||||||
|
|
||||||
|
### Écrire le Code `Notify`
|
||||||
|
|
||||||
|
Je peux maintenant écrire le code qui m’enverra un message quand un nouveau déploiement se produit.
|
||||||
|
|
||||||
|
Si le déploiement est un succès, la priorité sera minimale, pas besoin de notification sur mon mobile, juste pour garder une trace dans l’application Android Ntfy si besoin.
|
||||||
|
|
||||||
|
Si quelque chose échoue, je veux être notifié sur mon mobile avec une priorité plus élevée. Ntfy me permet d’ajouter des actions sur mes notifications, je vais en créer 2 :
|
||||||
|
- **View Run** : Lien direct vers le workflow dans Gitea pour voir ce qu’il s’est passé.
|
||||||
|
- **Verify Blog** : Lien vers le blog pour vérifier qu’il est toujours en ligne.
|
||||||
|
```yaml
|
||||||
|
Notify:
|
||||||
|
needs: [Check-Rebuild, Build, Deploy-Staging, Test-Staging, Merge, Deploy-Production, Test-Production, Clean]
|
||||||
|
runs-on: ubuntu
|
||||||
|
if: always()
|
||||||
|
env:
|
||||||
|
NTFY_URL: https://ntfy.vezpi.me
|
||||||
|
NTFY_TOPIC: blog
|
||||||
|
NTFY_TOKEN: ${{ secrets.NTFY_CREDENTIALS }}
|
||||||
|
steps:
|
||||||
|
- name: Notify Workflow Result
|
||||||
|
run: |
|
||||||
|
if [[
|
||||||
|
"${{ needs.Check-Rebuild.result }}" == "success" &&
|
||||||
|
("${{ needs.Build.result }}" == "success" || "${{ needs.Build.result }}" == "skipped") &&
|
||||||
|
"${{ needs.Deploy-Staging.result }}" == "success" &&
|
||||||
|
"${{ needs.Test-Staging.result }}" == "success" &&
|
||||||
|
"${{ needs.Merge.result }}" == "success" &&
|
||||||
|
"${{ needs.Deploy-Production.result }}" == "success" &&
|
||||||
|
"${{ needs.Test-Production.result }}" == "success" &&
|
||||||
|
("${{ needs.Clean.result }}" == "success" || "${{ needs.Clean.result }}" == "skipped")
|
||||||
|
]]; then
|
||||||
|
curl -H "Priority: min" \
|
||||||
|
-H "Tags: white_check_mark" \
|
||||||
|
-d "Blog workflow completed successfully." \
|
||||||
|
-u ${NTFY_TOKEN} \
|
||||||
|
${NTFY_URL}/${NTFY_TOPIC}
|
||||||
|
else
|
||||||
|
curl -H "Priority: high" \
|
||||||
|
-H "Tags: x" \
|
||||||
|
-H "Actions: view, View Run, ${{ gitea.server_url }}/${{ gitea.repository }}/actions/runs/${{ gitea.run_number }}, clear=true; \
|
||||||
|
view, Verify Blog, https://blog.vezpi.com, clear=true" \
|
||||||
|
-d "Blog workflow failed!" \
|
||||||
|
-u ${NTFY_TOKEN} \
|
||||||
|
${NTFY_URL}/${NTFY_TOPIC}
|
||||||
|
fi
|
||||||
|
```
|
||||||
|
|
||||||
|
✅ Test des deux cas, fonctionne comme prévu :
|
||||||
|

|
||||||
|
|
||||||
|
## Conclusion
|
||||||
|
|
||||||
|
Après avoir testé **Gotify** et **Ntfy**, j’ai trouvé mon prochain système de notifications. Les deux sont bons pour le job, mais je devais en choisir un et j’ai une petite préférence pour Ntfy.
|
||||||
|
|
||||||
|
L’application serait parfaite si je pouvais gérer les utilisateurs et les accès depuis l’interface Web. Aussi, je préférerais pouvoir gérer l’icône des topics globalement plutôt que depuis mon mobile.
|
||||||
|
|
||||||
|
Quoi qu’il en soit, je suis très satisfait du résultat de cette première implémentation et j’ai hâte d’ajouter des notifications ailleurs !
|
379
content/post/5-notification-system-gotify-vs-ntfy.md
Normal file
@@ -0,0 +1,379 @@
---
|
||||||
|
slug: notification-system-gotify-vs-ntfy
|
||||||
|
title: Testing Gotify and Ntfy, a Self-Hosted Notification System
|
||||||
|
description: Gotify or Ntfy? I tested both to create a reliable, self-hosted notification system for my homelab and integrated it with CI/CD pipeline.
|
||||||
|
date: 2025-06-13
|
||||||
|
draft: false
|
||||||
|
tags:
|
||||||
|
- notification
|
||||||
|
- ntfy
|
||||||
|
- gotify
|
||||||
|
- ci-cd
|
||||||
|
categories:
|
||||||
|
- homelab
|
||||||
|
---
|
||||||
|
## Intro
|
||||||
|
|
||||||
|
To know what is going on in my homelab and be warned when something fails, I want to setup a notification system where almost anything could seamlessly send me a message that I would receive on my mobile.
|
||||||
|
|
||||||
|
In the past I was using **Pushover**, which was great, but I want to explore new options, more modern and eventually self-hosted.
|
||||||
|
|
||||||
|
## Choose the Right Notification System
|
||||||
|
|
||||||
|
The key elements to determine the right system for me would be:
|
||||||
|
- **Android application**: mandatory, a sleek and intuitive UI is important.
|
||||||
|
- **Integration**: I want the service integrated anywhere I want to be notified.
|
||||||
|
- **Self hosted**: Host it myself is always better for privacy.
|
||||||
|
|
||||||
|
After a quick research, the most suitable tools on the market are:
|
||||||
|
- **Ntfy**
|
||||||
|
- **Gotify**
|
||||||
|
|
||||||
|
Given the comments on internet and after testing quickly both Android app, I can't really decide. I think Ntfy is the better option, but I will install and test them both to make my mind!
|
||||||
|
|
||||||
|
## Gotify
|
||||||
|
|
||||||
|
I heard about Gotify some time ago, actually before looking at other alternatives, I had that one in mind. I quickly had a look at its [documentation](https://gotify.net/docs/) and this seems to be pretty straight forward.
|
||||||
|
|
||||||
|
### Installation
|
||||||
|
|
||||||
|
As usual, I will deploy the Gotify server with `docker compose` on `dockerVM`, a VM hosting my applications as Docker containers. I create a new `gotify` folder in `/appli/docker/` and copy-paste my `docker-compose.yml` template in there.
|
||||||
|
|
||||||
|
`docker-compose.yml`
|
||||||
|
```yaml
|
||||||
|
services:
|
||||||
|
gotify:
|
||||||
|
image: gotify/server
|
||||||
|
container_name: gotify
|
||||||
|
volumes:
|
||||||
|
- /appli/data/gotify/data/:/app/data
|
||||||
|
environment:
|
||||||
|
- TZ=Europe/Paris
|
||||||
|
- GOTIFY_DEFAULTUSER_NAME=${GOTIFY_DEFAULTUSER_NAME}
|
||||||
|
- GOTIFY_DEFAULTUSER_PASS=${GOTIFY_DEFAULTUSER_PASS}
|
||||||
|
networks:
|
||||||
|
- web
|
||||||
|
labels:
|
||||||
|
- traefik.enable=true
|
||||||
|
- traefik.http.routers.gotify.rule=Host(`gotify.vezpi.me`)
|
||||||
|
- traefik.http.routers.gotify.entrypoints=https
|
||||||
|
- traefik.http.routers.gotify.tls.certresolver=letsencrypt
|
||||||
|
- traefik.http.services.gotify.loadbalancer.server.port=80
|
||||||
|
restart: always
|
||||||
|
|
||||||
|
networks:
|
||||||
|
web:
|
||||||
|
external: true
|
||||||
|
```
|
||||||
|
|
||||||
|
`.env`
|
||||||
|
```
|
||||||
|
GOTIFY_DEFAULTUSER_NAME=vez
|
||||||
|
GOTIFY_DEFAULTUSER_PASS=<password>
|
||||||
|
```
|
||||||
|
|
||||||
|
In the [documentation](https://gotify.net/docs/config), I can see that several database backends can be used; by default it uses **sqlite3**, which will be fine for the test. Switching to **PostgreSQL** could be an option if I decide to stick with Gotify. On that same page, I can also see the different environment variables I can use to configure the server from the `docker-compose.yml` file.
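
If I later switch to **PostgreSQL**, the change should presumably come down to the database environment variables, something along these lines (host, user and password are placeholders):

```bash
# Hypothetical sketch: point Gotify at PostgreSQL instead of the default sqlite3
GOTIFY_DATABASE_DIALECT=postgres
GOTIFY_DATABASE_CONNECTION="host=db port=5432 user=gotify dbname=gotify password=<password> sslmode=disable"
```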
|
||||||
|
|
||||||
|
When my config files are ready, I create a new entry in my Caddy plugin in OPNsense to forward my new Gotify URL: https://gotify.vezpi.me.
|
||||||
|
|
||||||
|
I also create the folder `/appli/data/gotify/data/` in `dockerVM` to mount it as a volume and store data:
|
||||||
|
```bash
|
||||||
|
mkdir -p /appli/data/gotify/data/
|
||||||
|
```
|
||||||
|
|
||||||
|
Finally, I spin up the Docker stack:
|
||||||
|
```bash
|
||||||
|
$ docker compose up -d
|
||||||
|
[+] Running 5/5
|
||||||
|
✔ gotify Pulled
|
||||||
|
✔ 63ce8e957633 Pull complete
|
||||||
|
✔ e7def9680541 Pull complete
|
||||||
|
✔ 9a1821c438b4 Pull complete
|
||||||
|
✔ ad316556c9ff Pull complete
|
||||||
|
[+] Running 1/1
|
||||||
|
✔ Container gotify Started
|
||||||
|
```
|
||||||
|
|
||||||
|
✅ Reaching the URL https://gotify.vezpi.me gives me the Gotify login page:
|
||||||
|

|
||||||
|
|
||||||
|
After login, I can access the dashboard, with no messages obviously:
|
||||||
|

|
||||||
|
|
||||||
|
### Creating an Application
|
||||||
|
|
||||||
|
To allow messages to be pushed, I first need to create an application under which the messages will be grouped. This can be done in two ways:
|
||||||
|
- **WebUI**
|
||||||
|
- **REST-API**
|
||||||
|
|
||||||
|
For the test, I will use the WebUI: I click the `APPS` button at the top, then `CREATE APPLICATION`, and choose a wonderful application name and description.
|
||||||
|

|
||||||
|
|
||||||
|
Once my application is created, a token is generated for it. I can edit the application to change anything; I can also upload an icon.
|
||||||
|

|
||||||
|
|
||||||
|
### Testing
|
||||||
|
|
||||||
|
My application is now visible in the sidebar, so let's try to send a message. To push it, I can use `curl` with the application's token.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl "https://gotify.vezpi.me/message?token=<apptoken>" -F "title=Cooked!" -F "message=The potoaries are ready!" -F "priority=5"
|
||||||
|
```
|
||||||
|
I instantly received the notification on my mobile and on my browser.
|
||||||
|
|
||||||
|
I then sent another message, but with a lower priority: `-2`. I didn't get any notification in my browser, and I can see a slight difference between the two messages. On my mobile, only my watch received it: nothing appeared on the screen, but I can find the message in the notification center.
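
For the record, that second test was simply the same call with a lower priority value:

```bash
# Same message pushed again with a low priority: no pop-up, it only lands in the notification center
curl "https://gotify.vezpi.me/message?token=<apptoken>" -F "title=Cooked!" -F "message=The potatoes are ready!" -F "priority=-2"
```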
|
||||||
|

|
||||||
|
|
||||||
|
### Android App
|
||||||
|
|
||||||
|
Here are some screenshots from my Android device:
|
||||||
|

|
||||||
|
|
||||||
|
For some reason, a notification randomly pops up to tell me that I'm connected to Gotify:
|
||||||
|

|
||||||
|
### Conclusion
|
||||||
|
|
||||||
|
In the [documentation](https://gotify.net/docs/msgextras), I found some extra features, like adding images or click actions. In summary, it does the job, that's it. The installation process is easy and usage is not hard, but I need to create an application to get a token, then pass this token every time I want to push messages.
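
As an illustration, a click action from those message extras would presumably look something like this (token and target URL are placeholders):

```bash
# Illustrative sketch: Gotify message with a URL opened when the notification is clicked
curl -X POST "https://gotify.vezpi.me/message?token=<apptoken>" \
  -H "Content-Type: application/json" \
  -d '{
        "title": "Cooked!",
        "message": "The potatoes are ready!",
        "priority": 5,
        "extras": {
          "client::notification": {
            "click": { "url": "https://blog.vezpi.com" }
          }
        }
      }'
```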
|
||||||
|
|
||||||
|
## Ntfy
|
||||||
|
|
||||||
|
Ntfy seems very clean, let's install it and see what it's got!
|
||||||
|
|
||||||
|
### Installation
|
||||||
|
|
||||||
|
Same story here with `docker compose` on `dockerVM`. I create a new `ntfy` folder in `/appli/docker/` and I copy paste the `docker-compose.yml` template.
|
||||||
|
|
||||||
|
`docker-compose.yml`
|
||||||
|
```yaml
services:
  ntfy:
    image: binwiederhier/ntfy
    container_name: ntfy
    command:
      - serve
    volumes:
      - /appli/data/ntfy/data:/var/cache/ntfy
    environment:
      - TZ=Europe/Paris
      - NTFY_BASE_URL=https://ntfy.vezpi.me
      - NTFY_CACHE_FILE=/var/cache/ntfy/cache.db
      - NTFY_AUTH_FILE=/var/cache/ntfy/auth.db
      - NTFY_ATTACHMENT_CACHE_DIR=/var/cache/ntfy/attachments
      - NTFY_AUTH_DEFAULT_ACCESS=deny-all
      - NTFY_BEHIND_PROXY=true
      - NTFY_ENABLE_LOGIN=true
    user: 1000:1000
    networks:
      - web
    labels:
      - traefik.enable=true
      - traefik.http.routers.ntfy.rule=Host(`ntfy.vezpi.me`)
      - traefik.http.routers.ntfy.entrypoints=https
      - traefik.http.routers.ntfy.tls.certresolver=letsencrypt
      - traefik.http.services.ntfy.loadbalancer.server.port=80
    healthcheck:
      test: ["CMD-SHELL", "wget -q --tries=1 http://ntfy:80/v1/health -O - | grep -Eo '\"healthy\"\\s*:\\s*true' || exit 1"]
      interval: 60s
      timeout: 10s
      retries: 3
      start_period: 40s
    restart: unless-stopped

networks:
  web:
    external: true
```
|
||||||
|
|
||||||
|
I also create the persistent volume folder `/appli/data/ntfy/data/` in `dockerVM`:
|
||||||
|
```bash
|
||||||
|
mkdir -p /appli/data/ntfy/data/
|
||||||
|
```
|
||||||
|
|
||||||
|
The [documentation](https://docs.ntfy.sh/config/) is impressive; I tried to gather the config needed for a quick start. I should be good to start the server.
|
||||||
|
|
||||||
|
Again here, I create a new domain for my Caddy reverse proxy plugin in OPNsense for the URL https://ntfy.vezpi.me.
|
||||||
|
```bash
|
||||||
|
$ docker compose up -d
|
||||||
|
[+] Running 4/4
|
||||||
|
✔ ntfy Pulled
|
||||||
|
✔ f18232174bc9 Already exists
|
||||||
|
✔ f5bf7a328fac Pull complete
|
||||||
|
✔ 572c745ef6c3 Pull complete
|
||||||
|
[+] Running 1/1
|
||||||
|
✔ Container ntfy Started
|
||||||
|
```
|
||||||
|
|
||||||
|
✅ The URL https://ntfy.vezpi.me takes me to the Ntfy dashboard:
|
||||||
|

|
||||||
|
|
||||||
|
At first I don't have any users and none are created by default. Since I denied all anonymous access in the config, I need to create one.
|
||||||
|
|
||||||
|
To list the users, I can use this command:
|
||||||
|
```bash
|
||||||
|
$ docker exec -it ntfy ntfy user list
|
||||||
|
user * (role: anonymous, tier: none)
|
||||||
|
- no topic-specific permissions
|
||||||
|
- no access to any (other) topics (server config)
|
||||||
|
```
|
||||||
|
|
||||||
|
I create a user with admin privileges:
|
||||||
|
```bash
|
||||||
|
$ docker exec -it ntfy ntfy user add --role=admin vez
|
||||||
|
user vez added with role admin
|
||||||
|
```
|
||||||
|
|
||||||
|
I can now log into the WebUI and switch to dark mode, my eyes are grateful.
|
||||||
|
|
||||||
|
### Topics
|
||||||
|
|
||||||
|
In Ntfy there are no applications to create; instead, messages are grouped into topics, which are more readable than a token when sending messages. Once the topic is created, I can change its display name or send test messages. On the WebUI, though, I can't find any option to change the icon, while the option exists in the Android app, which is not really convenient.
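
As a possible workaround, ntfy also seems to support attaching an icon per message with the `Icon` header, something like this (the icon URL is a placeholder, and this only affects the Android notification):

```bash
# Hypothetical sketch: set the notification icon at publish time instead of in the app
curl \
  -H "Icon: https://example.com/potato.png" \
  -H "Title: Cooked!" \
  -d "The potatoes are ready!" \
  -u "vez:<password>" \
  https://ntfy.vezpi.me/potato
```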
|
||||||
|

|
||||||
|
|
||||||
|
### Testing
|
||||||
|
|
||||||
|
Sending a message is actually harder than I thought. Because I set up authentication, I also need to authenticate to send messages:
|
||||||
|
```bash
|
||||||
|
curl \
|
||||||
|
-H "Title: Cooked!" \
|
||||||
|
-H "Priority: high" \
|
||||||
|
-d "The potatoes are ready!" \
|
||||||
|
-u "vez:<password>" \
|
||||||
|
https://ntfy.vezpi.me/potato
|
||||||
|
```
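
Instead of putting the password in the command, an access token should presumably work too, something along these lines (token value is a placeholder, just a sketch):

```bash
# Sketch: generate an access token for the user, then publish with it
docker exec -it ntfy ntfy token add vez
curl \
  -H "Authorization: Bearer tk_<token>" \
  -H "Title: Cooked!" \
  -d "The potatoes are ready!" \
  https://ntfy.vezpi.me/potato
```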
|
||||||
|
|
||||||
|
### Android App
|
||||||
|
|
||||||
|
Here are some screenshots of the Ntfy Android app:
|
||||||
|

|
||||||
|
### Conclusion
|
||||||
|
|
||||||
|
Ntfy is a beautiful application with really strong [documentation](https://docs.ntfy.sh/). The possibilities are endless and the list of integrations is impressive. The installation was not hard but required a bit more setup. The need to use the CLI to configure users and permissions is not really convenient.

On the Android app, I regret that there is no view showing the messages from all topics at once. On the WebUI side, I would have liked to be able to set the topic icons. What I found interesting is the possibility of having topics from different servers.
|
||||||
|
|
||||||
|
## Comparison
|
||||||
|
|
||||||
|
**Gotify** is simple: all users have access to all applications. You don't need user credentials to push messages, only the application token. The Android app is effective, but personally, even if the icon is funny, I don't really like it.

**Ntfy** feels more advanced and complete, with fine-grained permissions. The UI is sleek yet simple, and the possibilities are endless.

Overall, only small details make me favor Ntfy over Gotify, e.g., having access to topics from different servers, the ACLs, or being able to add emojis to messages, but both applications are really good for the job.
|
||||||
|
|
||||||
|
## Implementing Real Scenario Notification
|
||||||
|
|
||||||
|
While I was setting up my CI/CD pipeline for my blog deployment, I wanted to be warned whenever something happens. Let's see how I can implement that with Ntfy.
|
||||||
|
|
||||||
|
### Access Control
|
||||||
|
|
||||||
|
I could use my `admin` user both to send messages from the pipeline and to receive them on my Android device. While this is easier to set up, I want to apply the principle of least privilege, which Ntfy allows. I will therefore create a dedicated user for my CI/CD pipeline and another for my Android device.
|
||||||
|
|
||||||
|
#### Pipeline User
|
||||||
|
|
||||||
|
This one will only be allowed to send messages on the `blog` topic; I call it `gitea_blog`.
|
||||||
|
```bash
|
||||||
|
$ ntfy user add gitea_blog
|
||||||
|
user gitea_blog added with role user
|
||||||
|
$ ntfy access gitea_blog blog wo
|
||||||
|
granted write-only access to topic blog
|
||||||
|
|
||||||
|
user gitea_blog (role: user, tier: none)
|
||||||
|
- write-only access to topic blog
|
||||||
|
```
|
||||||
|
|
||||||
|
I quickly try to send a message on that topic:
|
||||||
|
```bash
|
||||||
|
$ curl -u gitea_blog:<password> -d "Message test from gitea_blog!" https://ntfy.vezpi.me/blog
|
||||||
|
{"id":"xIgwz9dr1w9Z","time":1749587681,"expires":1749630881,"event":"message","topic":"blog","message":"Message test from gitea_blog!"}
|
||||||
|
```
|
||||||
|
|
||||||
|

|
||||||
|
✅ Message received!
|
||||||
|
|
||||||
|
I also try to send a message on my test topic:
|
||||||
|
```bash
|
||||||
|
$ curl -u gitea_blog:<password> -d "Message test from gitea_blog!" https://ntfy.vezpi.me/potato
|
||||||
|
{"code":40301,"http":403,"error":"forbidden","link":"https://ntfy.sh/docs/publish/#authentication"}
|
||||||
|
```
|
||||||
|
❌ Denied as expected.
|
||||||
|
|
||||||
|
#### Android Device User
|
||||||
|
|
||||||
|
From my Android device I only want to receive messages, but on all topics. I create the user `android_s25u`:
|
||||||
|
```bash
|
||||||
|
$ ntfy user add android_s25u
|
||||||
|
user android_s25u added with role user
|
||||||
|
$ ntfy access android_s25u "*" ro
|
||||||
|
granted read-only access to topic *
|
||||||
|
|
||||||
|
user android_s25u (role: user, tier: none)
|
||||||
|
- read-only access to topic *
|
||||||
|
```
|
||||||
|
|
||||||
|
✅ After setting up the user in the Ntfy Android app, I can read my messages on `https://ntfy.vezpi.me/blog` and also on the test topic.
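
Outside of the app, the same read-only user can presumably also follow a topic from the command line through the subscribe API, for example:

```bash
# Sketch: stream messages from the blog topic as JSON with the read-only user
curl -s -u android_s25u:<password> https://ntfy.vezpi.me/blog/json

# Or only fetch the cached messages and exit
curl -s -u android_s25u:<password> "https://ntfy.vezpi.me/blog/json?poll=1"
```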
|
||||||
|
|
||||||
|
### Implementation
|
||||||
|
|
||||||
|
Now that my users are set up, I want to add a `Notify` job to my CI/CD pipeline for the blog deployment in **Gitea**. You can find the full workflow in [this article]({{< ref "post/4-blog-deployment-ci-cd-pipeline-gitea-actions" >}}).
|
||||||
|
|
||||||
|
#### Create a Secret
|
||||||
|
|
||||||
|
To allow my Gitea Runner to use the `gitea_blog` user in its jobs, I create a secret. In the `Blog` Gitea repository, I go to `Settings`, then `Actions` > `Secrets` > `Add Secret`, and set the secret value in the `<user>:<password>` format:
|
||||||
|

|
||||||
|
|
||||||
|
### Write the `Notify` Code
|
||||||
|
|
||||||
|
Now I can write the code that will send me a message when a new deployment occurs.

If the deployment is successful, the priority will be minimal: no notification needed on my mobile, just a record I can check in the Ntfy Android app if needed.

If anything fails, I want to be notified on my mobile with a higher priority. Ntfy lets me add actions to my notifications, so I will create two:
- **View Run**: Direct link to the workflow run in Gitea to see what happened.
- **Verify Blog**: Link to the blog to make sure it is still online.

```yaml
Notify:
  needs: [Check-Rebuild, Build, Deploy-Staging, Test-Staging, Merge, Deploy-Production, Test-Production, Clean]
  runs-on: ubuntu
  if: always()
  env:
    NTFY_URL: https://ntfy.vezpi.me
    NTFY_TOPIC: blog
    NTFY_TOKEN: ${{ secrets.NTFY_CREDENTIALS }}
  steps:
    - name: Notify Workflow Result
      run: |
        if [[
          "${{ needs.Check-Rebuild.result }}" == "success" &&
          ("${{ needs.Build.result }}" == "success" || "${{ needs.Build.result }}" == "skipped") &&
          "${{ needs.Deploy-Staging.result }}" == "success" &&
          "${{ needs.Test-Staging.result }}" == "success" &&
          "${{ needs.Merge.result }}" == "success" &&
          "${{ needs.Deploy-Production.result }}" == "success" &&
          "${{ needs.Test-Production.result }}" == "success" &&
          ("${{ needs.Clean.result }}" == "success" || "${{ needs.Clean.result }}" == "skipped")
        ]]; then
          curl -H "Priority: min" \
            -H "Tags: white_check_mark" \
            -d "Blog workflow completed successfully." \
            -u ${NTFY_TOKEN} \
            ${NTFY_URL}/${NTFY_TOPIC}
        else
          curl -H "Priority: high" \
            -H "Tags: x" \
            -H "Actions: view, View Run, ${{ gitea.server_url }}/${{ gitea.repository }}/actions/runs/${{ gitea.run_number }}, clear=true; \
              view, Verify Blog, https://blog.vezpi.com, clear=true" \
            -d "Blog workflow failed!" \
            -u ${NTFY_TOKEN} \
            ${NTFY_URL}/${NTFY_TOPIC}
        fi
```
|
||||||
|
|
||||||
|
✅ I tested both cases, they work as expected:
|
||||||
|

|
||||||
|
|
||||||
|
## Conclusion
|
||||||
|
|
||||||
|
After testing **Gotify** and **Ntfy**, I have found my next notification system. They are both good for the job, but I had to pick one and I have a slight preference for Ntfy.

The application would be perfect if I could manage users and access from the WebUI; I would also prefer to manage topic icons globally rather than having to upload them from my mobile.

Anyway, I'm very satisfied with the result of this first implementation and I look forward to adding notifications elsewhere!
|
708
content/post/6-ac-automation-home-assistant-node-red.fr.md
Normal file
@@ -0,0 +1,708 @@
---
|
||||||
|
slug: ac-automation-home-assistant-node-red
|
||||||
|
title: Automatisation Complète de la Climatisation avec Home Assistant et Node-RED
|
||||||
|
description: Comment j’automatise ma clim avec Home Assistant et Node-RED pour réagir à la température, l’humidité et à tous les évènements quotidiens.
|
||||||
|
date: 2025-06-27
|
||||||
|
draft: false
|
||||||
|
tags:
|
||||||
|
- home-automation
|
||||||
|
- home-assistant
|
||||||
|
- node-red
|
||||||
|
categories:
|
||||||
|
- automation
|
||||||
|
---
|
||||||
|
## Intro
|
||||||
|
|
||||||
|
Dans mon appartement, j’ai un système de climatisation Daikin, qui me permet de rafraîchir en été mais aussi de chauffer en hiver. Il est composé de 3 unités intérieures :
|
||||||
|
- Salon
|
||||||
|
- Chambre parentale
|
||||||
|
- Couloir (juste en face de mon bureau et de la chambre de mon fils)
|
||||||
|
|
||||||
|
J’ai toujours trouvé ça pénible de devoir les allumer manuellement quand j’en avais besoin, et j’oubliais souvent de les éteindre ensuite, sans parler de la télécommande que je passais mon temps à chercher.
|
||||||
|
|
||||||
|
Et si je pouvais automatiser tout ça ? Après tout, j’utilise déjà Home Assistant pour piloter beaucoup de choses chez moi, alors contrôler la clim, ça me semble logique.
|
||||||
|
|
||||||
|
### Home Assistant
|
||||||
|
|
||||||
|
Home Assistant, c’est le cerveau de ma maison connectée. Il relie tous mes appareils (lumières, capteurs, volets, etc.) dans une interface unique. Sa vraie force, c’est la possibilité de créer des automatisations : si quelque chose se passe, alors fait ça. Des actions simples comme “allumer la lumière de la cuisine quand un mouvement est détecté” se mettent en place en quelques clics. Et pour des scénarios plus avancés, Home Assistant propose un système de scripts en YAML avec des conditions, des minuteries, des déclencheurs, et même du templating.
|
||||||
|
|
||||||
|
Mais dès qu’on commence à faire des automatisations un peu complexes, qui dépendent de plusieurs capteurs, d’horaires spécifiques ou de la présence de quelqu’un, ça devient vite difficile à lire. Les blocs de code YAML s’allongent, et on ne sait plus trop ce qui fait quoi, surtout quand on veut corriger un petit détail plusieurs semaines plus tard.
|
||||||
|
|
||||||
|
### Node-RED
|
||||||
|
|
||||||
|
C’est exactement pour ça que je suis passé à Node-RED. C’est un outil visuel qui permet de construire des logiques avec des blocs appelés “nœuds”, qu’on relie entre eux avec des flèches pour créer un **flow**. Chaque nœud fait une petite action : déclencher à une certaine heure, vérifier une condition, envoyer une commande à un appareil, etc. Au lieu d’écrire du YAML, on glisse les éléments, on les connecte, et c’est tout.
|
||||||
|
|
||||||
|
Node-RED ne remplace pas Home Assistant, il le renforce. Je ne détaillerai pas l'installation de Node-RED ni son intégration à HA, je l'ai fait il y a deux ans, mais de mémoire c'est assez simple.
|
||||||
|
|
||||||
|
## Ancien Workflow
|
||||||
|
|
||||||
|
J’avais déjà une solution plutôt efficace pour contrôler ma climatisation via Home Assistant et Node-RED, mais je voulais l’améliorer pour qu’elle prenne aussi en compte le taux d’humidité dans l’appartement. Mon workflow actuel, bien qu’il fonctionne, n’était pas vraiment évolutif et assez difficile à maintenir :
|
||||||
|

|
||||||
|
|
||||||
|
## Nouveau Workflow
|
||||||
|
|
||||||
|
Plutôt que de bricoler ce flow existant, j’ai préféré repartir de zéro avec le même objectif : piloter le système de climatisation en prenant en compte tous les capteurs disponibles : thermomètres, humidité, capteurs d’ouverture, présence des occupants, moment de la journée, etc.
|
||||||
|
|
||||||
|
### Objectifs
|
||||||
|
|
||||||
|
L’idée est assez simple : ne plus avoir à penser à la climatisation, tout en restant efficace.
|
||||||
|
|
||||||
|
Mais concrètement, qu’est-ce que ça veut dire ? Je veux que la température et le taux d’humidité restent dans des valeurs confortables, que je sois présent ou non. Si j’ouvre les fenêtres, la clim doit s’arrêter. Si l’air est trop humide, je veux qu’il soit asséché. Si j’allume ou éteins manuellement la clim, je ne veux pas que ça écrase mes réglages. La nuit, je n’ai pas besoin de rafraîchir le salon et je veux aussi que le système soit silencieux, etc.
|
||||||
|
|
||||||
|
Pour m’aider à faire tout ça, j’utilise 4 [capteurs de température et d’humidité Aqara](https://eu.aqara.com/fr-eu/products/aqara-temperature-and-humidity-sensor), un dans chacune de mes pièces principales. J’utilise aussi quelques [capteurs d’ouverture Aqara](https://eu.aqara.com/fr-eu/products/aqara-door-and-window-sensor) pour savoir si une fenêtre est ouverte.
|
||||||
|
|
||||||
|
### Workflow
|
||||||
|
|
||||||
|
Laissez-moi vous présenter mon nouveau workflow de climatisation dans Node-RED, et vous expliquer en détail comment il fonctionne :
|
||||||
|

|
||||||
|
|
||||||
|
#### 1. Capteurs de Température
|
||||||
|
|
||||||
|
Dans le premier nœud, j’ai regroupé tous les capteurs thermiques dans un seul `trigger state node`, en ajoutant non seulement la température mais aussi le taux d’humidité géré par chaque capteur. Ce nœud contient donc une liste de 8 entités (2 pour chaque capteur). À chaque fois qu’une de ces 8 valeurs change, le nœud est déclenché :
|
||||||
|

|
||||||
|
|
||||||
|
Chacun de mes capteurs thermiques porte un nom de couleur en français, car ils ont tous un autocollant coloré pour les distinguer :
|
||||||
|
- **Jaune** : Salon
|
||||||
|
- **Bleu** : Chambre
|
||||||
|
- **Rouge** : Bureau
|
||||||
|
- **Vert** : Chambre de mon fils
|
||||||
|
|
||||||
|
Le deuxième nœud est un `function node` dont le rôle est de déterminer à quelle pièce appartient le capteur :
|
||||||
|
```js
|
||||||
|
const association = {
|
||||||
|
"temperature_jaune": "salon",
|
||||||
|
"temperature_bleu": "chambre",
|
||||||
|
"temperature_rouge": "couloir",
|
||||||
|
"temperature_vert": "couloir"
|
||||||
|
};
|
||||||
|
|
||||||
|
// Match pattern like: sensor.temperature_rouge_temperature
|
||||||
|
const match = msg.topic.match(/^sensor\.(.+)_(temperature|humidity)$/);
|
||||||
|
|
||||||
|
if (!match) {
|
||||||
|
node.warn("Topic format not recognized: " + msg.topic);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
msg.payload = {
|
||||||
|
room: association[match[1]],
|
||||||
|
sensor: match[1]
|
||||||
|
};
|
||||||
|
|
||||||
|
return msg;
|
||||||
|
```
|
||||||
|
|
||||||
|
Pour le dernier nœud, dans la majorité des cas, les capteurs envoient deux messages simultanés : l’un pour la température, l’autre pour l’humidité. J’ai donc ajouté un `join node` pour fusionner ces deux messages s’ils sont envoyés dans la même seconde :
|
||||||
|

|
||||||
|
|
||||||
|
#### 2. Notification
|
||||||
|
|
||||||
|
Il peut arriver que les capteurs de température n’envoient plus d’état pendant un certain temps, pour une raison ou une autre. Dans ce cas, ils renvoient simplement leur dernière valeur connue, ce qui peut bloquer l’unité de climatisation associée.
|
||||||
|
|
||||||
|
La solution que j’ai trouvée efficace consiste à envoyer une notification si un capteur n’a pas transmis de nouvelle valeur depuis plus de 3 heures. En fonctionnement normal, chaque capteur envoie une mise à jour environ toutes les 15 minutes.
|
||||||
|
|
||||||
|
Le premier nœud est un `function node` un peu technique, qui crée une variable de flux comme minuteur pour chaque capteur. Une fois le délai écoulé, un message est envoyé au nœud suivant :
|
||||||
|
```js
|
||||||
|
const sensor = msg.payload.sensor;
|
||||||
|
const timeoutKey = `watchdog_${sensor}`;
|
||||||
|
const messages = {
|
||||||
|
"temperature_jaune": {"title": "Température Salon", "message": "Capteur de température du salon semble hors service"},
|
||||||
|
"temperature_bleu": {"title": "Température Chambre", "message": "Capteur de température de la chambre semble hors service"},
|
||||||
|
"temperature_rouge": {"title": "Température Bureau", "message": "Capteur de température du bureau semble hors service"},
|
||||||
|
"temperature_vert": {"title": "Température Raphaël", "message": "Capteur de température de Raphaël semble hors service"}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Clear existing timer
|
||||||
|
const existing = flow.get(timeoutKey);
|
||||||
|
if (existing) clearTimeout(existing);
|
||||||
|
|
||||||
|
// Set new timer
|
||||||
|
const timer = setTimeout(() => {
|
||||||
|
node.send({
|
||||||
|
payload: `⚠️ No update from ${sensor} in 3 hours.`,
|
||||||
|
sensor: sensor,
|
||||||
|
title: messages[sensor]["title"],
|
||||||
|
message: messages[sensor]["message"]
|
||||||
|
});
|
||||||
|
}, 3 * 60 * 60 * 1000); // 3 hours
|
||||||
|
|
||||||
|
flow.set(timeoutKey, timer);
|
||||||
|
|
||||||
|
return null; // Don't send anything now
|
||||||
|
```
|
||||||
|
|
||||||
|
Le second nœud est un `call service node` qui envoie une notification sur mon téléphone Android avec les informations fournies :
|
||||||
|

|
||||||
|
|
||||||
|
#### 3. Curseurs de Température
|
||||||
|
|
||||||
|
Pour pouvoir ajuster la température sans avoir à modifier tout le workflow, j’ai créé deux entrées (ou helper) Home Assistant, de type number, pour chaque unité de climatisation, ce qui me fait un total de 6 entrées :
|
||||||
|

|
||||||
|
|
||||||
|
Ces valeurs représentent la température de base utilisée pour le calcul des seuils, en fonction des offsets que je détaillerai plus loin.
|
||||||
|
|
||||||
|
Le premier nœud est un `trigger state node` qui regroupe les 6 entités. Si je modifie l’une de ces valeurs, le nœud est déclenché :
|
||||||
|

|
||||||
|
|
||||||
|
Le deuxième nœud est un `function node`, qui permet de déterminer la pièce concernée :
|
||||||
|
```js
|
||||||
|
const association = {
|
||||||
|
"input_number.temp_ete_salon": "salon",
|
||||||
|
"input_number.temp_hiver_salon": "salon",
|
||||||
|
"input_number.temp_ete_chambre": "chambre",
|
||||||
|
"input_number.temp_hiver_chambre": "chambre",
|
||||||
|
"input_number.temp_ete_couloir": "couloir",
|
||||||
|
"input_number.temp_hiver_couloir": "couloir"
|
||||||
|
};
|
||||||
|
|
||||||
|
msg.payload = { room: association[msg.topic] };
|
||||||
|
return msg;
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 4. Interrupteurs
|
||||||
|
|
||||||
|
Dans Home Assistant, j’utilise d’autres entrées, mais cette fois sous forme de booléens. Le plus important est celui dédié à la climatisation, qui me permet de désactiver manuellement tout le workflow. J’en ai d’autres qui sont automatisés, par exemple pour le moment de la journée ou la détection de présence à la maison.
|
||||||
|
|
||||||
|
J’utilise un autre `trigger state node` qui regroupe tous mes interrupteurs sous forme de booléens, y compris un bouton de test utilisé pour le débogage :
|
||||||
|

|
||||||
|
|
||||||
|
Comme ces interrupteurs impactent tout l’appartement (et non une seule unité), le nœud suivant est un `change node` qui définit la valeur de la pièce à `partout` :
|
||||||
|

|
||||||
|
|
||||||
|
#### 5. Fenêtres
|
||||||
|
|
||||||
|
Les derniers déclencheurs sont les fenêtres. Si j’ouvre ou ferme une fenêtre située près d’une unité, cela active le workflow. J’ai des capteurs d’ouverture sur certaines fenêtres, mais pour l’unité du couloir, j’utilise l’état des fenêtres Velux. Certaines pièces ayant plusieurs fenêtres, j’ai créé une entrée de type groupe pour les regrouper.
|
||||||
|
|
||||||
|
Le premier nœud est le dernier `trigger state node`. La valeur retournée est une string qu’il faudra ensuite convertir en booléen :
|
||||||
|

|
||||||
|
|
||||||
|
Juste après, un autre `function node` permet d’identifier la pièce concernée :
|
||||||
|
```js
|
||||||
|
const association = {
|
||||||
|
"binary_sensor.groupe_fenetre_salon": "salon",
|
||||||
|
"binary_sensor.fenetre_chambre_contact": "chambre",
|
||||||
|
"cover.groupe_fenetre_couloir": "couloir"
|
||||||
|
};
|
||||||
|
|
||||||
|
msg.payload = {
|
||||||
|
room: association[msg.topic]
|
||||||
|
};
|
||||||
|
return msg;
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 6. Fenêtre Watchdog
|
||||||
|
|
||||||
|
Quand j’ouvre une fenêtre, ce n’est pas forcément pour la laisser ouverte longtemps. Je peux simplement faire sortir le chat ou jeter un œil au portail. Je ne veux pas que la climatisation se coupe dès que j’ouvre une fenêtre. Pour contourner cela, j’ai mis en place un watchdog pour chaque unité, afin de retarder l’envoi du message pendant un certain temps.
|
||||||
|
|
||||||
|
Le premier nœud est un `switch node`. En fonction de la pièce transmise par le nœud précédent, il envoie le message au _watchdog_ correspondant :
|
||||||
|

|
||||||
|
|
||||||
|
Viennent ensuite les _watchdogs_, des `trigger nodes`, qui retardent le message pendant un certain temps, et prolongent ce délai si un autre message est reçu entre-temps :
|
||||||
|

|
||||||
|
|
||||||
|
#### 7. Climatisation Activée ?
|
||||||
|
|
||||||
|
Tous ces déclencheurs arrivent maintenant dans la chaîne de traitement, qui va déterminer ce que le système doit faire. Mais avant cela, on vérifie si l’automatisation est activée. J’ai ajouté ce kill switch au cas où, même si je l’utilise rarement.
|
||||||
|
|
||||||
|
Le premier nœud est un `delay node` qui régule le débit des messages entrants à 1 message par seconde :
|
||||||
|

|
||||||
|
|
||||||
|
Le deuxième nœud est un `current state node` qui vérifie si le booléen `climatisation` est activé :
|
||||||
|

|
||||||
|
|
||||||
|
#### 8. Configuration des pièces
|
||||||
|
|
||||||
|
L’idée ici est d’associer la configuration de la pièce au message. Chaque pièce a sa propre configuration : quelle unité est utilisée, quels capteurs sont associés, et surtout, dans quelles conditions elle doit s’allumer ou s’éteindre.
|
||||||
|
|
||||||
|
Les unités de climatisation disposent de 4 modes :
|
||||||
|
- Refroidissement (Cool)
|
||||||
|
- Déshumidification (Dry)
|
||||||
|
- Ventilation (Fan)
|
||||||
|
- Chauffage (Heat)
|
||||||
|
|
||||||
|
Pour déterminer quel mode utiliser, j’utilise des seuils pour chaque mode et la vitesse de ventilation, avec différents offsets selon la situation. Je peux ainsi définir un offset spécifique la nuit ou en cas d’absence. Je peux aussi définir un offset sur `disabled`, ce qui forcera l’arrêt de l’unité.
|
||||||
|
|
||||||
|
Le premier nœud est un `switch node`, basé sur la valeur `room`, qui oriente le message vers la configuration associée. Si la pièce est `partout`, le message est dupliqué vers les 3 configurations de pièce :
|
||||||
|

|
||||||
|
|
||||||
|
Il est ensuite connecté à un `change node`, qui ajoute la configuration dans `room_config`. Voici un exemple avec la configuration du salon :
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"threshold": {
|
||||||
|
"cool": {
|
||||||
|
"start": {
|
||||||
|
"1": 1,
|
||||||
|
"2": 1.5,
|
||||||
|
"3": 2,
|
||||||
|
"4": 2.5,
|
||||||
|
"quiet": 0
|
||||||
|
},
|
||||||
|
"stop": -0.3,
|
||||||
|
"target": -1,
|
||||||
|
"offset": {
|
||||||
|
"absent": 1,
|
||||||
|
"vacances": "disabled",
|
||||||
|
"fenetre": "disabled",
|
||||||
|
"matin": "disabled",
|
||||||
|
"jour": 0,
|
||||||
|
"soir": 0,
|
||||||
|
"nuit": "disabled"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"dry": {
|
||||||
|
"start": {
|
||||||
|
"quiet": -1
|
||||||
|
},
|
||||||
|
"stop": -1.5,
|
||||||
|
"offset": {
|
||||||
|
"absent": "1.5",
|
||||||
|
"vacances": "disabled",
|
||||||
|
"fenetre": "disabled",
|
||||||
|
"matin": "disabled",
|
||||||
|
"jour": 0,
|
||||||
|
"soir": 0,
|
||||||
|
"nuit": "disabled"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"fan_only": {
|
||||||
|
"start": {
|
||||||
|
"1": -0.3,
|
||||||
|
"quiet": -0.5
|
||||||
|
},
|
||||||
|
"stop": -0.7,
|
||||||
|
"offset": {
|
||||||
|
"absent": "disabled",
|
||||||
|
"vacances": "disabled",
|
||||||
|
"fenetre": "disabled",
|
||||||
|
"matin": "disabled",
|
||||||
|
"jour": 0,
|
||||||
|
"soir": 0,
|
||||||
|
"nuit": "disabled"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"heat": {
|
||||||
|
"start": {
|
||||||
|
"1": 0,
|
||||||
|
"2": -1.5,
|
||||||
|
"quiet": 0
|
||||||
|
},
|
||||||
|
"stop": 1,
|
||||||
|
"target": 1,
|
||||||
|
"offset": {
|
||||||
|
"absent": -1.5,
|
||||||
|
"vacances": -3,
|
||||||
|
"fenetre": "disabled",
|
||||||
|
"matin": 0,
|
||||||
|
"jour": 0,
|
||||||
|
"soir": 0,
|
||||||
|
"nuit": -1.5
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"unit": "climate.clim_salon",
|
||||||
|
"timer": "timer.minuteur_clim_salon",
|
||||||
|
"window": "binary_sensor.groupe_fenetre_salon",
|
||||||
|
"thermometre": "sensor.temperature_jaune_temperature",
|
||||||
|
"humidity": "sensor.temperature_jaune_humidity",
|
||||||
|
"temp_ete": "input_number.temp_ete_salon",
|
||||||
|
"temp_hiver": "input_number.temp_hiver_salon"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 9. Calcul
|
||||||
|
|
||||||
|
Maintenant que le message contient la configuration de la pièce, on entre dans la phase de calcul. On dispose du nom de l’unité de climatisation, des capteurs associés, de la température de base souhaitée et de l’offset à appliquer. À partir de ces données, on récupère les états actuels et on effectue les calculs.
|
||||||
|
|
||||||
|
Le premier nœud est un `delay node` qui régule le débit des messages entrants, car le bloc précédent a potentiellement généré trois messages si toutes les pièces sont concernées.
|
||||||
|
|
||||||
|
Le deuxième nœud est le plus important du workflow, un `function node` qui remplit plusieurs rôles :
|
||||||
|
|
||||||
|
- Récupère les états des capteurs depuis Home Assistant
|
||||||
|
- Calcule les seuils des modes à partir des offsets
|
||||||
|
- Désactive certains modes si les conditions sont remplies
|
||||||
|
- Injecte les valeurs dans le `payload`
|
||||||
|
```js
|
||||||
|
// --- Helper: Get Home Assistant state by entity ID ---
|
||||||
|
function getState(entityId) {
|
||||||
|
return global.get("homeassistant.homeAssistant.states")[entityId]?.state;
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- Determine current time period based on sensors ---
|
||||||
|
const periods = ["jour", "soir", "nuit", "matin"];
|
||||||
|
msg.payload.period = periods.find(p => getState(`binary_sensor.${p}`) === 'on') || 'unknown';
|
||||||
|
|
||||||
|
// --- Determine presence status (absent = inverse of presence) ---
|
||||||
|
const vacances = getState("input_boolean.absent");
|
||||||
|
const absent = getState("input_boolean.presence") === 'on' ? 'off' : 'on';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Recursively adds the base temperature and offset to all numeric start values in a threshold config
|
||||||
|
*/
|
||||||
|
function applyOffsetToThresholds(threshold, baseTemp, globalOffset) {
|
||||||
|
for (const [key, value] of Object.entries(threshold)) {
|
||||||
|
if (key === "offset") continue;
|
||||||
|
|
||||||
|
if (typeof value === 'object') {
|
||||||
|
applyOffsetToThresholds(value, baseTemp, globalOffset);
|
||||||
|
} else {
|
||||||
|
threshold[key] += baseTemp + globalOffset;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Calculates the global offset for a mode, based on presence, vacation, window, and time of day
|
||||||
|
*/
|
||||||
|
function calculateGlobalOffset(offsets, modeName, windowState, disabledMap) {
|
||||||
|
let globalOffset = 0;
|
||||||
|
|
||||||
|
for (const [key, offsetValue] of Object.entries(offsets)) {
|
||||||
|
let conditionMet = false;
|
||||||
|
|
||||||
|
if (key === msg.payload.period) conditionMet = true;
|
||||||
|
else if (key === "absent" && absent === 'on') conditionMet = true;
|
||||||
|
else if (key === "vacances" && vacances === 'on') conditionMet = true;
|
||||||
|
else if ((key === "fenetre" || key === "window") && windowState === 'on') conditionMet = true;
|
||||||
|
|
||||||
|
if (conditionMet) {
|
||||||
|
if (offsetValue === 'disabled') {
|
||||||
|
disabledMap[modeName] = true;
|
||||||
|
return 0; // Mode disabled immediately
|
||||||
|
}
|
||||||
|
|
||||||
|
globalOffset += parseFloat(offsetValue);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return globalOffset;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Main logic: compute thresholds for the specified room using the provided config
|
||||||
|
*/
|
||||||
|
const cfg = msg.payload.room_config;
|
||||||
|
const room = msg.payload.room;
|
||||||
|
|
||||||
|
// Normalize window sensor state
|
||||||
|
const rawWindow = getState(cfg.window);
|
||||||
|
const window = rawWindow === 'open' ? 'on' : rawWindow === 'closed' ? 'off' : rawWindow;
|
||||||
|
|
||||||
|
// Gather temperatures
|
||||||
|
const temps = cfg.thermometre.split(',')
|
||||||
|
.map(id => parseFloat(getState(id)))
|
||||||
|
.filter(v => !isNaN(v));
|
||||||
|
|
||||||
|
const temp_avg = temps.reduce((a, b) => a + b, 0) / temps.length;
|
||||||
|
const temp_min = Math.min(...temps);
|
||||||
|
const temp_max = Math.max(...temps);
|
||||||
|
|
||||||
|
// Gather humidity
|
||||||
|
const humidities = cfg.humidity.split(',')
|
||||||
|
.map(id => parseFloat(getState(id)))
|
||||||
|
.filter(v => !isNaN(v));
|
||||||
|
|
||||||
|
const humidity_avg = humidities.reduce((a, b) => a + b, 0) / humidities.length;
|
||||||
|
const humidity_min = Math.min(...humidities);
|
||||||
|
const humidity_max = Math.max(...humidities);
|
||||||
|
|
||||||
|
// Get base temps
|
||||||
|
const temp_ete = parseFloat(getState(cfg.temp_ete));
|
||||||
|
const temp_hiver = parseFloat(getState(cfg.temp_hiver));
|
||||||
|
|
||||||
|
// Process modes
|
||||||
|
const { threshold } = cfg;
|
||||||
|
const modes = ["cool", "dry", "fan_only", "heat"];
|
||||||
|
const disabled = {};
|
||||||
|
|
||||||
|
for (const mode of modes) {
|
||||||
|
const baseTemp = (mode === "heat") ? temp_hiver : temp_ete;
|
||||||
|
const globalOffset = calculateGlobalOffset(threshold[mode].offset, mode, window, disabled);
|
||||||
|
|
||||||
|
applyOffsetToThresholds(threshold[mode], baseTemp, globalOffset);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Final message
|
||||||
|
msg.payload = {
|
||||||
|
...msg.payload,
|
||||||
|
unit: cfg.unit,
|
||||||
|
timer: cfg.timer,
|
||||||
|
threshold,
|
||||||
|
window,
|
||||||
|
temp: {
|
||||||
|
min: temp_min,
|
||||||
|
max: temp_max,
|
||||||
|
avg: Math.round(temp_avg * 100) / 100
|
||||||
|
},
|
||||||
|
humidity: {
|
||||||
|
min: humidity_min,
|
||||||
|
max: humidity_max,
|
||||||
|
avg: Math.round(humidity_avg * 100) / 100
|
||||||
|
},
|
||||||
|
disabled
|
||||||
|
};
|
||||||
|
|
||||||
|
return msg;
|
||||||
|
```
|
||||||
|
|
||||||
|
Le troisième nœud est un `filter node`, qui ignore les messages suivants ayant un contenu similaire :
|
||||||
|

|
||||||
|
|
||||||
|
Le quatrième nœud vérifie si un verrou est actif à l’aide d’un `current state node`. On regarde si le minuteur associé à l’unité est inactif. Si ce n’est pas le cas, le message est ignoré :
|
||||||
|

|
||||||
|
|
||||||
|
Le dernier nœud est un autre `current state node` qui permet de récupérer l’état actuel de l’unité et ses propriétés :
|
||||||
|

|
||||||
|
|
||||||
|
#### 10. État Cible
|
||||||
|
|
||||||
|
Après les calculs, il s'agit maintenant de déterminer quel doit être le mode cible, quelle action effectuer pour converger vers ce mode à partir de l’état actuel, et le cas échéant, quelle vitesse de ventilation utiliser pour ce mode.
|
||||||
|
|
||||||
|
Les trois nœuds suivants sont des `function nodes`. Le premier détermine le mode cible à adopter parmi : `off`, `cool`, `dry`, `fan_only` et `heat` :
|
||||||
|
```js
|
||||||
|
const minHumidityThreshold = 52;
|
||||||
|
const maxHumidityThreshold = 57;
|
||||||
|
|
||||||
|
// Helper: check if mode can be activated or stopped
|
||||||
|
function isModeEligible(mode, temps, humidity, thresholds, currentMode) {
|
||||||
|
const isCurrent = (mode === currentMode);
|
||||||
|
const threshold = thresholds[mode];
|
||||||
|
|
||||||
|
if (msg.payload.disabled?.[mode]) return false;
|
||||||
|
|
||||||
|
// Determine which temperature to use for start/stop:
|
||||||
|
// start: temp.max (except heat uses temp.min)
|
||||||
|
// stop: temp.avg
|
||||||
|
let tempForCheckStart;
|
||||||
|
if (mode === "heat") {
|
||||||
|
tempForCheckStart = temps.min; // heat start uses min temp
|
||||||
|
} else {
|
||||||
|
tempForCheckStart = temps.max; // others start use max temp
|
||||||
|
}
|
||||||
|
const tempForCheckStop = temps.avg;
|
||||||
|
|
||||||
|
// Dry mode also depends on humidity thresholds
|
||||||
|
// humidity max for start, humidity avg for stop
|
||||||
|
let humidityForCheckStart = humidity.max;
|
||||||
|
let humidityForCheckStop = humidity.avg;
|
||||||
|
|
||||||
|
// For heat mode (inverted logic)
|
||||||
|
if (mode === "heat") {
|
||||||
|
if (!isCurrent) {
|
||||||
|
      const minStart = Math.min(...Object.values(threshold.start));
      return tempForCheckStart < minStart;
    } else {
      return tempForCheckStop < threshold.stop;
    }
  }

  // For dry mode (humidity-dependent)
  if (mode === "dry") {
    // Skip if humidity too low
    if (humidityForCheckStart <= (isCurrent ? minHumidityThreshold : maxHumidityThreshold)) return false;

    const minStart = Math.min(...Object.values(threshold.start));
    if (!isCurrent) {
      return tempForCheckStart >= minStart;
    } else {
      return tempForCheckStop >= threshold.stop;
    }
  }

  // For cool and fan_only
  if (!isCurrent) {
    const minStart = Math.min(...Object.values(threshold.start));
    return tempForCheckStart >= minStart;
  } else {
    return tempForCheckStop >= threshold.stop;
  }
}

// --- Main logic ---
const { threshold, temp, humidity, current_mode, disabled } = msg.payload;

const priority = ["cool", "dry", "fan_only", "heat"];
let target_mode = "off";

// Loop through priority list and stop at the first eligible mode
for (const mode of priority) {
  if (isModeEligible(mode, temp, humidity, threshold, current_mode)) {
    target_mode = mode;
    break;
  }
}

msg.payload.target_mode = target_mode;

if (target_mode === "cool" || target_mode === "heat") {
  msg.payload.set_temp = true;
}

return msg;
```

Le second compare le mode actuel avec le mode cible et choisit l'action à effectuer :
- **check** : le mode actuel est identique au mode cible.
- **start** : l'unité est éteinte, mais un mode actif est requis.
- **change** : l'unité est allumée, mais le mode cible est différent du mode actuel (et n'est pas `off`).
- **stop** : l'unité est allumée mais doit être arrêtée.
```js
let action = "check"; // default if both are same

if (msg.payload.current_mode === "off" && msg.payload.target_mode !== "off") {
  action = "start";
} else if (msg.payload.current_mode !== "off" && msg.payload.target_mode !== "off" && msg.payload.current_mode !== msg.payload.target_mode) {
  action = "change";
} else if (msg.payload.current_mode !== "off" && msg.payload.target_mode === "off") {
  action = "stop";
}

msg.payload.action = action;
return msg;
```

Le dernier nœud détermine la vitesse de ventilation appropriée pour le mode cible, en fonction des seuils définis :
```js
// Function to find the appropriate speed key based on temperature and mode
function findSpeed(thresholdStart, temperature, mode) {
  let closestSpeed = 'quiet';
  let closestTemp = mode === 'heat' ? Infinity : -Infinity;

  for (const speedKey in thresholdStart) {
    if (speedKey !== 'quiet') {
      const tempValue = thresholdStart[speedKey];
      if (mode === 'heat') {
        if (tempValue >= temperature && tempValue <= closestTemp) {
          closestSpeed = speedKey;
          closestTemp = tempValue;
        }
      } else { // cool, fan_only
        if (tempValue <= temperature && tempValue >= closestTemp) {
          closestSpeed = speedKey;
          closestTemp = tempValue;
        }
      }
    }
  }
  return closestSpeed;
}

if (msg.payload.target_mode && msg.payload.target_mode !== "off" && msg.payload.target_mode !== "dry") {
  const modeData = msg.payload.threshold[msg.payload.target_mode];
  if (modeData && modeData.start) {
    if (msg.payload.target_mode === "heat") {
      msg.payload.speed = findSpeed(modeData.start, msg.payload.temp.min, 'heat');
    } else {
      msg.payload.speed = findSpeed(modeData.start, msg.payload.temp.max, 'cool');
    }
  } else {
    node.error("Invalid mode data or missing 'start' thresholds", msg);
  }
} else {
  // No need for speed in 'off' or 'dry' modes
  msg.payload.speed = null;
}

return msg;
```
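À titre d'illustration, voici ce que retournerait `findSpeed` avec des seuils fictifs (les valeurs ci-dessous ne viennent pas de ma configuration réelle) :
```js
// Seuils de démarrage fictifs, déjà décalés par la température de base et l'offset
const start = { "1": 26, "2": 26.5, "3": 27, "quiet": 25.5 };

findSpeed(start, 26.7, 'cool'); // -> "2" : 26.5 est le seuil le plus élevé <= 26.7
findSpeed(start, 25.8, 'cool'); // -> "quiet" : aucun seuil numéroté n'est encore atteint
```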

#### 11. Choix de l'Action

En fonction de l’action à effectuer, le `switch node` va router le message vers le bon chemin :


#### 12. Démarrage

Lorsque l’action est `start`, il faut d’abord allumer l’unité. Cela prend entre 20 et 40 secondes selon le modèle, et une fois démarrée, l’unité est verrouillée pendant un court laps de temps pour éviter les messages suivants.

Le premier nœud est un `call service node` utilisant le service `turn_on` sur l’unité de climatisation :


Le second nœud est un autre `call service node` qui va démarrer un minuteur de verrouillage (lock timer) pour cette unité pendant 45 secondes :


Le dernier est un `delay node` de 5 secondes, pour laisser le temps à l’intégration Daikin de Home Assistant de refléter le nouvel état.

---

#### 13. Changement

L’action `change` est utilisée pour passer d’un mode à un autre, mais aussi juste après l’allumage.

Le premier nœud est un `call service node` utilisant le service `set_hvac_mode` sur l’unité de climatisation :


Le nœud suivant est un `delay node` de 5 secondes.

Le dernier vérifie, avec un `switch node`, si la température cible doit être définie. Cela n’est nécessaire que pour les modes `cool` et `heat` :


---

#### 14. Définir la Température Cible

La température cible est uniquement pertinente pour les modes `cool` et `heat`. Avec une climatisation classique, vous définissez une température à atteindre : c’est exactement ce qu’on fait ici. Mais comme chaque unité utilise son propre capteur interne pour vérifier cette température, je ne leur fais pas vraiment confiance. Si la température cible est déjà atteinte selon l’unité, elle ne soufflera plus du tout.

Le premier nœud est un autre `call service node` utilisant le service `set_temperature` :


Encore une fois, ce nœud est suivi d’un `delay node` de 5 secondes.

#### 15. Vérification

L’action `check` est utilisée presque tout le temps. Elle consiste uniquement à vérifier et comparer la vitesse de ventilation souhaitée, et à la modifier si nécessaire.

Le premier nœud est un `switch node` qui vérifie si la valeur `speed` est définie :


Le deuxième est un autre `switch node` qui compare la valeur `speed` avec la vitesse actuelle :


Enfin, le dernier nœud est un `call service node` utilisant le service `set_fan_mode` pour définir la vitesse du ventilateur :


#### 16. Arrêt

Lorsque l’action est `stop`, l’unité de climatisation est simplement arrêtée.

Le premier nœud est un `call service node` utilisant le service `turn_off` :


Le deuxième nœud est un autre `call service node` qui va démarrer le minuteur de verrouillage de cette unité pour 45 secondes.

#### 17. Intervention Manuelle

Parfois, pour une raison ou une autre, on souhaite utiliser la climatisation manuellement. Dans ce cas, on ne veut pas que le flux Node-RED vienne écraser notre réglage manuel, du moins pendant un certain temps.
Node-RED utilise son propre utilisateur dans Home Assistant, donc si une unité change d’état sans cet utilisateur, c’est qu’une intervention manuelle a eu lieu.

Le premier nœud est un `trigger state node`, qui envoie un message dès qu’une unité AC change d’état :


Le deuxième est un `function node` qui associe l’unité avec son minuteur :
```js
const association = {
  "climate.clim_salon": "timer.minuteur_clim_salon",
  "climate.clim_chambre": "timer.minuteur_clim_chambre",
  "climate.clim_couloir": "timer.minuteur_clim_couloir"
};

msg.payload = association[msg.topic];
return msg;
```

Le troisième est un `switch node` qui laisse passer le message uniquement si le `user_id` **n’est pas** celui de Node-RED :


Le quatrième est un autre `switch node` qui vérifie que le champ `user_id` **est bien défini** :


Enfin, le dernier nœud est un `call service node` utilisant le service `start` sur le minuteur de l’unité, avec sa durée par défaut (60 minutes) :


## TL;DR

Avec cette configuration, mon système de climatisation est entièrement automatisé, du refroidissement en été au chauffage en hiver, tout en gardant un œil sur le taux d’humidité.

Cela m’a demandé pas mal de réflexion, d’ajustements et de tests, mais au final je suis vraiment satisfait du résultat. C’est pourquoi je le partage ici, pour vous donner des idées sur ce qu’on peut faire en domotique.

Si vous pensez que certaines choses pourraient être faites autrement, n’hésitez pas à me contacter pour en discuter ou me proposer de nouvelles idées !
700
content/post/6-ac-automation-home-assistant-node-red.md
Normal file
@@ -0,0 +1,700 @@
---
slug: ac-automation-home-assistant-node-red
title: Full AC Automation with Home Assistant and Node-RED
description: How I automate my AC with Home Assistant and Node-RED to react to temperature, humidity and all daily events.
date: 2025-06-27
draft: false
tags:
  - home-automation
  - home-assistant
  - node-red
categories:
  - automation
---
## Intro

In my apartment, I have a Daikin air conditioning system to cool it down in summer, but also to warm it up in winter. It is composed of 3 indoor units:
- Living room
- Master bedroom
- Hallway (in front of my office and my kid's room)

I always found it tedious to turn them on when I needed them, I forgot to turn them off when I should have, and I was constantly chasing the remote.

What if I could automate it? After all, I already use Home Assistant to control many devices at home, so controlling the AC seemed natural to me.

### Home Assistant

Home Assistant is the brain of my smart home. It connects all my devices (lights, sensors, shutters, etc.) under a single interface. What makes it so powerful is the ability to create automations: if something happens, then do something else. Simple things like “turn on the kitchen light when the motion sensor is triggered” are a breeze. For more advanced workflows, it offers YAML-based scripts with conditions, delays, triggers, and templates.

That said, once automations start getting more complex, like reacting to multiple sensors, time ranges, or presence detection, they can quickly turn into long, hard-to-follow blocks of code. It’s easy to lose track of what does what, especially when you want to tweak just one small part weeks later.

### Node-RED

That’s exactly why I turned to Node-RED. It’s a visual tool that lets you build logic using blocks called “nodes”, which you connect with wires to create flows. Each node performs a small task: trigger at a certain time, check a condition, send a command to a device, etc. Instead of writing YAML, you just drag, drop, and connect.

Node-RED does not replace Home Assistant, it empowers it. I won't cover the installation of Node-RED nor its integration in HA; I did that 2 years ago, but from what I remember, it is quite straightforward.

## Previous Workflow

I already had a decent solution to control my AC from Home Assistant with Node-RED, but I wanted to enhance it to also handle the humidity level at home. My previous workflow, despite being functional, was not really scalable and quite hard to maintain:


## New Workflow

Instead of tweaking this workflow, I created a new one from scratch, with the same goal in mind: control the AC system by taking into account all available sensors: thermometers, humidity sensors, door sensors, occupant presence, time of day, etc.

### Objectives

The idea is pretty simple: not having to think about the AC while still being efficient.

That being said, what does that mean? I want to keep the temperature and humidity level in check, whether I'm here or not. If I open the windows, it should stop blowing. If it is too humid, I want to dry the air. If I turn the AC on or off manually, I don't want it to override my setting. If it's night, I don't need to cool my living room and I want it quiet, etc.

To help me achieve that, I'm using 4 [Aqara temperature and humidity sensors](https://eu.aqara.com/en-eu/products/aqara-temperature-and-humidity-sensor), one in each of my main rooms. I'm also using some [Aqara door sensors](https://eu.aqara.com/en-eu/products/aqara-door-and-window-sensor) to detect if windows are open.

### Workflow

Let me introduce my new AC workflow within Node-RED and explain what it does in detail:


#### 1. Temperature Sensors

In the first node, I combined all the temperature sensors in one `trigger state node`, and I also added the humidity levels reported by these sensors in addition to the temperature. The node then contains 8 entities in a list (2 for each of my sensors). Each time one of these 8 entities changes value, the node is triggered:


Each of my temperature sensors is named with a color in French, because each has its own color sticker to distinguish them:
- **Jaune**: Living room
- **Bleu**: Bedroom
- **Rouge**: Office
- **Vert**: Kid's bedroom

The second node is a `function node` whose role is to determine the room of the sensor (`function nodes` are written in **JavaScript**):
```js
const association = {
  "temperature_jaune": "salon",
  "temperature_bleu": "chambre",
  "temperature_rouge": "couloir",
  "temperature_vert": "couloir"
};

// Match pattern like: sensor.temperature_rouge_temperature
const match = msg.topic.match(/^sensor\.(.+)_(temperature|humidity)$/);

if (!match) {
  node.warn("Topic format not recognized: " + msg.topic);
  return null;
}

msg.payload = {
  room: association[match[1]],
  sensor: match[1]
};

return msg;
```

For the last node: most of the time, the sensors will send two messages at the same time, one containing the temperature value and the other the humidity level. I added a `join node` to combine the two messages if they are sent within the same second:


#### 2. Notification

It can happen that the temperature sensors stop sending states for some reason. In that case, they will always return their last value, which would lock the associated AC unit.

The workaround I found effective is to send a notification if a sensor did not send a new value in the last 3 hours. In a normal situation, the sensors send an update approximately every 15 minutes.

The first node is a slightly tricky `function node` which stores a timer in a flow variable for each sensor. When the timeout is reached, it sends a message to the next node:
```js
const sensor = msg.payload.sensor;
const timeoutKey = `watchdog_${sensor}`;
const messages = {
  "temperature_jaune": {"title": "Température Salon", "message": "Capteur de température du salon semble hors service"},
  "temperature_bleu": {"title": "Température Chambre", "message": "Capteur de température de la chambre semble hors service"},
  "temperature_rouge": {"title": "Température Bureau", "message": "Capteur de température du bureau semble hors service"},
  "temperature_vert": {"title": "Température Raphaël", "message": "Capteur de température de Raphaël semble hors service"}
};

// Clear existing timer
const existing = flow.get(timeoutKey);
if (existing) clearTimeout(existing);

// Set new timer
const timer = setTimeout(() => {
  node.send({
    payload: `⚠️ No update from ${sensor} in 3 hours.`,
    sensor: sensor,
    title: messages[sensor]["title"],
    message: messages[sensor]["message"]
  });
}, 3 * 60 * 60 * 1000); // 3 hours

flow.set(timeoutKey, timer);

return null; // Don't send anything now
```

The second node is a `call service node` which sends a notification to my Android device with the given values:


#### 3. Temperature Sliders

To have control over the temperature without having to change the workflow, I created two Home Assistant helpers, as numbers, which I can adjust for each unit, giving me 6 helpers in total:


These values are the base temperatures used for the calculation of the thresholds, depending on the offsets which I will detail further.

The first node is a `trigger state node`, with all 6 entities combined. If I change one value, the node is triggered:


The second node is a `function node` to determine the affected room:
```js
const association = {
  "input_number.temp_ete_salon": "salon",
  "input_number.temp_hiver_salon": "salon",
  "input_number.temp_ete_chambre": "chambre",
  "input_number.temp_hiver_chambre": "chambre",
  "input_number.temp_ete_couloir": "couloir",
  "input_number.temp_hiver_couloir": "couloir"
};

msg.payload = { room: association[msg.topic] };
return msg;
```

#### 4. Toggles

In Home Assistant, I'm also using other helpers, as booleans. The most important one is the AC toggle, with which I can manually disable the whole workflow. I have others which are automated, for the time of day or to detect presence at home.

I have another `trigger state node` with all my boolean toggles, including a test button for debugging purposes:


As toggles affect the whole apartment and not a single unit, the next node is a `change node`, which sets the room value to `partout` (everywhere):


#### 5. Windows

The last triggers are my windows: if I open or close a window next to a unit, it triggers the workflow. I have door sensors for some of my doors, but for the hallway unit, I'm using the Velux windows' state. Some rooms have more than one window, so I created a group helper for them.

The first node is the last `trigger state node`; the returned value is a string which I will later have to convert into a boolean:


Connected to it, again, a `function node` to select the affected room:
```js
const association = {
  "binary_sensor.groupe_fenetre_salon": "salon",
  "binary_sensor.fenetre_chambre_contact": "chambre",
  "cover.groupe_fenetre_couloir": "couloir"
};

msg.payload = {
  room: association[msg.topic]
};
return msg;
```

#### 6. Window Watchdog

When I open a window, it is not necessarily to leave it open for a long time. I could just be letting the cat out or having a look at my gate. I don't want my AC turned off as soon as I open it. To work around that, I created a watchdog for each unit, to delay the message for some time.

The first node is a `switch node`: based on the room given by the previous node, it will send the message to the associated watchdog:


After that come the watchdogs, `trigger nodes`, which will delay the message by some time and extend the delay if another message is received:


#### 7. AC Enabled?

All these triggers now enter the computing pipeline, to determine what the system must do with them. But before that, it checks whether the automation is even enabled. I added this kill switch just in case, but I rarely use it anyway.

The first node is a `delay node` which limits the rate of incoming messages to 1 per second:


The second node is a `current state node` which checks if the `climatisation` boolean is enabled:


#### 8. Room Configuration

The idea here is to attach the room's configuration to the message. Each room has its own configuration: which unit is used, which sensors, and more importantly, when they should be turned on and off.

AC units have 4 modes which can be used:
- Cool
- Dry
- Fan
- Heat

To determine which mode should be used, I'm using thresholds for each mode and for the unit's fan speed, with different offsets depending on the situation. I can then define an offset during the night or when I'm away. I can also set the offset to `disabled`, which will force the unit to shut down.

The first node is a `switch node`, based on the `room` value, which will route the message to the associated room configuration. When the room is `partout` (everywhere), the message is split to all 3 room configurations:


It is connected to a `change node` which will attach the configuration to `room_config`; here is an example with the living room configuration:
```json
{
  "threshold": {
    "cool": {
      "start": {
        "1": 1,
        "2": 1.5,
        "3": 2,
        "4": 2.5,
        "quiet": 0
      },
      "stop": -0.3,
      "target": -1,
      "offset": {
        "absent": 1,
        "vacances": "disabled",
        "fenetre": "disabled",
        "matin": "disabled",
        "jour": 0,
        "soir": 0,
        "nuit": "disabled"
      }
    },
    "dry": {
      "start": {
        "quiet": -1
      },
      "stop": -1.5,
      "offset": {
        "absent": "1.5",
        "vacances": "disabled",
        "fenetre": "disabled",
        "matin": "disabled",
        "jour": 0,
        "soir": 0,
        "nuit": "disabled"
      }
    },
    "fan_only": {
      "start": {
        "1": -0.3,
        "quiet": -0.5
      },
      "stop": -0.7,
      "offset": {
        "absent": "disabled",
        "vacances": "disabled",
        "fenetre": "disabled",
        "matin": "disabled",
        "jour": 0,
        "soir": 0,
        "nuit": "disabled"
      }
    },
    "heat": {
      "start": {
        "1": 0,
        "2": -1.5,
        "quiet": 0
      },
      "stop": 1,
      "target": 1,
      "offset": {
        "absent": -1.5,
        "vacances": -3,
        "fenetre": "disabled",
        "matin": 0,
        "jour": 0,
        "soir": 0,
        "nuit": -1.5
      }
    }
  },
  "unit": "climate.clim_salon",
  "timer": "timer.minuteur_clim_salon",
  "window": "binary_sensor.groupe_fenetre_salon",
  "thermometre": "sensor.temperature_jaune_temperature",
  "humidity": "sensor.temperature_jaune_humidity",
  "temp_ete": "input_number.temp_ete_salon",
  "temp_hiver": "input_number.temp_hiver_salon"
}
```
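
To make the offsets concrete, here is a quick sketch of how the start and stop values above end up being shifted (the numbers are purely illustrative, not my actual settings):
```js
// Illustrative only: summer base temperature of 25°C with the "absent" offset active
const base = 25;      // example value of input_number.temp_ete_salon
const offset = 1;     // threshold.cool.offset.absent
const coolStartSpeed1 = base + offset + 1;     // 27   -> cool starts at speed "1" above 27°C
const coolStop        = base + offset + (-0.3); // 25.7 -> cool stops below 25.7°C on average
```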

#### 9. Computation

Now that the message has the room configuration attached, we are entering the computation pipeline. We have the AC unit name, the sensor names, the desired base temperature and the offsets to apply. From these values, we will fetch the current states and do the maths.

The first node is another `delay node` which regulates the rate of incoming messages, because the previous block could have created 3 messages if all rooms are targeted.

The second is the most important node of the workflow, a `function node` that has multiple tasks:
- Fetch sensor states from Home Assistant
- Calculate mode thresholds with the given offsets
- Disable modes if conditions are met
- Inject these values into the payload
```js
// --- Helper: Get Home Assistant state by entity ID ---
function getState(entityId) {
  return global.get("homeassistant.homeAssistant.states")[entityId]?.state;
}

// --- Determine current time period based on sensors ---
const periods = ["jour", "soir", "nuit", "matin"];
msg.payload.period = periods.find(p => getState(`binary_sensor.${p}`) === 'on') || 'unknown';

// --- Determine presence status (absent = inverse of presence) ---
const vacances = getState("input_boolean.absent");
const absent = getState("input_boolean.presence") === 'on' ? 'off' : 'on';

/**
 * Recursively adds the base temperature and offset to all numeric start values in a threshold config
 */
function applyOffsetToThresholds(threshold, baseTemp, globalOffset) {
  for (const [key, value] of Object.entries(threshold)) {
    if (key === "offset") continue;

    if (typeof value === 'object') {
      applyOffsetToThresholds(value, baseTemp, globalOffset);
    } else {
      threshold[key] += baseTemp + globalOffset;
    }
  }
}

/**
 * Calculates the global offset for a mode, based on presence, vacation, window, and time of day
 */
function calculateGlobalOffset(offsets, modeName, windowState, disabledMap) {
  let globalOffset = 0;

  for (const [key, offsetValue] of Object.entries(offsets)) {
    let conditionMet = false;

    if (key === msg.payload.period) conditionMet = true;
    else if (key === "absent" && absent === 'on') conditionMet = true;
    else if (key === "vacances" && vacances === 'on') conditionMet = true;
    else if ((key === "fenetre" || key === "window") && windowState === 'on') conditionMet = true;

    if (conditionMet) {
      if (offsetValue === 'disabled') {
        disabledMap[modeName] = true;
        return 0; // Mode disabled immediately
      }

      globalOffset += parseFloat(offsetValue);
    }
  }

  return globalOffset;
}

/**
 * Main logic: compute thresholds for the specified room using the provided config
 */
const cfg = msg.payload.room_config;
const room = msg.payload.room;

// Normalize window sensor state
const rawWindow = getState(cfg.window);
const window = rawWindow === 'open' ? 'on' : rawWindow === 'closed' ? 'off' : rawWindow;

// Gather temperatures
const temps = cfg.thermometre.split(',')
  .map(id => parseFloat(getState(id)))
  .filter(v => !isNaN(v));

const temp_avg = temps.reduce((a, b) => a + b, 0) / temps.length;
const temp_min = Math.min(...temps);
const temp_max = Math.max(...temps);

// Gather humidity
const humidities = cfg.humidity.split(',')
  .map(id => parseFloat(getState(id)))
  .filter(v => !isNaN(v));

const humidity_avg = humidities.reduce((a, b) => a + b, 0) / humidities.length;
const humidity_min = Math.min(...humidities);
const humidity_max = Math.max(...humidities);

// Get base temps
const temp_ete = parseFloat(getState(cfg.temp_ete));
const temp_hiver = parseFloat(getState(cfg.temp_hiver));

// Process modes
const { threshold } = cfg;
const modes = ["cool", "dry", "fan_only", "heat"];
const disabled = {};

for (const mode of modes) {
  const baseTemp = (mode === "heat") ? temp_hiver : temp_ete;
  const globalOffset = calculateGlobalOffset(threshold[mode].offset, mode, window, disabled);

  applyOffsetToThresholds(threshold[mode], baseTemp, globalOffset);
}

// Final message
msg.payload = {
  ...msg.payload,
  unit: cfg.unit,
  timer: cfg.timer,
  threshold,
  window,
  temp: {
    min: temp_min,
    max: temp_max,
    avg: Math.round(temp_avg * 100) / 100
  },
  humidity: {
    min: humidity_min,
    max: humidity_max,
    avg: Math.round(humidity_avg * 100) / 100
  },
  disabled
};

return msg;
```

The third node is a `filter node`, which drops subsequent messages with a similar payload:


The fourth node checks if any lock is set: with a `current state node`, we verify that the timer associated with the unit is idle. If it is not, the message is discarded:


The last node is another `current state node` which will fetch the unit's state and properties:


#### 10. Target State

After the computation, we want to determine what the target mode should be, what action to take to converge from the current mode and, if applicable, what the fan speed should be for that mode.

All three nodes are `function nodes`. The first one decides what the target mode should be, between `off`, `cool`, `dry`, `fan_only` and `heat`:
```js
const minHumidityThreshold = 52;
const maxHumidityThreshold = 57;

// Helper: check if mode can be activated or stopped
function isModeEligible(mode, temps, humidity, thresholds, currentMode) {
  const isCurrent = (mode === currentMode);
  const threshold = thresholds[mode];

  if (msg.payload.disabled?.[mode]) return false;

  // Determine which temperature to use for start/stop:
  // start: temp.max (except heat uses temp.min)
  // stop: temp.avg
  let tempForCheckStart;
  if (mode === "heat") {
    tempForCheckStart = temps.min; // heat start uses min temp
  } else {
    tempForCheckStart = temps.max; // others start use max temp
  }
  const tempForCheckStop = temps.avg;

  // Dry mode also depends on humidity thresholds
  // humidity max for start, humidity avg for stop
  let humidityForCheckStart = humidity.max;
  let humidityForCheckStop = humidity.avg;

  // For heat mode (inverted logic)
  if (mode === "heat") {
    if (!isCurrent) {
      const minStart = Math.min(...Object.values(threshold.start));
      return tempForCheckStart < minStart;
    } else {
      return tempForCheckStop < threshold.stop;
    }
  }

  // For dry mode (humidity-dependent)
  if (mode === "dry") {
    // Skip if humidity too low
    if (humidityForCheckStart <= (isCurrent ? minHumidityThreshold : maxHumidityThreshold)) return false;

    const minStart = Math.min(...Object.values(threshold.start));
    if (!isCurrent) {
      return tempForCheckStart >= minStart;
    } else {
      return tempForCheckStop >= threshold.stop;
    }
  }

  // For cool and fan_only
  if (!isCurrent) {
    const minStart = Math.min(...Object.values(threshold.start));
    return tempForCheckStart >= minStart;
  } else {
    return tempForCheckStop >= threshold.stop;
  }
}

// --- Main logic ---
const { threshold, temp, humidity, current_mode, disabled } = msg.payload;

const priority = ["cool", "dry", "fan_only", "heat"];
let target_mode = "off";

// Loop through priority list and stop at the first eligible mode
for (const mode of priority) {
  if (isModeEligible(mode, temp, humidity, threshold, current_mode)) {
    target_mode = mode;
    break;
  }
}

msg.payload.target_mode = target_mode;

if (target_mode === "cool" || target_mode === "heat") {
  msg.payload.set_temp = true;
}

return msg;
```

The second compares the current and target modes and picks which action to take:
- **check**: the current and target modes are the same.
- **start**: the AC unit is currently off, but an active mode is required.
- **change**: the AC unit is on, and the target mode is different (but not `off`).
- **stop**: the AC unit is on but needs to be stopped.
```js
let action = "check"; // default if both are same

if (msg.payload.current_mode === "off" && msg.payload.target_mode !== "off") {
  action = "start";
} else if (msg.payload.current_mode !== "off" && msg.payload.target_mode !== "off" && msg.payload.current_mode !== msg.payload.target_mode) {
  action = "change";
} else if (msg.payload.current_mode !== "off" && msg.payload.target_mode === "off") {
  action = "stop";
}

msg.payload.action = action;
return msg;
```
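
A few example combinations, to show how the mapping behaves (values invented for illustration):
```js
// { current_mode: "off",  target_mode: "cool" } -> action = "start"
// { current_mode: "cool", target_mode: "dry"  } -> action = "change"
// { current_mode: "cool", target_mode: "off"  } -> action = "stop"
// { current_mode: "heat", target_mode: "heat" } -> action = "check"
```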

The last node determines the fan speed for the target mode, based on the thresholds:
```js
// Function to find the appropriate speed key based on temperature and mode
function findSpeed(thresholdStart, temperature, mode) {
  let closestSpeed = 'quiet';
  let closestTemp = mode === 'heat' ? Infinity : -Infinity;

  for (const speedKey in thresholdStart) {
    if (speedKey !== 'quiet') {
      const tempValue = thresholdStart[speedKey];
      if (mode === 'heat') {
        if (tempValue >= temperature && tempValue <= closestTemp) {
          closestSpeed = speedKey;
          closestTemp = tempValue;
        }
      } else { // cool, fan_only
        if (tempValue <= temperature && tempValue >= closestTemp) {
          closestSpeed = speedKey;
          closestTemp = tempValue;
        }
      }
    }
  }
  return closestSpeed;
}

if (msg.payload.target_mode && msg.payload.target_mode !== "off" && msg.payload.target_mode !== "dry") {
  const modeData = msg.payload.threshold[msg.payload.target_mode];
  if (modeData && modeData.start) {
    if (msg.payload.target_mode === "heat") {
      msg.payload.speed = findSpeed(modeData.start, msg.payload.temp.min, 'heat');
    } else {
      msg.payload.speed = findSpeed(modeData.start, msg.payload.temp.max, 'cool');
    }
  } else {
    node.error("Invalid mode data or missing 'start' thresholds", msg);
  }
} else {
  // No need for speed in 'off' or 'dry' modes
  msg.payload.speed = null;
}

return msg;
```
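
As a quick illustration of how this picks a speed (the threshold values below are made up, not my real configuration):
```js
// Made-up start thresholds, already shifted by base temperature and offset
const start = { "1": 26, "2": 26.5, "3": 27, "quiet": 25.5 };

findSpeed(start, 26.7, 'cool'); // -> "2": 26.5 is the highest threshold <= 26.7
findSpeed(start, 25.8, 'cool'); // -> "quiet": no numbered threshold reached yet
```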

#### 11. Action Switch

Based on the action to take, the `switch node` will route the message accordingly:


#### 12. Start

When the action is `start`, we first need to turn the unit on. This takes between 20 and 40 seconds depending on the unit model, and once started, the unit is also locked for a short period against subsequent messages.

The first node is a `call service node` using the `turn_on` service on the AC unit:


The second node is another `call service node` which will start the lock timer of this unit for 45 seconds:


The last one is a `delay node` of 5 seconds, to give the Home Assistant Daikin integration time to reflect the new state.

#### 13. Change

The `change` action is used to switch from one mode to another, but it is also used right after the start action.

The first node is a `call service node` using the `set_hvac_mode` service on the AC unit:


The following node is another `delay node` of 5 seconds.

The last one verifies with a `switch node` whether the target temperature needs to be set; this is only required for the `cool` and `heat` modes:


#### 14. Set Target Temperature

The target temperature is only relevant for the `cool` and `heat` modes: with a normal AC unit, you define a temperature to reach, and this is exactly what is defined here. But because each unit uses its own internal sensor to verify it, I don't really trust it. If the target is already reached according to the unit, it won't blow at all.

The first node is another `call service node` using the `set_temperature` service:


Again, this node is followed by a `delay node` of 5 seconds.

#### 15. Check

The `check` action is used almost every time. It only checks and compares the desired fan speed, and changes it if needed.

The first node is a `switch node` which verifies that the `speed` value is defined:


The second is another `switch node` to compare the `speed` value with the current speed:


Finally, the last node is a `call service node` using the `set_fan_mode` service to set the fan speed:


#### 16. Stop

When the action is `stop`, the AC unit is simply turned off.

The first node is a `call service node` using the `turn_off` service:


The second node is another `call service node` which will start the lock timer of this unit for 45 seconds.

#### 17. Manual Intervention

Sometimes, for some reason, we want to use the AC manually. When we do, we don't want the workflow to override our manual setting, at least for some time. Node-RED uses its own user in Home Assistant, so when an AC unit changes state without this user, the change was made manually.

The first node is a `trigger state node`, which will send a message when any AC unit changes state:


The second is a `function node` which will associate the unit with its timer:
```js
const association = {
  "climate.clim_salon": "timer.minuteur_clim_salon",
  "climate.clim_chambre": "timer.minuteur_clim_chambre",
  "climate.clim_couloir": "timer.minuteur_clim_couloir"
};

msg.payload = association[msg.topic];
return msg;
```

The third is a `switch node` that lets the message through only when the `user_id` is not the Node-RED user's:


The fourth is another `switch node` which checks that a `user_id` is actually set:


Lastly, the final node is a `call service node` using the `start` service on the unit's timer with its default duration (60 minutes):


## TL;DR

With this setup, my AC system is fully automated, from cooling in summer to heating in winter, while keeping the humidity level in check.

This required quite a lot of thinking, tweaking and testing, but in the end I'm very happy with the result. That's why I'm sharing it with you, to give you some ideas about what you can do in home automation.

If you think I could have done things differently, please reach out to discuss it, and do not hesitate to share your ideas as well!
757
content/post/7-terraform-create-proxmox-module.fr.md
Normal file
@@ -0,0 +1,757 @@
---
slug: terraform-create-proxmox-module
title: Créer un Module Terraform pour Proxmox
description: Transformez votre code VM Proxmox en module Terraform réutilisable et apprenez à déployer à l'échelle sur plusieurs nœuds.
date: 2025-07-04
draft: false
tags:
  - terraform
  - proxmox
  - cloud-init
categories:
  - homelab
---
## Intro

Dans un [article précédent]({{< ref "post/3-terraform-create-vm-proxmox" >}}), j’expliquais comment déployer des **machines virtuelles** sur **Proxmox** à l’aide de **Terraform**, en partant d’un [template cloud-init]({{< ref "post/1-proxmox-cloud-init-vm-template" >}}).

Dans ce post, nous allons transformer ce code en un **module Terraform** réutilisable. Ensuite, je montrerai comment utiliser ce module dans d'autres projets pour simplifier et faire évoluer vos déploiements d'infrastructure.

---

## Qu’est-ce qu’un Module Terraform ?

Les modules Terraform sont des composants réutilisables qui permettent d’organiser et de simplifier votre code d’infrastructure en regroupant des ressources liées dans une seule unité. Au lieu de répéter la même configuration à plusieurs endroits, vous pouvez la définir une fois dans un module, puis l’utiliser là où vous en avez besoin, comme une fonction en programmation.

Les modules peuvent être locaux (dans votre projet) ou distants (depuis le Terraform Registry ou un dépôt Git), ce qui facilite le partage et la standardisation des patterns d’infrastructure entre les équipes ou projets. Grâce aux modules, votre code devient plus lisible, maintenable et évolutif.
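
Pour illustrer la syntaxe (exemple générique, indépendant du module construit plus bas), un appel de module se résume à un bloc `module` pointant vers une source locale ou distante :
```hcl
# Module local, référencé par un chemin relatif (chemin donné à titre d'exemple)
module "vm_test" {
  source  = "./modules/pve_vm"
  vm_name = "test-vm"
}

# Module distant, récupéré depuis le Terraform Registry (module public bien connu)
module "vpc" {
  source  = "terraform-aws-modules/vpc/aws"
  version = "~> 5.0"
}
```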

---

## Transformer le Projet en Module

Nous allons maintenant extraire le code Terraform du [projet précédent]({{< ref "post/3-terraform-create-vm-proxmox" >}}) pour en faire un module réutilisable nommé `pve_vm`.

> 📌 Vous pouvez retrouver le code source complet dans mon [dépôt Homelab](https://github.com/Vezpi/Homelab/). Le code spécifique à cet article se trouve [ici](https://github.com/Vezpi/Homelab/tree/3a991010d5e9de30e12cbf365d1a1ca1ff1f6436/terraform). Pensez à adapter les variables à votre environnement.

### Structure du Code

Notre module vivra à côté des projets, dans un dossier séparé.
```plaintext
terraform
`-- modules
    `-- pve_vm
        |-- main.tf
        |-- provider.tf
        `-- variables.tf
```
### Code du Module

📝 Les fichiers du module sont essentiellement les mêmes que ceux du projet que nous transformons. Les providers y sont déclarés, mais non configurés.

Le module `pve_vm` sera composé de 3 fichiers :
- **main** : la logique principale, identique à celle du projet.
- **provider** : déclare les providers requis, sans les configurer.
- **variables** : déclare les variables du module, en excluant celles propres au provider.

#### `main.tf`

```hcl
# Retrieve VM templates available in Proxmox that match the specified name
data "proxmox_virtual_environment_vms" "template" {
  filter {
    name   = "name"
    values = ["${var.vm_template}"] # The name of the template to clone from
  }
}

# Create a cloud-init configuration file as a Proxmox snippet
resource "proxmox_virtual_environment_file" "cloud_config" {
  content_type = "snippets"    # Cloud-init files are stored as snippets in Proxmox
  datastore_id = "local"       # Local datastore used to store the snippet
  node_name    = var.node_name # The Proxmox node where the file will be uploaded

  source_raw {
    file_name = "${var.vm_name}.cloud-config.yaml" # The name of the snippet file
    data = <<-EOF
    #cloud-config
    hostname: ${var.vm_name}
    package_update: true
    package_upgrade: true
    packages:
      - qemu-guest-agent # Ensures the guest agent is installed
    users:
      - default
      - name: ${var.vm_user}
        groups: sudo
        shell: /bin/bash
        ssh-authorized-keys:
          - "${var.vm_user_sshkey}" # Inject user's SSH key
        sudo: ALL=(ALL) NOPASSWD:ALL
    runcmd:
      - systemctl enable qemu-guest-agent
      - reboot # Reboot the VM after provisioning
    EOF
  }
}

# Define and provision a new VM by cloning the template and applying initialization
resource "proxmox_virtual_environment_vm" "vm" {
  name      = var.vm_name   # VM name
  node_name = var.node_name # Proxmox node to deploy the VM
  tags      = var.vm_tags   # Optional VM tags for categorization

  agent {
    enabled = true # Enable the QEMU guest agent
  }

  stop_on_destroy = true # Ensure VM is stopped gracefully when destroyed

  clone {
    vm_id     = data.proxmox_virtual_environment_vms.template.vms[0].vm_id     # ID of the source template
    node_name = data.proxmox_virtual_environment_vms.template.vms[0].node_name # Node of the source template
  }

  bios    = var.vm_bios    # BIOS type (e.g., seabios or ovmf)
  machine = var.vm_machine # Machine type (e.g., q35)

  cpu {
    cores = var.vm_cpu # Number of CPU cores
    type  = "host"     # Use host CPU type for best compatibility/performance
  }

  memory {
    dedicated = var.vm_ram # RAM in MB
  }

  disk {
    datastore_id = var.node_datastore # Datastore to hold the disk
    interface    = "scsi0"            # Primary disk interface
    size         = 4                  # Disk size in GB
  }

  initialization {
    user_data_file_id = proxmox_virtual_environment_file.cloud_config.id # Link the cloud-init file
    datastore_id      = var.node_datastore
    interface         = "scsi1" # Separate interface for cloud-init
    ip_config {
      ipv4 {
        address = "dhcp" # Get IP via DHCP
      }
    }
  }

  network_device {
    bridge  = "vmbr0"     # Use the default bridge
    vlan_id = var.vm_vlan # VLAN tagging if used
  }

  operating_system {
    type = "l26" # Linux 2.6+ kernel
  }

  vga {
    type = "std" # Standard VGA type
  }

  lifecycle {
    ignore_changes = [ # Ignore initialization section after first deployment for idempotency
      initialization
    ]
  }
}

# Output the assigned IP address of the VM after provisioning
output "vm_ip" {
  value       = proxmox_virtual_environment_vm.vm.ipv4_addresses[1][0] # Second network interface's first IP
  description = "VM IP"
}
```

#### `provider.tf`

```hcl
terraform {
  required_providers {
    proxmox = {
      source = "bpg/proxmox"
    }
  }
}
```

#### `variables.tf`

> ⚠️ The defaults are based on my environment, adapt them to yours.

```hcl
variable "node_name" {
  description = "Proxmox host for the VM"
  type        = string
}

variable "node_datastore" {
  description = "Datastore used for VM storage"
  type        = string
  default     = "ceph-workload"
}

variable "vm_template" {
  description = "Template of the VM"
  type        = string
  default     = "ubuntu-cloud"
}

variable "vm_name" {
  description = "Hostname of the VM"
  type        = string
}

variable "vm_user" {
  description = "Admin user of the VM"
  type        = string
  default     = "vez"
}

variable "vm_user_sshkey" {
  description = "Admin user SSH key of the VM"
  type        = string
  default     = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAID62LmYRu1rDUha3timAIcA39LtcIOny1iAgFLnxoBxm vez@bastion"
}

variable "vm_cpu" {
  description = "Number of CPU cores of the VM"
  type        = number
  default     = 1
}

variable "vm_ram" {
  description = "Number of RAM (MB) of the VM"
  type        = number
  default     = 2048
}

variable "vm_bios" {
  description = "Type of BIOS used for the VM"
  type        = string
  default     = "ovmf"
}

variable "vm_machine" {
  description = "Type of machine used for the VM"
  type        = string
  default     = "q35"
}

variable "vm_vlan" {
  description = "VLAN of the VM"
  type        = number
  default     = 66
}

variable "vm_tags" {
  description = "Tags for the VM"
  type        = list(any)
  default     = ["test"]
}
```

---

## Déployer une VM à l’aide du Module

Maintenant que nous avons extrait toute la logique dans le module `pve_vm`, notre projet n’a plus qu’à appeler ce module en lui passant les variables nécessaires. Cela rend la configuration bien plus propre et facile à maintenir.

### Structure du Code

Voici à quoi cela ressemble :
```plaintext
terraform
|-- modules
|   `-- pve_vm
|       |-- main.tf
|       |-- provider.tf
|       `-- variables.tf
`-- projects
    `-- simple-vm-with-module
        |-- credentials.auto.tfvars
        |-- main.tf
        |-- provider.tf
        `-- variables.tf
```

### Code du projet

Dans cet exemple, je fournis manuellement les valeurs lors de l’appel du module. Le provider est configuré au niveau du projet.

#### `main.tf`

```hcl
module "pve_vm" {
  source    = "../../modules/pve_vm"
  node_name = "zenith"
  vm_name   = "zenith-vm"
  vm_cpu    = 2
  vm_ram    = 2048
  vm_vlan   = 66
}

output "vm_ip" {
  value = module.pve_vm.vm_ip
}
```

#### `provider.tf`

```hcl
terraform {
  required_providers {
    proxmox = {
      source = "bpg/proxmox"
    }
  }
}

provider "proxmox" {
  endpoint  = var.proxmox_endpoint
  api_token = var.proxmox_api_token
  insecure  = false
  ssh {
    agent       = false
    private_key = file("~/.ssh/id_ed25519")
    username    = "root"
  }
}
```

#### `variables.tf`

```hcl
variable "proxmox_endpoint" {
  description = "Proxmox URL endpoint"
  type        = string
}

variable "proxmox_api_token" {
  description = "Proxmox API token"
  type        = string
  sensitive   = true
}
```
#### `credentials.auto.tfvars`

```hcl
proxmox_endpoint  = <your Proxmox endpoint>
proxmox_api_token = <your Proxmox API token for the user terraformer>
```

### Initialiser le Workspace Terraform

Dans notre nouveau projet, il faut d’abord initialiser l’environnement Terraform avec `terraform init` :
```bash
$ terraform init
Initializing the backend...
Initializing modules...
- pve_vm in ../../modules/pve_vm
Initializing provider plugins...
- Finding latest version of bpg/proxmox...
- Installing bpg/proxmox v0.78.2...
- Installed bpg/proxmox v0.78.2 (self-signed, key ID F0582AD6AE97C188)
Partner and community providers are signed by their developers.
If you'd like to know more about provider signing, you can read about it here:
https://www.terraform.io/docs/cli/plugins/signing.html
Terraform has created a lock file .terraform.lock.hcl to record the provider
selections it made above. Include this file in your version control repository
so that Terraform can guarantee to make the same selections by default when
you run "terraform init" in the future.

Terraform has been successfully initialized!

You may now begin working with Terraform. Try running "terraform plan" to see
any changes that are required for your infrastructure. All Terraform commands
should now work.

If you ever set or change modules or backend configuration for Terraform,
rerun this command to reinitialize your working directory. If you forget, other
commands will detect it and remind you to do so if necessary.
```

### Déployer la VM

Avant le déploiement, vérifiez que tout est correct avec `terraform plan`.

Une fois prêt, lancez le déploiement avec `terraform apply` :
```bash
$ terraform apply
module.pve_vm.data.proxmox_virtual_environment_vms.template: Reading...
module.pve_vm.data.proxmox_virtual_environment_vms.template: Read complete after 0s [id=89b444be-7501-4538-9436-08609b380d39]

Terraform used the selected providers to generate the following execution plan. Resource actions are indicated with the following symbols:
  + create

Terraform will perform the following actions:

  # module.pve_vm.proxmox_virtual_environment_file.cloud_config will be created
  + resource "proxmox_virtual_environment_file" "cloud_config" {
      + content_type           = "snippets"
      + datastore_id           = "local"
      + file_modification_date = (known after apply)
      + file_name              = (known after apply)
      + file_size              = (known after apply)
      + file_tag               = (known after apply)
      + id                     = (known after apply)
      + node_name              = "zenith"
      + overwrite              = true
      + timeout_upload         = 1800

      + source_raw {
          + data      = <<-EOT
                #cloud-config
                hostname: zenith-vm
                package_update: true
                package_upgrade: true
                packages:
                  - qemu-guest-agent
                users:
                  - default
                  - name: vez
                    groups: sudo
                    shell: /bin/bash
                    ssh-authorized-keys:
                      - "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAID62LmYRu1rDUha3timAIcA39LtcIOny1iAgFLnxoBxm vez@bastion"
                    sudo: ALL=(ALL) NOPASSWD:ALL
                runcmd:
                  - systemctl enable qemu-guest-agent
                  - reboot
            EOT
          + file_name = "zenith-vm.cloud-config.yaml"
          + resize    = 0
        }
    }

  # module.pve_vm.proxmox_virtual_environment_vm.vm will be created
  + resource "proxmox_virtual_environment_vm" "vm" {
      + acpi                    = true
      + bios                    = "ovmf"
      + id                      = (known after apply)
      + ipv4_addresses          = (known after apply)
      + ipv6_addresses          = (known after apply)
      + keyboard_layout         = "en-us"
      + mac_addresses           = (known after apply)
      + machine                 = "q35"
      + migrate                 = false
      + name                    = "zenith-vm"
      + network_interface_names = (known after apply)
      + node_name               = "zenith"
      + on_boot                 = true
      + protection              = false
      + reboot                  = false
      + reboot_after_update     = true
      + scsi_hardware           = "virtio-scsi-pci"
      + started                 = true
      + stop_on_destroy         = true
      + tablet_device           = true
      + tags                    = [
          + "test",
        ]
      + template                = false
      + timeout_clone           = 1800
      + timeout_create          = 1800
      + timeout_migrate         = 1800
      + timeout_move_disk       = 1800
      + timeout_reboot          = 1800
      + timeout_shutdown_vm     = 1800
      + timeout_start_vm        = 1800
      + timeout_stop_vm         = 300
      + vm_id                   = (known after apply)

      + agent {
          + enabled = true
          + timeout = "15m"
          + trim    = false
          + type    = "virtio"
        }

      + clone {
          + full      = true
          + node_name = "apex"
          + retries   = 1
          + vm_id     = 900
        }

      + cpu {
          + cores      = 2
          + hotplugged = 0
          + limit      = 0
          + numa       = false
          + sockets    = 1
          + type       = "host"
          + units      = 1024
        }

      + disk {
          + aio               = "io_uring"
          + backup            = true
          + cache             = "none"
          + datastore_id      = "ceph-workload"
          + discard           = "ignore"
          + file_format       = (known after apply)
          + interface         = "scsi0"
          + iothread          = false
          + path_in_datastore = (known after apply)
          + replicate         = true
          + size              = 4
          + ssd               = false
        }

      + initialization {
          + datastore_id         = "ceph-workload"
          + interface            = "scsi1"
          + meta_data_file_id    = (known after apply)
          + network_data_file_id = (known after apply)
          + type                 = (known after apply)
          + user_data_file_id    = (known after apply)
          + vendor_data_file_id  = (known after apply)

          + ip_config {
              + ipv4 {
                  + address = "dhcp"
                }
            }
        }

      + memory {
          + dedicated      = 2048
          + floating       = 0
          + keep_hugepages = false
          + shared         = 0
        }

      + network_device {
          + bridge      = "vmbr0"
          + enabled     = true
          + firewall    = false
          + mac_address = (known after apply)
          + model       = "virtio"
          + mtu         = 0
          + queues      = 0
          + rate_limit  = 0
          + vlan_id     = 66
        }

      + operating_system {
          + type = "l26"
        }

      + vga {
          + memory = 16
          + type   = "std"
        }
    }

Plan: 2 to add, 0 to change, 0 to destroy.

Changes to Outputs:
  + vm_ip = (known after apply)

Do you want to perform these actions?
  Terraform will perform the actions described above.
  Only 'yes' will be accepted to approve.

  Enter a value: yes

module.pve_vm.proxmox_virtual_environment_file.cloud_config: Creating...
module.pve_vm.proxmox_virtual_environment_file.cloud_config: Creation complete after 1s [id=local:snippets/zenith-vm.cloud-config.yaml]
module.pve_vm.proxmox_virtual_environment_vm.vm: Creating...
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [10s elapsed]
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [20s elapsed]
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [30s elapsed]
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [40s elapsed]
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [50s elapsed]
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [1m0s elapsed]
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [1m10s elapsed]
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [1m20s elapsed]
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [1m30s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [1m40s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [1m50s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [2m0s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [2m10s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [2m20s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [2m30s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [2m40s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [2m50s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [3m0s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [3m10s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Creation complete after 3m13s [id=103]
|
||||||
|
|
||||||
|
Apply complete! Resources: 2 added, 0 changed, 0 destroyed.
|
||||||
|
|
||||||
|
Outputs:
|
||||||
|
|
||||||
|
vm_ip = "192.168.66.159"
|
||||||
|
```
|
||||||
|
|
||||||
|
✅ La VM est maintenant prête !
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
🕗 _Ne faites pas attention à l’uptime, j’ai pris la capture d’écran le lendemain._
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Déployer Plusieurs VMs à la fois
|
||||||
|
|
||||||
|
Très bien, on a déployé une seule VM. Mais maintenant, comment passer à l’échelle ? Comment déployer plusieurs instances de ce template, avec des noms différents, sur des nœuds différents, et avec des tailles différentes ? C’est ce que je vais vous montrer.
|
||||||
|
|
||||||
|
### Une VM par Nœud
|
||||||
|
|
||||||
|
Dans l’exemple précédent, nous avons passé des valeurs fixes au module. À la place, nous pouvons définir un objet local contenant les caractéristiques de la VM, puis nous en servir lors de l’appel au module. Cela facilite l’évolution du code de déploiement :
|
||||||
|
```hcl
|
||||||
|
module "pve_vm" {
|
||||||
|
source = "../../modules/pve_vm"
|
||||||
|
node_name = local.vm.node_name
|
||||||
|
vm_name = local.vm.vm_name
|
||||||
|
vm_cpu = local.vm.vm_cpu
|
||||||
|
vm_ram = local.vm.vm_ram
|
||||||
|
vm_vlan = local.vm.vm_vlan
|
||||||
|
}
|
||||||
|
|
||||||
|
locals {
|
||||||
|
vm = {
|
||||||
|
node_name = "zenith"
|
||||||
|
vm_name = "zenith-vm"
|
||||||
|
vm_cpu = 2
|
||||||
|
vm_ram = 2048
|
||||||
|
vm_vlan = 66
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Nous pouvons également appeler le module en itérant sur une liste d’objets définissant les VMs à déployer :
|
||||||
|
```hcl
|
||||||
|
module "pve_vm" {
|
||||||
|
source = "../../modules/pve_vm"
|
||||||
|
for_each = local.vm_list
|
||||||
|
node_name = each.value.node_name
|
||||||
|
vm_name = each.value.vm_name
|
||||||
|
vm_cpu = each.value.vm_cpu
|
||||||
|
vm_ram = each.value.vm_ram
|
||||||
|
vm_vlan = each.value.vm_vlan
|
||||||
|
}
|
||||||
|
|
||||||
|
locals {
|
||||||
|
vm_list = {
|
||||||
|
zenith = {
|
||||||
|
node_name = "zenith"
|
||||||
|
vm_name = "zenith-vm"
|
||||||
|
vm_cpu = 2
|
||||||
|
vm_ram = 2048
|
||||||
|
vm_vlan = 66
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Bien que cela n'ait pas de sens avec une seule VM, je pourrais utiliser cette syntaxe de module, par exemple, pour déployer une machine virtuelle par nœud :
|
||||||
|
```hcl
|
||||||
|
module "pve_vm" {
|
||||||
|
source = "../../modules/pve_vm"
|
||||||
|
for_each = local.vm_list
|
||||||
|
node_name = each.value.node_name
|
||||||
|
vm_name = each.value.vm_name
|
||||||
|
vm_cpu = each.value.vm_cpu
|
||||||
|
vm_ram = each.value.vm_ram
|
||||||
|
vm_vlan = each.value.vm_vlan
|
||||||
|
}
|
||||||
|
|
||||||
|
locals {
|
||||||
|
vm_list = {
|
||||||
|
for vm in flatten([
|
||||||
|
for node in data.proxmox_virtual_environment_nodes.pve_nodes.names : {
|
||||||
|
node_name = node
|
||||||
|
vm_name = "${node}-vm"
|
||||||
|
vm_cpu = 2
|
||||||
|
vm_ram = 2048
|
||||||
|
vm_vlan = 66
|
||||||
|
}
|
||||||
|
]) : vm.vm_name => vm
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
data "proxmox_virtual_environment_nodes" "pve_nodes" {}
|
||||||
|
|
||||||
|
output "vm_ip" {
|
||||||
|
value = { for k, v in module.pve_vm : k => v.vm_ip }
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
✅ Cela permet de déployer automatiquement 3 VMs dans mon cluster, une par nœud.
|
||||||
|
|
||||||
|
### Plusieurs VMs par Nœud
|
||||||
|
|
||||||
|
Enfin, poussons l’idée plus loin : déployons plusieurs VMs avec des configurations différentes par nœud. Pour cela, on définit un ensemble de rôles et on utilise une boucle imbriquée pour générer toutes les combinaisons possibles pour chaque nœud Proxmox.
|
||||||
|
```hcl
|
||||||
|
module "pve_vm" {
|
||||||
|
source = "../../modules/pve_vm"
|
||||||
|
for_each = local.vm_list
|
||||||
|
node_name = each.value.node_name
|
||||||
|
vm_name = each.value.vm_name
|
||||||
|
vm_cpu = each.value.vm_cpu
|
||||||
|
vm_ram = each.value.vm_ram
|
||||||
|
vm_vlan = each.value.vm_vlan
|
||||||
|
}
|
||||||
|
|
||||||
|
locals {
|
||||||
|
vm_attr = {
|
||||||
|
"master" = { ram = 2048, cpu = 2, vlan = 66 }
|
||||||
|
"worker" = { ram = 1024, cpu = 1, vlan = 66 }
|
||||||
|
}
|
||||||
|
|
||||||
|
vm_list = {
|
||||||
|
for vm in flatten([
|
||||||
|
for node in data.proxmox_virtual_environment_nodes.pve_nodes.names : [
|
||||||
|
for role, config in local.vm_attr : {
|
||||||
|
node_name = node
|
||||||
|
vm_name = "${node}-${role}"
|
||||||
|
vm_cpu = config.cpu
|
||||||
|
vm_ram = config.ram
|
||||||
|
vm_vlan = config.vlan
|
||||||
|
}
|
||||||
|
]
|
||||||
|
]) : vm.vm_name => vm
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
data "proxmox_virtual_environment_nodes" "pve_nodes" {}
|
||||||
|
|
||||||
|
output "vm_ip" {
|
||||||
|
value = { for k, v in module.pve_vm : k => v.vm_ip }
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
🚀 Une fois le `terraform apply` lancé, j'obtiens ça :
|
||||||
|
```bash
|
||||||
|
Apply complete! Resources: 6 added, 0 changed, 0 destroyed.
|
||||||
|
|
||||||
|
Outputs:
|
||||||
|
|
||||||
|
vm_ip = {
|
||||||
|
"apex-master" = "192.168.66.167"
|
||||||
|
"apex-worker" = "192.168.66.168"
|
||||||
|
"vertex-master" = "192.168.66.169"
|
||||||
|
"vertex-worker" = "192.168.66.170"
|
||||||
|
"zenith-master" = "192.168.66.166"
|
||||||
|
"zenith-worker" = "192.168.66.172"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Conclusion
|
||||||
|
|
||||||
|
Nous avons transformé notre déploiement de VM Proxmox en un module Terraform réutilisable, et nous l’avons utilisé pour faire évoluer facilement notre infrastructure sur plusieurs nœuds.
|
||||||
|
|
||||||
|
Dans un prochain article, j’aimerais combiner Terraform avec Ansible afin de gérer le déploiement des VMs, et même explorer l’utilisation de différents workspaces Terraform pour gérer plusieurs environnements.
|
||||||
|
|
||||||
|
À la prochaine !
|
750
content/post/7-terraform-create-proxmox-module.md
Normal file
@@ -0,0 +1,750 @@
|
|||||||
|
---
|
||||||
|
slug: terraform-create-proxmox-module
|
||||||
|
title: Create a Terraform module for Proxmox
|
||||||
|
description: Turn your Proxmox VM code into a reusable Terraform module and learn how to scale deployments across multiple nodes.
|
||||||
|
date: 2025-07-04
|
||||||
|
draft: false
|
||||||
|
tags:
|
||||||
|
- terraform
|
||||||
|
- proxmox
|
||||||
|
- cloud-init
|
||||||
|
categories:
|
||||||
|
- homelab
|
||||||
|
---
|
||||||
|
## Intro
|
||||||
|
|
||||||
|
In a [previous post]({{< ref "post/3-terraform-create-vm-proxmox" >}}), I explained how to deploy **Virtual Machines** on **Proxmox** using **Terraform**, building from a [cloud-init template]({{< ref "post/1-proxmox-cloud-init-vm-template" >}}).
|
||||||
|
|
||||||
|
In this post, we’ll take that code and turn it into a reusable **Terraform module**. Then, I’ll show how to use that module in other projects to simplify and scale your infrastructure deployments.
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
## What is a Terraform Module?
|
||||||
|
|
||||||
|
Terraform modules are reusable components that let you organize and simplify your infrastructure code by grouping related resources into a single unit. Instead of repeating the same configuration across multiple places, you can define it once in a module and use it wherever needed, just like calling a function in programming.
|
||||||
|
|
||||||
|
Modules can be local (within your project) or remote (from the Terraform Registry or a Git repository), making it easy to share and standardize infrastructure patterns across teams or projects. By using modules, you make your code more readable, maintainable, and scalable.
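
As a quick illustration, here is a minimal sketch of what calling a module looks like; the module name, path and variables below are hypothetical and not part of this post's code:

```hcl
# Hypothetical example: calling a local module, much like calling a function
module "web_server" {
  source = "./modules/web_server" # Local path, could also point to the Terraform Registry or a Git repository

  # Arguments map to the variables declared inside the module
  server_name = "demo-01"
  server_ram  = 1024
}

# Values exposed by the module's outputs can be reused in the calling project
output "web_server_ip" {
  value = module.web_server.ip_address
}
```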
|
||||||
|
|
||||||
|
---
|
||||||
|
## Transform Project into Module
|
||||||
|
|
||||||
|
We're now going to extract the Terraform code from the [previous project]({{< ref "post/3-terraform-create-vm-proxmox" >}}) into a reusable module named `pve_vm`.
|
||||||
|
|
||||||
|
> 📌 You can find the full source code in my [Homelab repo](https://github.com/Vezpi/Homelab/). The specific code for this post lives [here](https://github.com/Vezpi/Homelab/tree/3a991010d5e9de30e12cbf365d1a1ca1ff1f6436/terraform). Make sure to adjust the variables to match your environment.
|
||||||
|
|
||||||
|
### Code Structure
|
||||||
|
|
||||||
|
Our module will live next to our projects, in another folder:
|
||||||
|
```plaintext
|
||||||
|
terraform
|
||||||
|
`-- modules
|
||||||
|
`-- pve_vm
|
||||||
|
|-- main.tf
|
||||||
|
|-- provider.tf
|
||||||
|
`-- variables.tf
|
||||||
|
```
|
||||||
|
|
||||||
|
### Module's Code
|
||||||
|
|
||||||
|
📝 Basically, the module files are the same as the project files we are transforming. Providers are declared, but not configured, inside the module.
|
||||||
|
|
||||||
|
The module `pve_vm` will be composed of 3 files:
|
||||||
|
- **main**: The core logic, same code as before.
|
||||||
|
- **provider**: Declares required providers without configuration.
|
||||||
|
- **variables**: Declares module variables, excluding provider-specific ones.
|
||||||
|
|
||||||
|
#### `main.tf`
|
||||||
|
|
||||||
|
```hcl
|
||||||
|
# Retrieve VM templates available in Proxmox that match the specified name
|
||||||
|
data "proxmox_virtual_environment_vms" "template" {
|
||||||
|
filter {
|
||||||
|
name = "name"
|
||||||
|
values = [var.vm_template] # The name of the template to clone from
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Create a cloud-init configuration file as a Proxmox snippet
|
||||||
|
resource "proxmox_virtual_environment_file" "cloud_config" {
|
||||||
|
content_type = "snippets" # Cloud-init files are stored as snippets in Proxmox
|
||||||
|
datastore_id = "local" # Local datastore used to store the snippet
|
||||||
|
node_name = var.node_name # The Proxmox node where the file will be uploaded
|
||||||
|
|
||||||
|
source_raw {
|
||||||
|
file_name = "${var.vm_name}.cloud-config.yaml" # The name of the snippet file
|
||||||
|
data = <<-EOF
|
||||||
|
#cloud-config
|
||||||
|
hostname: ${var.vm_name}
|
||||||
|
package_update: true
|
||||||
|
package_upgrade: true
|
||||||
|
packages:
|
||||||
|
- qemu-guest-agent # Ensures the guest agent is installed
|
||||||
|
users:
|
||||||
|
- default
|
||||||
|
- name: ${var.vm_user}
|
||||||
|
groups: sudo
|
||||||
|
shell: /bin/bash
|
||||||
|
ssh-authorized-keys:
|
||||||
|
- "${var.vm_user_sshkey}" # Inject user's SSH key
|
||||||
|
sudo: ALL=(ALL) NOPASSWD:ALL
|
||||||
|
runcmd:
|
||||||
|
- systemctl enable qemu-guest-agent
|
||||||
|
- reboot # Reboot the VM after provisioning
|
||||||
|
EOF
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Define and provision a new VM by cloning the template and applying initialization
|
||||||
|
resource "proxmox_virtual_environment_vm" "vm" {
|
||||||
|
name = var.vm_name # VM name
|
||||||
|
node_name = var.node_name # Proxmox node to deploy the VM
|
||||||
|
tags = var.vm_tags # Optional VM tags for categorization
|
||||||
|
|
||||||
|
agent {
|
||||||
|
enabled = true # Enable the QEMU guest agent
|
||||||
|
}
|
||||||
|
|
||||||
|
stop_on_destroy = true # Ensure VM is stopped gracefully when destroyed
|
||||||
|
|
||||||
|
clone {
|
||||||
|
vm_id = data.proxmox_virtual_environment_vms.template.vms[0].vm_id # ID of the source template
|
||||||
|
node_name = data.proxmox_virtual_environment_vms.template.vms[0].node_name # Node of the source template
|
||||||
|
}
|
||||||
|
|
||||||
|
bios = var.vm_bios # BIOS type (e.g., seabios or ovmf)
|
||||||
|
machine = var.vm_machine # Machine type (e.g., q35)
|
||||||
|
|
||||||
|
cpu {
|
||||||
|
cores = var.vm_cpu # Number of CPU cores
|
||||||
|
type = "host" # Use host CPU type for best compatibility/performance
|
||||||
|
}
|
||||||
|
|
||||||
|
memory {
|
||||||
|
dedicated = var.vm_ram # RAM in MB
|
||||||
|
}
|
||||||
|
|
||||||
|
disk {
|
||||||
|
datastore_id = var.node_datastore # Datastore to hold the disk
|
||||||
|
interface = "scsi0" # Primary disk interface
|
||||||
|
size = 4 # Disk size in GB
|
||||||
|
}
|
||||||
|
|
||||||
|
initialization {
|
||||||
|
user_data_file_id = proxmox_virtual_environment_file.cloud_config.id # Link the cloud-init file
|
||||||
|
datastore_id = var.node_datastore
|
||||||
|
interface = "scsi1" # Separate interface for cloud-init
|
||||||
|
ip_config {
|
||||||
|
ipv4 {
|
||||||
|
address = "dhcp" # Get IP via DHCP
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
network_device {
|
||||||
|
bridge = "vmbr0" # Use the default bridge
|
||||||
|
vlan_id = var.vm_vlan # VLAN tagging if used
|
||||||
|
}
|
||||||
|
|
||||||
|
operating_system {
|
||||||
|
type = "l26" # Linux 2.6+ kernel
|
||||||
|
}
|
||||||
|
|
||||||
|
vga {
|
||||||
|
type = "std" # Standard VGA type
|
||||||
|
}
|
||||||
|
|
||||||
|
lifecycle {
|
||||||
|
ignore_changes = [ # Ignore initialization section after first deployment for idempotency
|
||||||
|
initialization
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Output the assigned IP address of the VM after provisioning
|
||||||
|
output "vm_ip" {
|
||||||
|
value = proxmox_virtual_environment_vm.vm.ipv4_addresses[1][0] # Second network interface's first IP
|
||||||
|
description = "VM IP"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### `provider.tf`
|
||||||
|
|
||||||
|
```hcl
|
||||||
|
terraform {
|
||||||
|
required_providers {
|
||||||
|
proxmox = {
|
||||||
|
source = "bpg/proxmox"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### `variables.tf`
|
||||||
|
|
||||||
|
> ⚠️ The defaults are based on my environment, adapt them to yours.
|
||||||
|
|
||||||
|
```hcl
|
||||||
|
variable "node_name" {
|
||||||
|
description = "Proxmox host for the VM"
|
||||||
|
type = string
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "node_datastore" {
|
||||||
|
description = "Datastore used for VM storage"
|
||||||
|
type = string
|
||||||
|
default = "ceph-workload"
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "vm_template" {
|
||||||
|
description = "Template of the VM"
|
||||||
|
type = string
|
||||||
|
default = "ubuntu-cloud"
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "vm_name" {
|
||||||
|
description = "Hostname of the VM"
|
||||||
|
type = string
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "vm_user" {
|
||||||
|
description = "Admin user of the VM"
|
||||||
|
type = string
|
||||||
|
default = "vez"
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "vm_user_sshkey" {
|
||||||
|
description = "Admin user SSH key of the VM"
|
||||||
|
type = string
|
||||||
|
default = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAID62LmYRu1rDUha3timAIcA39LtcIOny1iAgFLnxoBxm vez@bastion"
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "vm_cpu" {
|
||||||
|
description = "Number of CPU cores of the VM"
|
||||||
|
type = number
|
||||||
|
default = 1
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "vm_ram" {
|
||||||
|
description = "Number of RAM (MB) of the VM"
|
||||||
|
type = number
|
||||||
|
default = 2048
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "vm_bios" {
|
||||||
|
description = "Type of BIOS used for the VM"
|
||||||
|
type = string
|
||||||
|
default = "ovmf"
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "vm_machine" {
|
||||||
|
description = "Type of machine used for the VM"
|
||||||
|
type = string
|
||||||
|
default = "q35"
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "vm_vlan" {
|
||||||
|
description = "VLAN of the VM"
|
||||||
|
type = number
|
||||||
|
default = 66
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "vm_tags" {
|
||||||
|
description = "Tags for the VM"
|
||||||
|
type = list(any)
|
||||||
|
default = ["test"]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
## Deploy a VM Using our Module
|
||||||
|
|
||||||
|
Now that we’ve extracted all the logic into the `pve_vm` module, our project code only needs to reference that module and pass the required variables. This makes our setup much cleaner and easier to maintain.
|
||||||
|
### Code Structure
|
||||||
|
|
||||||
|
Here is what it looks like:
|
||||||
|
```plaintext
|
||||||
|
terraform
|
||||||
|
|-- modules
|
||||||
|
| `-- pve_vm
|
||||||
|
| |-- main.tf
|
||||||
|
| |-- provider.tf
|
||||||
|
| `-- variables.tf
|
||||||
|
`-- projects
|
||||||
|
`-- simple-vm-with-module
|
||||||
|
|-- credentials.auto.tfvars
|
||||||
|
|-- main.tf
|
||||||
|
|-- provider.tf
|
||||||
|
`-- variables.tf
|
||||||
|
```
|
||||||
|
|
||||||
|
### Project's Code
|
||||||
|
|
||||||
|
In this example, I manually provide the values when calling my module. The provider is configured at project level.
|
||||||
|
#### `main.tf`
|
||||||
|
|
||||||
|
```hcl
|
||||||
|
module "pve_vm" {
|
||||||
|
source = "../../modules/pve_vm"
|
||||||
|
node_name = "zenith"
|
||||||
|
vm_name = "zenith-vm"
|
||||||
|
vm_cpu = 2
|
||||||
|
vm_ram = 2048
|
||||||
|
vm_vlan = 66
|
||||||
|
}
|
||||||
|
|
||||||
|
output "vm_ip" {
|
||||||
|
value = module.pve_vm.vm_ip
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### `provider.tf`
|
||||||
|
|
||||||
|
```hcl
|
||||||
|
terraform {
|
||||||
|
required_providers {
|
||||||
|
proxmox = {
|
||||||
|
source = "bpg/proxmox"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
provider "proxmox" {
|
||||||
|
endpoint = var.proxmox_endpoint
|
||||||
|
api_token = var.proxmox_api_token
|
||||||
|
insecure = false
|
||||||
|
ssh {
|
||||||
|
agent = false
|
||||||
|
private_key = file("~/.ssh/id_ed25519")
|
||||||
|
username = "root"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### `variables.tf`
|
||||||
|
|
||||||
|
```hcl
|
||||||
|
variable "proxmox_endpoint" {
|
||||||
|
description = "Proxmox URL endpoint"
|
||||||
|
type = string
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "proxmox_api_token" {
|
||||||
|
description = "Proxmox API token"
|
||||||
|
type = string
|
||||||
|
sensitive = true
|
||||||
|
}
|
||||||
|
```
|
||||||
|
#### `credentials.auto.tfvars`
|
||||||
|
|
||||||
|
```hcl
|
||||||
|
proxmox_endpoint = <your Proxmox endpoint>
|
||||||
|
proxmox_api_token = <your Proxmox API token for the user terraformer>
|
||||||
|
```
|
||||||
|
|
||||||
|
### Initialize the Terraform Workspace
|
||||||
|
|
||||||
|
In our new project, we first need to initialize the Terraform workspace with `terraform init`:
|
||||||
|
```bash
|
||||||
|
$ terraform init
|
||||||
|
Initializing the backend...
|
||||||
|
Initializing modules...
|
||||||
|
- pve_vm in ../../modules/pve_vm
|
||||||
|
Initializing provider plugins...
|
||||||
|
- Finding latest version of bpg/proxmox...
|
||||||
|
- Installing bpg/proxmox v0.78.2...
|
||||||
|
- Installed bpg/proxmox v0.78.2 (self-signed, key ID F0582AD6AE97C188)
|
||||||
|
Partner and community providers are signed by their developers.
|
||||||
|
If you'd like to know more about provider signing, you can read about it here:
|
||||||
|
https://www.terraform.io/docs/cli/plugins/signing.html
|
||||||
|
Terraform has created a lock file .terraform.lock.hcl to record the provider
|
||||||
|
selections it made above. Include this file in your version control repository
|
||||||
|
so that Terraform can guarantee to make the same selections by default when
|
||||||
|
you run "terraform init" in the future.
|
||||||
|
|
||||||
|
Terraform has been successfully initialized!
|
||||||
|
|
||||||
|
You may now begin working with Terraform. Try running "terraform plan" to see
|
||||||
|
any changes that are required for your infrastructure. All Terraform commands
|
||||||
|
should now work.
|
||||||
|
|
||||||
|
If you ever set or change modules or backend configuration for Terraform,
|
||||||
|
rerun this command to reinitialize your working directory. If you forget, other
|
||||||
|
commands will detect it and remind you to do so if necessary.
|
||||||
|
```
|
||||||
|
|
||||||
|
### Deploy the VM
|
||||||
|
|
||||||
|
Before deploying it, make sure that everything is ok with a `terraform plan`.
|
||||||
|
|
||||||
|
Once ready, you can deploy it with `terraform apply`:
|
||||||
|
```bash
|
||||||
|
$ terraform apply
|
||||||
|
module.pve_vm.data.proxmox_virtual_environment_vms.template: Reading...
|
||||||
|
module.pve_vm.data.proxmox_virtual_environment_vms.template: Read complete after 0s [id=89b444be-7501-4538-9436-08609b380d39]
|
||||||
|
|
||||||
|
Terraform used the selected providers to generate the following execution plan. Resource actions are indicated with the following symbols:
|
||||||
|
+ create
|
||||||
|
|
||||||
|
Terraform will perform the following actions:
|
||||||
|
|
||||||
|
# module.pve_vm.proxmox_virtual_environment_file.cloud_config will be created
|
||||||
|
+ resource "proxmox_virtual_environment_file" "cloud_config" {
|
||||||
|
+ content_type = "snippets"
|
||||||
|
+ datastore_id = "local"
|
||||||
|
+ file_modification_date = (known after apply)
|
||||||
|
+ file_name = (known after apply)
|
||||||
|
+ file_size = (known after apply)
|
||||||
|
+ file_tag = (known after apply)
|
||||||
|
+ id = (known after apply)
|
||||||
|
+ node_name = "zenith"
|
||||||
|
+ overwrite = true
|
||||||
|
+ timeout_upload = 1800
|
||||||
|
|
||||||
|
+ source_raw {
|
||||||
|
+ data = <<-EOT
|
||||||
|
#cloud-config
|
||||||
|
hostname: zenith-vm
|
||||||
|
package_update: true
|
||||||
|
package_upgrade: true
|
||||||
|
packages:
|
||||||
|
- qemu-guest-agent
|
||||||
|
users:
|
||||||
|
- default
|
||||||
|
- name: vez
|
||||||
|
groups: sudo
|
||||||
|
shell: /bin/bash
|
||||||
|
ssh-authorized-keys:
|
||||||
|
- "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAID62LmYRu1rDUha3timAIcA39LtcIOny1iAgFLnxoBxm vez@bastion"
|
||||||
|
sudo: ALL=(ALL) NOPASSWD:ALL
|
||||||
|
runcmd:
|
||||||
|
- systemctl enable qemu-guest-agent
|
||||||
|
- reboot
|
||||||
|
EOT
|
||||||
|
+ file_name = "zenith-vm.cloud-config.yaml"
|
||||||
|
+ resize = 0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# module.pve_vm.proxmox_virtual_environment_vm.vm will be created
|
||||||
|
+ resource "proxmox_virtual_environment_vm" "vm" {
|
||||||
|
+ acpi = true
|
||||||
|
+ bios = "ovmf"
|
||||||
|
+ id = (known after apply)
|
||||||
|
+ ipv4_addresses = (known after apply)
|
||||||
|
+ ipv6_addresses = (known after apply)
|
||||||
|
+ keyboard_layout = "en-us"
|
||||||
|
+ mac_addresses = (known after apply)
|
||||||
|
+ machine = "q35"
|
||||||
|
+ migrate = false
|
||||||
|
+ name = "zenith-vm"
|
||||||
|
+ network_interface_names = (known after apply)
|
||||||
|
+ node_name = "zenith"
|
||||||
|
+ on_boot = true
|
||||||
|
+ protection = false
|
||||||
|
+ reboot = false
|
||||||
|
+ reboot_after_update = true
|
||||||
|
+ scsi_hardware = "virtio-scsi-pci"
|
||||||
|
+ started = true
|
||||||
|
+ stop_on_destroy = true
|
||||||
|
+ tablet_device = true
|
||||||
|
+ tags = [
|
||||||
|
+ "test",
|
||||||
|
]
|
||||||
|
+ template = false
|
||||||
|
+ timeout_clone = 1800
|
||||||
|
+ timeout_create = 1800
|
||||||
|
+ timeout_migrate = 1800
|
||||||
|
+ timeout_move_disk = 1800
|
||||||
|
+ timeout_reboot = 1800
|
||||||
|
+ timeout_shutdown_vm = 1800
|
||||||
|
+ timeout_start_vm = 1800
|
||||||
|
+ timeout_stop_vm = 300
|
||||||
|
+ vm_id = (known after apply)
|
||||||
|
|
||||||
|
+ agent {
|
||||||
|
+ enabled = true
|
||||||
|
+ timeout = "15m"
|
||||||
|
+ trim = false
|
||||||
|
+ type = "virtio"
|
||||||
|
}
|
||||||
|
|
||||||
|
+ clone {
|
||||||
|
+ full = true
|
||||||
|
+ node_name = "apex"
|
||||||
|
+ retries = 1
|
||||||
|
+ vm_id = 900
|
||||||
|
}
|
||||||
|
|
||||||
|
+ cpu {
|
||||||
|
+ cores = 2
|
||||||
|
+ hotplugged = 0
|
||||||
|
+ limit = 0
|
||||||
|
+ numa = false
|
||||||
|
+ sockets = 1
|
||||||
|
+ type = "host"
|
||||||
|
+ units = 1024
|
||||||
|
}
|
||||||
|
|
||||||
|
+ disk {
|
||||||
|
+ aio = "io_uring"
|
||||||
|
+ backup = true
|
||||||
|
+ cache = "none"
|
||||||
|
+ datastore_id = "ceph-workload"
|
||||||
|
+ discard = "ignore"
|
||||||
|
+ file_format = (known after apply)
|
||||||
|
+ interface = "scsi0"
|
||||||
|
+ iothread = false
|
||||||
|
+ path_in_datastore = (known after apply)
|
||||||
|
+ replicate = true
|
||||||
|
+ size = 4
|
||||||
|
+ ssd = false
|
||||||
|
}
|
||||||
|
|
||||||
|
+ initialization {
|
||||||
|
+ datastore_id = "ceph-workload"
|
||||||
|
+ interface = "scsi1"
|
||||||
|
+ meta_data_file_id = (known after apply)
|
||||||
|
+ network_data_file_id = (known after apply)
|
||||||
|
+ type = (known after apply)
|
||||||
|
+ user_data_file_id = (known after apply)
|
||||||
|
+ vendor_data_file_id = (known after apply)
|
||||||
|
|
||||||
|
+ ip_config {
|
||||||
|
+ ipv4 {
|
||||||
|
+ address = "dhcp"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
+ memory {
|
||||||
|
+ dedicated = 2048
|
||||||
|
+ floating = 0
|
||||||
|
+ keep_hugepages = false
|
||||||
|
+ shared = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
+ network_device {
|
||||||
|
+ bridge = "vmbr0"
|
||||||
|
+ enabled = true
|
||||||
|
+ firewall = false
|
||||||
|
+ mac_address = (known after apply)
|
||||||
|
+ model = "virtio"
|
||||||
|
+ mtu = 0
|
||||||
|
+ queues = 0
|
||||||
|
+ rate_limit = 0
|
||||||
|
+ vlan_id = 66
|
||||||
|
}
|
||||||
|
|
||||||
|
+ operating_system {
|
||||||
|
+ type = "l26"
|
||||||
|
}
|
||||||
|
|
||||||
|
+ vga {
|
||||||
|
+ memory = 16
|
||||||
|
+ type = "std"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Plan: 2 to add, 0 to change, 0 to destroy.
|
||||||
|
|
||||||
|
Changes to Outputs:
|
||||||
|
+ vm_ip = (known after apply)
|
||||||
|
|
||||||
|
Do you want to perform these actions?
|
||||||
|
Terraform will perform the actions described above.
|
||||||
|
Only 'yes' will be accepted to approve.
|
||||||
|
|
||||||
|
Enter a value: yes
|
||||||
|
|
||||||
|
module.pve_vm.proxmox_virtual_environment_file.cloud_config: Creating...
|
||||||
|
module.pve_vm.proxmox_virtual_environment_file.cloud_config: Creation complete after 1s [id=local:snippets/zenith-vm.cloud-config.yaml]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Creating...
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [10s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [20s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [30s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [40s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [50s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [1m0s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [1m10s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [1m20s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [1m30s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [1m40s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [1m50s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [2m0s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [2m10s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [2m20s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [2m30s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [2m40s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [2m50s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [3m0s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [3m10s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Creation complete after 3m13s [id=103]
|
||||||
|
|
||||||
|
Apply complete! Resources: 2 added, 0 changed, 0 destroyed.
|
||||||
|
|
||||||
|
Outputs:
|
||||||
|
|
||||||
|
vm_ip = "192.168.66.159"
|
||||||
|
```
|
||||||
|
|
||||||
|
✅ The VM is now ready!
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
🕗 *Don't pay attention to the uptime, I took the screenshot the next day*
|
||||||
|
|
||||||
|
---
|
||||||
|
## Deploy Multiple VMs at Once
|
||||||
|
|
||||||
|
Ok, I've deployed a single VM, fine. But now, how to scale it? How to deploy multiple instances of that template, with different names, on different nodes, and with different sizes? This is what I will show you now.
|
||||||
|
|
||||||
|
### One VM per Node
|
||||||
|
|
||||||
|
In the earlier example, we passed fixed values to the module. Instead, we could define a local object to store the VM specs, and reference its values when calling the module. This approach makes it easier to scale the deployment logic later:
|
||||||
|
```hcl
|
||||||
|
module "pve_vm" {
|
||||||
|
source = "../../modules/pve_vm"
|
||||||
|
node_name = local.vm.node_name
|
||||||
|
vm_name = local.vm.vm_name
|
||||||
|
vm_cpu = local.vm.vm_cpu
|
||||||
|
vm_ram = local.vm.vm_ram
|
||||||
|
vm_vlan = local.vm.vm_vlan
|
||||||
|
}
|
||||||
|
|
||||||
|
locals {
|
||||||
|
vm = {
|
||||||
|
node_name = "zenith"
|
||||||
|
vm_name = "zenith-vm"
|
||||||
|
vm_cpu = 2
|
||||||
|
vm_ram = 2048
|
||||||
|
vm_vlan = 66
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
I could also call the module while iterating over a map of VM definitions:
|
||||||
|
```hcl
|
||||||
|
module "pve_vm" {
|
||||||
|
source = "../../modules/pve_vm"
|
||||||
|
for_each = local.vm_list
|
||||||
|
node_name = each.value.node_name
|
||||||
|
vm_name = each.value.vm_name
|
||||||
|
vm_cpu = each.value.vm_cpu
|
||||||
|
vm_ram = each.value.vm_ram
|
||||||
|
vm_vlan = each.value.vm_vlan
|
||||||
|
}
|
||||||
|
|
||||||
|
locals {
|
||||||
|
vm_list = {
|
||||||
|
zenith = {
|
||||||
|
node_name = "zenith"
|
||||||
|
vm_name = "zenith-vm"
|
||||||
|
vm_cpu = 2
|
||||||
|
vm_ram = 2048
|
||||||
|
vm_vlan = 66
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
While this does not make sense with only one VM, I could use this module syntax, for example, to deploy one VM per node:
|
||||||
|
```hcl
|
||||||
|
module "pve_vm" {
|
||||||
|
source = "../../modules/pve_vm"
|
||||||
|
for_each = local.vm_list
|
||||||
|
node_name = each.value.node_name
|
||||||
|
vm_name = each.value.vm_name
|
||||||
|
vm_cpu = each.value.vm_cpu
|
||||||
|
vm_ram = each.value.vm_ram
|
||||||
|
vm_vlan = each.value.vm_vlan
|
||||||
|
}
|
||||||
|
|
||||||
|
locals {
|
||||||
|
vm_list = {
|
||||||
|
for vm in flatten([
|
||||||
|
for node in data.proxmox_virtual_environment_nodes.pve_nodes.names : {
|
||||||
|
node_name = node
|
||||||
|
vm_name = "${node}-vm"
|
||||||
|
vm_cpu = 2
|
||||||
|
vm_ram = 2048
|
||||||
|
vm_vlan = 66
|
||||||
|
}
|
||||||
|
]) : vm.vm_name => vm
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
data "proxmox_virtual_environment_nodes" "pve_nodes" {}
|
||||||
|
|
||||||
|
output "vm_ip" {
|
||||||
|
value = { for k, v in module.pve_vm : k => v.vm_ip }
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
✅ This would deploy 3 VMs on my cluster, one per node.
|
||||||
|
|
||||||
|
### Multiple VM per Node
|
||||||
|
|
||||||
|
Finally, let’s scale things up by deploying multiple VMs with different configurations per node. We’ll define a set of roles and use a nested loop to generate the desired VM configurations for each Proxmox node:
|
||||||
|
```hcl
|
||||||
|
module "pve_vm" {
|
||||||
|
source = "../../modules/pve_vm"
|
||||||
|
for_each = local.vm_list
|
||||||
|
node_name = each.value.node_name
|
||||||
|
vm_name = each.value.vm_name
|
||||||
|
vm_cpu = each.value.vm_cpu
|
||||||
|
vm_ram = each.value.vm_ram
|
||||||
|
vm_vlan = each.value.vm_vlan
|
||||||
|
}
|
||||||
|
|
||||||
|
locals {
|
||||||
|
vm_attr = {
|
||||||
|
"master" = { ram = 2048, cpu = 2, vlan = 66 }
|
||||||
|
"worker" = { ram = 1024, cpu = 1, vlan = 66 }
|
||||||
|
}
|
||||||
|
|
||||||
|
vm_list = {
|
||||||
|
for vm in flatten([
|
||||||
|
for node in data.proxmox_virtual_environment_nodes.pve_nodes.names : [
|
||||||
|
for role, config in local.vm_attr : {
|
||||||
|
node_name = node
|
||||||
|
vm_name = "${node}-${role}"
|
||||||
|
vm_cpu = config.cpu
|
||||||
|
vm_ram = config.ram
|
||||||
|
vm_vlan = config.vlan
|
||||||
|
}
|
||||||
|
]
|
||||||
|
]) : vm.vm_name => vm
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
data "proxmox_virtual_environment_nodes" "pve_nodes" {}
|
||||||
|
|
||||||
|
output "vm_ip" {
|
||||||
|
value = { for k, v in module.pve_vm : k => v.vm_ip }
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
🚀 After deploying it with a `terraform apply`, I got this:
|
||||||
|
```bash
|
||||||
|
Apply complete! Resources: 6 added, 0 changed, 0 destroyed.
|
||||||
|
|
||||||
|
Outputs:
|
||||||
|
|
||||||
|
vm_ip = {
|
||||||
|
"apex-master" = "192.168.66.167"
|
||||||
|
"apex-worker" = "192.168.66.168"
|
||||||
|
"vertex-master" = "192.168.66.169"
|
||||||
|
"vertex-worker" = "192.168.66.170"
|
||||||
|
"zenith-master" = "192.168.66.166"
|
||||||
|
"zenith-worker" = "192.168.66.172"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
## Conclusion
|
||||||
|
|
||||||
|
We’ve transformed our Proxmox VM deployment into a reusable Terraform module and used it to easily scale our infrastructure across multiple nodes.
|
||||||
|
|
||||||
|
In a future post, I would like to pair Terraform with Ansible to manage the VM deployment, and even explore using different Terraform workspaces to handle several environments.
|
||||||
|
|
||||||
|
Stay tuned!
|
636
content/post/8-create-manual-kubernetes-cluster-kubeadm.fr.md
Normal file
@@ -0,0 +1,636 @@
|
|||||||
|
---
|
||||||
|
slug: create-manual-kubernetes-cluster-kubeadm
|
||||||
|
title: Créer un Cluster Kubernetes Hautement Disponible avec kubeadm sur des VMs
|
||||||
|
description: Guide étape par étape pour créer manuellement un cluster Kubernetes hautement disponible sur des machines virtuelles avec kubeadm.
|
||||||
|
date: 2025-07-18
|
||||||
|
draft: false
|
||||||
|
tags:
|
||||||
|
- kubernetes
|
||||||
|
- kubeadm
|
||||||
|
- high-availability
|
||||||
|
categories:
|
||||||
|
- homelab
|
||||||
|
---
|
||||||
|
|
||||||
|
## Intro
|
||||||
|
|
||||||
|
Dans cet [article précédent]({{< ref "post/7-terraform-create-proxmox-module" >}}), j'expliquais comment déployer des VMs avec un module **Terraform** sur **Proxmox** et j'avais terminé avec 6 VMs, 3 nœuds masters et 3 nœuds workers, en m'appuyant sur un [template cloud-init]({{< ref "post/1-proxmox-cloud-init-vm-template" >}}).
|
||||||
|
|
||||||
|
Maintenant que l'infrastructure est prête, passons à l'étape suivante : **créer manuellement un cluster Kubernetes** hautement disponible dans mon homelab avec `kubeadm`, en utilisant `etcd` empilé (stacked).
|
||||||
|
|
||||||
|
Dans cet article, je vais détailler chaque étape de l'installation d’un cluster Kubernetes. Je n'utiliserai pas d'outil d'automatisation pour configurer les nœuds pour le moment, afin de mieux comprendre les étapes impliquées dans le bootstrap d’un cluster Kubernetes. L'automatisation sera couverte dans de futurs articles.
|
||||||
|
|
||||||
|
---
|
||||||
|
## Qu'est-ce que Kubernetes ?
|
||||||
|
|
||||||
|
Kubernetes est une plateforme open-source qui orchestre des containers sur un ensemble de machines. Elle gère le déploiement, la montée en charge et la santé des applications conteneurisées, ce qui vous permet de vous concentrer sur vos services plutôt que sur l’infrastructure sous-jacente.
|
||||||
|
|
||||||
|
Un cluster Kubernetes est composé de deux types de nœuds : les nœuds control plane (masters) et les workers. Le control plane assure la gestion globale du cluster, il prend les décisions de planification, surveille l’état du système et réagit aux événements. Les workers, eux, exécutent réellement vos applications, dans des containers gérés par Kubernetes.
|
||||||
|
|
||||||
|
Dans cet article, nous allons mettre en place manuellement un cluster Kubernetes avec 3 nœuds control plane et 3 workers. Cette architecture reflète un environnement hautement disponible et proche de la production, même si l’objectif ici est avant tout pédagogique.
|
||||||
|
|
||||||
|
La documentation officielle se trouve [ici](https://kubernetes.io/docs/setup/production-environment/tools/kubeadm/), je vais utiliser la version **v1.32**.
|
||||||
|
|
||||||
|
---
|
||||||
|
## Préparer les Nœuds
|
||||||
|
|
||||||
|
Je vais exécuter les étapes suivantes sur les **6 VMs** (masters et workers).
|
||||||
|
|
||||||
|
### Hostname
|
||||||
|
|
||||||
|
Chaque VM possède un **nom d’hôte unique** et tous les nœuds doivent pouvoir **se résoudre entre eux**.
|
||||||
|
|
||||||
|
Le nom d’hôte est défini à la création de la VM via cloud-init. Mais pour la démonstration, je vais le définir manuellement :
|
||||||
|
```bash
|
||||||
|
sudo hostnamectl set-hostname <hostname>
|
||||||
|
```
|
||||||
|
|
||||||
|
Dans mon infrastructure, les nœuds se résolvent via mon serveur DNS sur le domaine `lab.vezpi.me`. Si vous n’avez pas de DNS, vous pouvez inscrire manuellement les IPs des nœuds dans le fichier `/etc/hosts` :
|
||||||
|
```bash
|
||||||
|
192.168.66.168 apex-worker
|
||||||
|
192.168.66.167 apex-master
|
||||||
|
192.168.66.166 zenith-master
|
||||||
|
192.168.66.170 vertex-worker
|
||||||
|
192.168.66.169 vertex-master
|
||||||
|
192.168.66.172 zenith-worker
|
||||||
|
```
|
||||||
|
|
||||||
|
### Mises à jour Système
|
||||||
|
|
||||||
|
Mes VMs tournent sous **Ubuntu 24.04.2 LTS**. Cloud-init s’occupe des mises à jour après le provisionnement, mais on s’assure quand même que tout est bien à jour et on installe les paquets nécessaires pour ajouter le dépôt Kubernetes :
|
||||||
|
```bash
|
||||||
|
sudo apt update && sudo apt upgrade -y
|
||||||
|
sudo apt install -y apt-transport-https ca-certificates curl gpg
|
||||||
|
```
|
||||||
|
|
||||||
|
### Swap
|
||||||
|
|
||||||
|
Par défaut, `kubelet` ne démarre pas si une **mémoire swap** est détectée sur un nœud. Il faut donc la désactiver ou la rendre tolérable par `kubelet`.
|
||||||
|
|
||||||
|
Mes VMs ne disposent pas de swap, mais voici comment le désactiver si besoin :
|
||||||
|
```bash
|
||||||
|
sudo swapoff -a
|
||||||
|
sudo sed -i '/ swap / s/^/#/' /etc/fstab
|
||||||
|
```
|
||||||
|
|
||||||
|
### Pare-feu
|
||||||
|
|
||||||
|
Dans ce lab, je désactive simplement le pare-feu local (à ne pas faire en production) :
|
||||||
|
```bash
|
||||||
|
sudo systemctl disable --now ufw
|
||||||
|
```
|
||||||
|
|
||||||
|
En production, vous devez autoriser la communication entre les nœuds sur les ports suivants :
|
||||||
|
#### Control Plane
|
||||||
|
|
||||||
|
| Protocole | Direction | Ports | Usage | Utilisé par |
|
||||||
|
| --------- | --------- | --------- | ----------------------- | -------------------- |
|
||||||
|
| TCP | Entrant | 6443 | API server Kubernetes | Tous |
|
||||||
|
| TCP | Entrant | 2379-2380 | API client etcd | kube-apiserver, etcd |
|
||||||
|
| TCP | Entrant | 10250 | API Kubelet | Plan de contrôle |
|
||||||
|
| TCP | Entrant | 10259 | kube-scheduler | Lui-même |
|
||||||
|
| TCP | Entrant | 10257 | kube-controller-manager | Lui-même |
|
||||||
|
#### Worker
|
||||||
|
|
||||||
|
| Protocole | Direction | Ports | Usage | Utilisé par |
|
||||||
|
| --------- | --------- | ----------- | ----------------- | -------------- |
|
||||||
|
| TCP       | Entrant   | 10250       | API Kubelet       | Plan de contrôle |
|
||||||
|
| TCP | Entrant | 10256 | kube-proxy | Load balancers |
|
||||||
|
| TCP | Entrant | 30000-32767 | Services NodePort | Tous |
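
À titre d'illustration, voici une ébauche de règles `ufw` correspondantes (hypothétique et à adapter, le sous-réseau `192.168.66.0/24` est celui de mon lab) :

```bash
# Ébauche : autoriser les ports Kubernetes depuis le réseau des nœuds (192.168.66.0/24, à adapter)
# Sur les nœuds control plane
sudo ufw allow from 192.168.66.0/24 to any port 6443 proto tcp        # API server Kubernetes
sudo ufw allow from 192.168.66.0/24 to any port 2379:2380 proto tcp   # API client etcd
sudo ufw allow from 192.168.66.0/24 to any port 10250 proto tcp       # API Kubelet
sudo ufw allow from 192.168.66.0/24 to any port 10259 proto tcp       # kube-scheduler
sudo ufw allow from 192.168.66.0/24 to any port 10257 proto tcp       # kube-controller-manager

# Sur les nœuds worker
sudo ufw allow from 192.168.66.0/24 to any port 10250 proto tcp       # API Kubelet
sudo ufw allow from 192.168.66.0/24 to any port 10256 proto tcp       # kube-proxy
sudo ufw allow from 192.168.66.0/24 to any port 30000:32767 proto tcp # Services NodePort

sudo ufw enable
```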
|
||||||
|
### Modules Noyau et Paramètres sysctl
|
||||||
|
|
||||||
|
Kubernetes requiert l’activation de deux modules noyau :
|
||||||
|
- **overlay** : pour permettre l’empilement de systèmes de fichiers.
|
||||||
|
- **br_netfilter** : pour activer le filtrage des paquets sur les interfaces bridge.
|
||||||
|
|
||||||
|
Activation des modules :
|
||||||
|
```bash
|
||||||
|
cat <<EOF | sudo tee /etc/modules-load.d/k8s.conf
|
||||||
|
overlay
|
||||||
|
br_netfilter
|
||||||
|
EOF
|
||||||
|
|
||||||
|
sudo modprobe overlay
|
||||||
|
sudo modprobe br_netfilter
|
||||||
|
```
|
||||||
|
|
||||||
|
Appliquer les paramètres noyau nécessaires pour la partie réseau :
|
||||||
|
```bash
|
||||||
|
cat <<EOF | sudo tee /etc/sysctl.d/k8s.conf
|
||||||
|
net.bridge.bridge-nf-call-iptables = 1
|
||||||
|
net.bridge.bridge-nf-call-ip6tables = 1
|
||||||
|
net.ipv4.ip_forward = 1
|
||||||
|
EOF
|
||||||
|
|
||||||
|
sudo sysctl --system
|
||||||
|
```
|
||||||
|
|
||||||
|
### Runtime de Containers
|
||||||
|
|
||||||
|
Chaque nœud du cluster doit disposer d’un **runtime de containers** pour pouvoir exécuter des Pods. J’utilise ici `containerd` :
|
||||||
|
```bash
|
||||||
|
sudo apt install -y containerd
|
||||||
|
```
|
||||||
|
|
||||||
|
Créer la configuration par défaut :
|
||||||
|
```bash
|
||||||
|
sudo mkdir -p /etc/containerd
|
||||||
|
containerd config default | sudo tee /etc/containerd/config.toml > /dev/null
|
||||||
|
```
|
||||||
|
|
||||||
|
Utiliser `systemd` comme pilote de _cgroup_ :
|
||||||
|
```bash
|
||||||
|
sudo sed -i 's/^\(\s*SystemdCgroup\s*=\s*\)false/\1true/' /etc/containerd/config.toml
|
||||||
|
```
|
||||||
|
|
||||||
|
Redémarrer et activer le service `containerd` :
|
||||||
|
```bash
|
||||||
|
sudo systemctl restart containerd
|
||||||
|
sudo systemctl enable containerd
|
||||||
|
```
|
||||||
|
|
||||||
|
### Paquets Kubernetes
|
||||||
|
|
||||||
|
Dernière étape : installer les paquets Kubernetes. On commence par ajouter le dépôt officiel et sa clé de signature.
|
||||||
|
|
||||||
|
Ajouter la clé :
|
||||||
|
```bash
|
||||||
|
curl -fsSL https://pkgs.k8s.io/core:/stable:/v1.32/deb/Release.key | sudo gpg --dearmor -o /etc/apt/keyrings/kubernetes-apt-keyring.gpg
|
||||||
|
```
|
||||||
|
|
||||||
|
Ajouter le dépôt :
|
||||||
|
```bash
|
||||||
|
echo 'deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://pkgs.k8s.io/core:/stable:/v1.32/deb/ /' | sudo tee /etc/apt/sources.list.d/kubernetes.list
|
||||||
|
```
|
||||||
|
|
||||||
|
Installer ensuite les paquets nécessaires :
|
||||||
|
- `kubeadm` : l’outil pour initier un cluster Kubernetes.
|
||||||
|
- `kubelet` : l’agent qui s’exécute sur tous les nœuds et qui gère les pods/containers.
|
||||||
|
- `kubectl` : l’outil en ligne de commande pour interagir avec le cluster.
|
||||||
|
|
||||||
|
Sur les nœuds, on installe `kubelet` et `kubeadm`, puis on les fige :
|
||||||
|
```bash
|
||||||
|
sudo apt-get update
|
||||||
|
sudo apt-get install -y kubelet kubeadm
|
||||||
|
sudo apt-mark hold kubelet kubeadm
|
||||||
|
```
|
||||||
|
|
||||||
|
ℹ️ Je ne gérerai pas le cluster depuis les nœuds eux-mêmes, j’installe `kubectl` sur mon contrôleur LXC à la place :
|
||||||
|
```bash
|
||||||
|
sudo apt-get update
|
||||||
|
sudo apt-get install -y kubectl
|
||||||
|
sudo apt-mark hold kubectl
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
## Initialiser le Cluster
|
||||||
|
|
||||||
|
Une fois tous les nœuds préparés, on peut initialiser le **plan de contrôle** Kubernetes sur le **premier nœud master**.
|
||||||
|
|
||||||
|
### Amorcer le Cluster
|
||||||
|
|
||||||
|
Exécutez la commande suivante pour amorcer le cluster :
|
||||||
|
```bash
|
||||||
|
sudo kubeadm init \
|
||||||
|
--control-plane-endpoint "k8s-lab.lab.vezpi.me:6443" \
|
||||||
|
--upload-certs \
|
||||||
|
--pod-network-cidr=10.10.0.0/16
|
||||||
|
```
|
||||||
|
|
||||||
|
**Explications** :
|
||||||
|
- `--control-plane-endpoint` : Nom DNS pour votre plan de contrôle.
|
||||||
|
- `--upload-certs` : Téléverse les certificats qui doivent être partagés entre tous les nœuds master du cluster.
|
||||||
|
- `--pod-network-cidr` : Sous-réseau à utiliser pour le CNI.
|
||||||
|
|
||||||
|
Cette étape va :
|
||||||
|
- Initialiser la base `etcd` et les composants du plan de contrôle.
|
||||||
|
- Configurer RBAC et les tokens d’amorçage.
|
||||||
|
- Afficher deux commandes `kubeadm join` importantes : une pour les **workers**, l’autre pour les **masters supplémentaires**.
|
||||||
|
|
||||||
|
ℹ️ Le nom DNS `k8s-lab.lab.vezpi.me` est géré dans mon homelab par **Unbound DNS**, cela résout sur mon interface d'**OPNsense** où un service **HAProxy** écoute sur le port 6443 et équilibre la charge entre les 3 nœuds du plan de contrôle.
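
Pour vérifier que ce point d'entrée répond, on peut par exemple lancer ces quelques commandes (simple vérification indicative, le nom DNS est celui de mon lab) :

```bash
# Vérifier que le nom DNS du plan de contrôle se résout (remplacez par le vôtre)
getent hosts k8s-lab.lab.vezpi.me

# Tester la connexion TCP vers HAProxy sur le port 6443
timeout 2 bash -c '</dev/tcp/k8s-lab.lab.vezpi.me/6443' && echo "6443 joignable" || echo "6443 injoignable"
```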
|
||||||
|
|
||||||
|
Vous verrez aussi un message indiquant comment configurer l’accès `kubectl`.
|
||||||
|
|
||||||
|
```plaintext
|
||||||
|
I0718 07:18:29.306814 14724 version.go:261] remote version is much newer: v1.33.3; falling back to: stable-1.32
|
||||||
|
[init] Using Kubernetes version: v1.32.7
|
||||||
|
[preflight] Running pre-flight checks
|
||||||
|
[preflight] Pulling images required for setting up a Kubernetes cluster
|
||||||
|
[preflight] This might take a minute or two, depending on the speed of your internet connection
|
||||||
|
[preflight] You can also perform this action beforehand using 'kubeadm config images pull'
|
||||||
|
W0718 07:18:29.736833 14724 checks.go:846] detected that the sandbox image "registry.k8s.io/pause:3.8" of the container runtime is inconsistent with that used by kubeadm.It is recommended to use "registry.k8s.io/pause:3.10" as the CRI sandbox image.
|
||||||
|
[certs] Using certificateDir folder "/etc/kubernetes/pki"
|
||||||
|
[certs] Generating "ca" certificate and key
|
||||||
|
[certs] Generating "apiserver" certificate and key
|
||||||
|
[certs] apiserver serving cert is signed for DNS names [apex-master k8s-lab.lab.vezpi.me kubernetes kubernetes.default kubernetes.default.svc kubernetes.default.svc.cluster.local] and IPs [10.96.0.1 192.168.66.167]
|
||||||
|
[certs] Generating "apiserver-kubelet-client" certificate and key
|
||||||
|
[certs] Generating "front-proxy-ca" certificate and key
|
||||||
|
[certs] Generating "front-proxy-client" certificate and key
|
||||||
|
[certs] Generating "etcd/ca" certificate and key
|
||||||
|
[certs] Generating "etcd/server" certificate and key
|
||||||
|
[certs] etcd/server serving cert is signed for DNS names [apex-master localhost] and IPs [192.168.66.167 127.0.0.1 ::1]
|
||||||
|
[certs] Generating "etcd/peer" certificate and key
|
||||||
|
[certs] etcd/peer serving cert is signed for DNS names [apex-master localhost] and IPs [192.168.66.167 127.0.0.1 ::1]
|
||||||
|
[certs] Generating "etcd/healthcheck-client" certificate and key
|
||||||
|
[certs] Generating "apiserver-etcd-client" certificate and key
|
||||||
|
[certs] Generating "sa" key and public key
|
||||||
|
[kubeconfig] Using kubeconfig folder "/etc/kubernetes"
|
||||||
|
[kubeconfig] Writing "admin.conf" kubeconfig file
|
||||||
|
[kubeconfig] Writing "super-admin.conf" kubeconfig file
|
||||||
|
[kubeconfig] Writing "kubelet.conf" kubeconfig file
|
||||||
|
[kubeconfig] Writing "controller-manager.conf" kubeconfig file
|
||||||
|
[kubeconfig] Writing "scheduler.conf" kubeconfig file
|
||||||
|
[etcd] Creating static Pod manifest for local etcd in "/etc/kubernetes/manifests"
|
||||||
|
[control-plane] Using manifest folder "/etc/kubernetes/manifests"
|
||||||
|
[control-plane] Creating static Pod manifest for "kube-apiserver"
|
||||||
|
[control-plane] Creating static Pod manifest for "kube-controller-manager"
|
||||||
|
[control-plane] Creating static Pod manifest for "kube-scheduler"
|
||||||
|
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
|
||||||
|
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
|
||||||
|
[kubelet-start] Starting the kubelet
|
||||||
|
[wait-control-plane] Waiting for the kubelet to boot up the control plane as static Pods from directory "/etc/kubernetes/manifests"
|
||||||
|
[kubelet-check] Waiting for a healthy kubelet at http://127.0.0.1:10248/healthz. This can take up to 4m0s
|
||||||
|
[kubelet-check] The kubelet is healthy after 501.894876ms
|
||||||
|
[api-check] Waiting for a healthy API server. This can take up to 4m0s
|
||||||
|
[api-check] The API server is healthy after 9.030595455s
|
||||||
|
[upload-config] Storing the configuration used in ConfigMap "kubeadm-config" in the "kube-system" Namespace
|
||||||
|
[kubelet] Creating a ConfigMap "kubelet-config" in namespace kube-system with the configuration for the kubelets in the cluster
|
||||||
|
[upload-certs] Storing the certificates in Secret "kubeadm-certs" in the "kube-system" Namespace
|
||||||
|
[upload-certs] Using certificate key:
|
||||||
|
70614009469f9fc7a97c392253492c509f1884281f59ccd7725b3200e3271794
|
||||||
|
[mark-control-plane] Marking the node apex-master as control-plane by adding the labels: [node-role.kubernetes.io/control-plane node.kubernetes.io/exclude-from-external-load-balancers]
|
||||||
|
[mark-control-plane] Marking the node apex-master as control-plane by adding the taints [node-role.kubernetes.io/control-plane:NoSchedule]
|
||||||
|
[bootstrap-token] Using token: 8etamd.g8whseg60kg09nu1
|
||||||
|
[bootstrap-token] Configuring bootstrap tokens, cluster-info ConfigMap, RBAC Roles
|
||||||
|
[bootstrap-token] Configured RBAC rules to allow Node Bootstrap tokens to get nodes
|
||||||
|
[bootstrap-token] Configured RBAC rules to allow Node Bootstrap tokens to post CSRs in order for nodes to get long term certificate credentials
|
||||||
|
[bootstrap-token] Configured RBAC rules to allow the csrapprover controller automatically approve CSRs from a Node Bootstrap Token
|
||||||
|
[bootstrap-token] Configured RBAC rules to allow certificate rotation for all node client certificates in the cluster
|
||||||
|
[bootstrap-token] Creating the "cluster-info" ConfigMap in the "kube-public" namespace
|
||||||
|
[kubelet-finalize] Updating "/etc/kubernetes/kubelet.conf" to point to a rotatable kubelet client certificate and key
|
||||||
|
[addons] Applied essential addon: CoreDNS
|
||||||
|
[addons] Applied essential addon: kube-proxy
|
||||||
|
|
||||||
|
Your Kubernetes control-plane has initialized successfully!
|
||||||
|
|
||||||
|
To start using your cluster, you need to run the following as a regular user:
|
||||||
|
|
||||||
|
mkdir -p $HOME/.kube
|
||||||
|
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
|
||||||
|
sudo chown $(id -u):$(id -g) $HOME/.kube/config
|
||||||
|
|
||||||
|
Alternatively, if you are the root user, you can run:
|
||||||
|
|
||||||
|
export KUBECONFIG=/etc/kubernetes/admin.conf
|
||||||
|
|
||||||
|
You should now deploy a pod network to the cluster.
|
||||||
|
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
|
||||||
|
https://kubernetes.io/docs/concepts/cluster-administration/addons/
|
||||||
|
|
||||||
|
You can now join any number of control-plane nodes running the following command on each as root:
|
||||||
|
|
||||||
|
kubeadm join k8s-lab.lab.vezpi.me:6443 --token 8etamd.g8whseg60kg09nu1 \
|
||||||
|
--discovery-token-ca-cert-hash sha256:65c4da3121f57d2e67ea6c1c1349544c9e295d78790b199b5c3be908ffe5ed6c \
|
||||||
|
--control-plane --certificate-key 70614009469f9fc7a97c392253492c509f1884281f59ccd7725b3200e3271794
|
||||||
|
|
||||||
|
Please note that the certificate-key gives access to cluster sensitive data, keep it secret!
|
||||||
|
As a safeguard, uploaded-certs will be deleted in two hours; If necessary, you can use
|
||||||
|
"kubeadm init phase upload-certs --upload-certs" to reload certs afterward.
|
||||||
|
|
||||||
|
Then you can join any number of worker nodes by running the following on each as root:
|
||||||
|
|
||||||
|
kubeadm join k8s-lab.lab.vezpi.me:6443 --token 8etamd.g8whseg60kg09nu1 \
|
||||||
|
--discovery-token-ca-cert-hash sha256:65c4da3121f57d2e67ea6c1c1349544c9e295d78790b199b5c3be908ffe5ed6c
|
||||||
|
```
|
||||||
|
|
||||||
|
### Configurer `kubectl`
|
||||||
|
|
||||||
|
Si vous préférez gérer votre cluster depuis le nœud master, vous pouvez simplement copier-coller depuis la sortie de la commande `kubeadm init` :
|
||||||
|
```bash
|
||||||
|
mkdir -p $HOME/.kube
|
||||||
|
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
|
||||||
|
sudo chown $(id -u):$(id -g) $HOME/.kube/config
|
||||||
|
```
|
||||||
|
|
||||||
|
Si vous préférez contrôler le cluster depuis autre part, dans mon cas depuis mon bastion LXC :
|
||||||
|
```bash
|
||||||
|
mkdir -p $HOME/.kube
|
||||||
|
rsync --rsync-path="sudo rsync" <master-node>:/etc/kubernetes/admin.conf $HOME/.kube/config
|
||||||
|
```
|
||||||
|
|
||||||
|
Vérifiez l'accès :
|
||||||
|
```bash
|
||||||
|
kubectl get nodes
|
||||||
|
```
|
||||||
|
|
||||||
|
ℹ️ Vous devriez voir seulement le premier master listé (dans l'état `NotReady` jusqu'à ce que le CNI soit déployé).
|
||||||
|
|
||||||
|
### Installer le Plugin CNI Cilium
|
||||||
|
|
||||||
|
Depuis la [documentation Cilium](https://docs.cilium.io/en/stable/gettingstarted/k8s-install-default/), il y a 2 manières principales d'installer le CNI : utiliser la **CLI Cilium** ou **Helm**. Pour ce lab, je vais utiliser l'outil CLI.
|
||||||
|
|
||||||
|
#### Installer la CLI Cilium
|
||||||
|
|
||||||
|
La CLI Cilium peut être utilisée pour installer Cilium, inspecter l'état de l'installation Cilium et activer/désactiver diverses fonctionnalités (ex : `clustermesh`, `Hubble`) :
|
||||||
|
```bash
|
||||||
|
CILIUM_CLI_VERSION=$(curl -s https://raw.githubusercontent.com/cilium/cilium-cli/main/stable.txt)
|
||||||
|
curl -L --fail --remote-name-all https://github.com/cilium/cilium-cli/releases/download/${CILIUM_CLI_VERSION}/cilium-linux-amd64.tar.gz{,.sha256sum}
|
||||||
|
sha256sum --check cilium-linux-amd64.tar.gz.sha256sum
|
||||||
|
sudo tar xzvfC cilium-linux-amd64.tar.gz /usr/local/bin
|
||||||
|
rm cilium-linux-amd64.tar.gz{,.sha256sum}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Installer Cilium
|
||||||
|
|
||||||
|
Installer Cilium dans le cluster Kubernetes pointé par le contexte `kubectl` :
|
||||||
|
```bash
|
||||||
|
cilium install
|
||||||
|
```
|
||||||
|
```plaintext
|
||||||
|
__ Using Cilium version 1.17.5
|
||||||
|
__ Auto-detected cluster name: kubernetes
|
||||||
|
__ Auto-detected kube-proxy has been installed
|
||||||
|
```
|
||||||
|
#### Valider l'Installation
|
||||||
|
|
||||||
|
Pour valider que Cilium a été installé correctement :
|
||||||
|
```bash
|
||||||
|
cilium status --wait
|
||||||
|
```
|
||||||
|
```plaintext
|
||||||
|
/__\
|
||||||
|
/__\__/__\ Cilium: OK
|
||||||
|
\__/__\__/ Operator: OK
|
||||||
|
/__\__/__\ Envoy DaemonSet: OK
|
||||||
|
\__/__\__/ Hubble Relay: disabled
|
||||||
|
\__/ ClusterMesh: disabled
|
||||||
|
|
||||||
|
DaemonSet cilium Desired: 1, Ready: 1/1, Available: 1/1
|
||||||
|
DaemonSet cilium-envoy Desired: 1, Ready: 1/1, Available: 1/1
|
||||||
|
Deployment cilium-operator Desired: 1, Ready: 1/1, Available: 1/1
|
||||||
|
Containers: cilium Running: 1
|
||||||
|
cilium-envoy Running: 1
|
||||||
|
cilium-operator Running: 1
|
||||||
|
clustermesh-apiserver
|
||||||
|
hubble-relay
|
||||||
|
Cluster Pods: 0/2 managed by Cilium
|
||||||
|
Helm chart version: 1.17.5
|
||||||
|
Image versions cilium quay.io/cilium/cilium:v1.17.5@sha256:baf8541723ee0b72d6c489c741c81a6fdc5228940d66cb76ef5ea2ce3c639ea6: 1
|
||||||
|
cilium-envoy quay.io/cilium/cilium-envoy:v1.32.6-1749271279-0864395884b263913eac200ee2048fd985f8e626@sha256:9f69e290a7ea3d4edf9192acd81694089af048ae0d8a67fb63bd62dc1d72203e: 1
|
||||||
|
cilium-operator quay.io/cilium/operator-generic:v1.17.5@sha256:f954c97eeb1b47ed67d08cc8fb4108fb829f869373cbb3e698a7f8ef1085b09e: 1
|
||||||
|
```
|
||||||
|
|
||||||
|
Une fois installé, le nœud master doit passer au statut `Ready` :
|
||||||
|
```plaintext
|
||||||
|
NAME STATUS ROLES AGE VERSION
|
||||||
|
apex-master Ready control-plane 99m v1.32.7
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Ajouter les Nœuds Supplémentaires
|
||||||
|
|
||||||
|
Après avoir initialisé le premier nœud du control plane, vous pouvez maintenant **ajouter les autres nœuds** au cluster.
|
||||||
|
|
||||||
|
Il existe deux types de commandes `join` :
|
||||||
|
- Une pour rejoindre les **nœuds du control plane (masters)**
|
||||||
|
- Une pour rejoindre les **nœuds workers**
|
||||||
|
|
||||||
|
Ces commandes sont affichées à la fin de la commande `kubeadm init`. Si vous ne les avez pas copiées, il est possible de les **régénérer**.
|
||||||
|
|
||||||
|
⚠️ Les certificats et la clé de déchiffrement **expirent au bout de deux heures**.
|
||||||
|
|
||||||
|
### Ajouter des Masters
|
||||||
|
|
||||||
|
Vous pouvez maintenant ajouter d'autres nœuds du control plane en exécutant la commande fournie par `kubeadm init` :
|
||||||
|
```bash
|
||||||
|
sudo kubeadm join <control-plane-endpoint> --token <token> --discovery-token-ca-cert-hash <discovery-token-ca-cert-hash> --control-plane --certificate-key <certificate-key>
|
||||||
|
```
|
||||||
|
```plaintext
|
||||||
|
[preflight] Running pre-flight checks
|
||||||
|
[preflight] Reading configuration from the "kubeadm-config" ConfigMap in namespace "kube-system"...
|
||||||
|
[preflight] Use 'kubeadm init phase upload-config --config your-config.yaml' to re-upload it.
|
||||||
|
[preflight] Running pre-flight checks before initializing the new control plane instance
|
||||||
|
[preflight] Pulling images required for setting up a Kubernetes cluster
|
||||||
|
[preflight] This might take a minute or two, depending on the speed of your internet connection
|
||||||
|
[preflight] You can also perform this action beforehand using 'kubeadm config images pull'
|
||||||
|
W0718 09:27:32.248290 12043 checks.go:846] detected that the sandbox image "registry.k8s.io/pause:3.8" of the container runtime is inconsistent with that used by kubeadm.It is recommended to use "registry.k8s.io/pause:3.10" as the CRI sandbox image.
|
||||||
|
[download-certs] Downloading the certificates in Secret "kubeadm-certs" in the "kube-system" Namespace
|
||||||
|
[download-certs] Saving the certificates to the folder: "/etc/kubernetes/pki"
|
||||||
|
[certs] Using certificateDir folder "/etc/kubernetes/pki"
|
||||||
|
[certs] Generating "etcd/server" certificate and key
|
||||||
|
[certs] etcd/server serving cert is signed for DNS names [localhost vertex-master] and IPs [192.168.66.169 127.0.0.1 ::1]
|
||||||
|
[certs] Generating "etcd/peer" certificate and key
|
||||||
|
[certs] etcd/peer serving cert is signed for DNS names [localhost vertex-master] and IPs [192.168.66.169 127.0.0.1 ::1]
|
||||||
|
[certs] Generating "apiserver-etcd-client" certificate and key
|
||||||
|
[certs] Generating "etcd/healthcheck-client" certificate and key
|
||||||
|
[certs] Generating "apiserver" certificate and key
|
||||||
|
[certs] apiserver serving cert is signed for DNS names [k8s-lab.lab.vezpi.me kubernetes kubernetes.default kubernetes.default.svc kubernetes.default.svc.cluster.local vertex-master] and IPs [10.96.0.1 192.168.66.169]
|
||||||
|
[certs] Generating "apiserver-kubelet-client" certificate and key
|
||||||
|
[certs] Generating "front-proxy-client" certificate and key
|
||||||
|
[certs] Valid certificates and keys now exist in "/etc/kubernetes/pki"
|
||||||
|
[certs] Using the existing "sa" key
|
||||||
|
[kubeconfig] Generating kubeconfig files
|
||||||
|
[kubeconfig] Using kubeconfig folder "/etc/kubernetes"
|
||||||
|
[kubeconfig] Writing "admin.conf" kubeconfig file
|
||||||
|
[kubeconfig] Writing "controller-manager.conf" kubeconfig file
|
||||||
|
[kubeconfig] Writing "scheduler.conf" kubeconfig file
|
||||||
|
[control-plane] Using manifest folder "/etc/kubernetes/manifests"
|
||||||
|
[control-plane] Creating static Pod manifest for "kube-apiserver"
|
||||||
|
[control-plane] Creating static Pod manifest for "kube-controller-manager"
|
||||||
|
[control-plane] Creating static Pod manifest for "kube-scheduler"
|
||||||
|
[check-etcd] Checking that the etcd cluster is healthy
|
||||||
|
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
|
||||||
|
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
|
||||||
|
[kubelet-start] Starting the kubelet
|
||||||
|
[kubelet-check] Waiting for a healthy kubelet at http://127.0.0.1:10248/healthz. This can take up to 4m0s
|
||||||
|
[kubelet-check] The kubelet is healthy after 501.761616ms
|
||||||
|
[kubelet-start] Waiting for the kubelet to perform the TLS Bootstrap
|
||||||
|
[etcd] Announced new etcd member joining to the existing etcd cluster
|
||||||
|
[etcd] Creating static Pod manifest for "etcd"
|
||||||
|
{"level":"warn","ts":"2025-07-18T09:27:36.040077Z","logger":"etcd-client","caller":"v3@v3.5.16/retry_interceptor.go:63","msg":"retrying of unary invoker failed","target":"etcd-endpoints://0xc00037ab40/192.168.66.167:2379","attempt":0,"error":"rpc error: code = FailedPrecondition desc = etcdserver: can only promote a learner member which is in sync with leader"}
|
||||||
|
[...]
|
||||||
|
{"level":"warn","ts":"2025-07-18T09:27:44.976805Z","logger":"etcd-client","caller":"v3@v3.5.16/retry_interceptor.go:63","msg":"retrying of unary invoker failed","target":"etcd-endpoints://0xc00037ab40/192.168.66.167:2379","attempt":0,"error":"rpc error: code = FailedPrecondition desc = etcdserver: can only promote a learner member which is in sync with leader"}
|
||||||
|
[etcd] Waiting for the new etcd member to join the cluster. This can take up to 40s
|
||||||
|
[mark-control-plane] Marking the node vertex-master as control-plane by adding the labels: [node-role.kubernetes.io/control-plane node.kubernetes.io/exclude-from-external-load-balancers]
|
||||||
|
[mark-control-plane] Marking the node vertex-master as control-plane by adding the taints [node-role.kubernetes.io/control-plane:NoSchedule]
|
||||||
|
|
||||||
|
This node has joined the cluster and a new control plane instance was created:
|
||||||
|
|
||||||
|
* Certificate signing request was sent to apiserver and approval was received.
|
||||||
|
* The Kubelet was informed of the new secure connection details.
|
||||||
|
* Control plane label and taint were applied to the new node.
|
||||||
|
* The Kubernetes control plane instances scaled up.
|
||||||
|
* A new etcd member was added to the local/stacked etcd cluster.
|
||||||
|
|
||||||
|
To start administering your cluster from this node, you need to run the following as a regular user:
|
||||||
|
|
||||||
|
mkdir -p $HOME/.kube
|
||||||
|
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
|
||||||
|
sudo chown $(id -u):$(id -g) $HOME/.kube/config
|
||||||
|
|
||||||
|
Run 'kubectl get nodes' to see this node join the cluster.
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Regénérer les Certificats
|
||||||
|
|
||||||
|
Si les certificats ont expiré, vous verrez un message d’erreur lors du `kubeadm join` :
|
||||||
|
```plaintext
|
||||||
|
[download-certs] Downloading the certificates in Secret "kubeadm-certs" in the "kube-system" Namespace
|
||||||
|
error execution phase control-plane-prepare/download-certs: error downloading certs: error downloading the secret: Secret "kubeadm-certs" was not found in the "kube-system" Namespace. This Secret might have expired. Please, run `kubeadm init phase upload-certs --upload-certs` on a control plane to generate a new one
|
||||||
|
```
|
||||||
|
|
||||||
|
Dans ce cas, vous pouvez **re-téléverser les certificats** et générer une nouvelle clé de chiffrement depuis un nœud déjà membre du cluster :
|
||||||
|
```bash
|
||||||
|
sudo kubeadm init phase upload-certs --upload-certs
|
||||||
|
```
|
||||||
|
```plaintext
|
||||||
|
I0718 09:26:12.448472 18624 version.go:261] remote version is much newer: v1.33.3; falling back to: stable-1.32
|
||||||
|
[upload-certs] Storing the certificates in Secret "kubeadm-certs" in the "kube-system" Namespace
|
||||||
|
[upload-certs] Using certificate key:
|
||||||
|
7531149107ebc3caf4990f94d19824aecf39d93b84ee1b9c86aee84c04e76656
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Générer un Token
|
||||||
|
|
||||||
|
Associé au certificat, vous aurez besoin d'un **nouveau token**. Cette commande affichera directement la commande complète `join` pour un master :
|
||||||
|
```bash
|
||||||
|
sudo kubeadm token create --print-join-command --certificate-key <certificate-key>
|
||||||
|
```
|
||||||
|
|
||||||
|
Utilisez cette commande sur les nœuds à ajouter au cluster Kubernetes comme master.
|
||||||
|
|
||||||
|
### Ajouter des Workers
|
||||||
|
|
||||||
|
Vous pouvez rejoindre n'importe quel nombre de nœuds workers avec la commande suivante :
|
||||||
|
```bash
|
||||||
|
sudo kubeadm join k8s-lab.lab.vezpi.me:6443 --token 8etamd.g8whseg60kg09nu1 \
|
||||||
|
--discovery-token-ca-cert-hash sha256:65c4da3121f57d2e67ea6c1c1349544c9e295d78790b199b5c3be908ffe5ed6c
|
||||||
|
```
|
||||||
|
```plaintext
|
||||||
|
[preflight] Running pre-flight checks
|
||||||
|
[preflight] Reading configuration from the "kubeadm-config" ConfigMap in namespace "kube-system"...
|
||||||
|
[preflight] Use 'kubeadm init phase upload-config --config your-config.yaml' to re-upload it.
|
||||||
|
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
|
||||||
|
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
|
||||||
|
[kubelet-start] Starting the kubelet
|
||||||
|
[kubelet-check] Waiting for a healthy kubelet at http://127.0.0.1:10248/healthz. This can take up to 4m0s
|
||||||
|
[kubelet-check] The kubelet is healthy after 506.731798ms
|
||||||
|
[kubelet-start] Waiting for the kubelet to perform the TLS Bootstrap
|
||||||
|
|
||||||
|
This node has joined the cluster:
|
||||||
|
* Certificate signing request was sent to apiserver and a response was received.
|
||||||
|
* The Kubelet was informed of the new secure connection details.
|
||||||
|
|
||||||
|
Run 'kubectl get nodes' on the control-plane to see this node join the cluster.
|
||||||
|
```
|
||||||
|
|
||||||
|
Encore une fois, si vous avez perdu l’output initial de `kubeadm init`, vous pouvez régénérer une nouvelle commande complète :
|
||||||
|
```bash
|
||||||
|
sudo kubeadm token create --print-join-command
|
||||||
|
```
|
||||||
|
|
||||||
|
Utilisez cette commande sur les nœuds à ajouter comme workers.
|
||||||
|
|
||||||
|
### Vérifier le Cluster
|
||||||
|
|
||||||
|
Depuis votre contrôleur, vous pouvez vérifier que tous les nœuds ont bien rejoint le cluster et sont dans l’état `Ready` :
|
||||||
|
```bash
|
||||||
|
kubectl get node
|
||||||
|
```
|
||||||
|
```plaintext
|
||||||
|
NAME STATUS ROLES AGE VERSION
|
||||||
|
apex-master Ready control-plane 154m v1.32.7
|
||||||
|
apex-worker Ready <none> 5m14s v1.32.7
|
||||||
|
vertex-master Ready control-plane 26m v1.32.7
|
||||||
|
vertex-worker Ready <none> 3m39s v1.32.7
|
||||||
|
zenith-master Ready control-plane 23m v1.32.7
|
||||||
|
zenith-worker Ready <none> 3m26s v1.32.7
|
||||||
|
```
|
||||||
|
|
||||||
|
Pour valider que le cluster a une bonne connectivité réseau :
|
||||||
|
```bash
|
||||||
|
cilium connectivity test
|
||||||
|
```
|
||||||
|
```plaintext
|
||||||
|
__ Monitor aggregation detected, will skip some flow validation steps
|
||||||
|
[kubernetes] Creating namespace cilium-test-1 for connectivity check...
|
||||||
|
__ [kubernetes] Deploying echo-same-node service...
|
||||||
|
__ [kubernetes] Deploying DNS test server configmap...
|
||||||
|
__ [kubernetes] Deploying same-node deployment...
|
||||||
|
__ [kubernetes] Deploying client deployment...
|
||||||
|
__ [kubernetes] Deploying client2 deployment...
|
||||||
|
__ [kubernetes] Deploying client3 deployment...
|
||||||
|
__ [kubernetes] Deploying echo-other-node service...
|
||||||
|
__ [kubernetes] Deploying other-node deployment...
|
||||||
|
__ [host-netns] Deploying kubernetes daemonset...
|
||||||
|
__ [host-netns-non-cilium] Deploying kubernetes daemonset...
|
||||||
|
__ Skipping tests that require a node Without Cilium
|
||||||
|
[kubernetes] Waiting for deployment cilium-test-1/client to become ready...
|
||||||
|
__ [kubernetes] Waiting for deployment cilium-test-1/client2 to become ready...
|
||||||
|
__ [kubernetes] Waiting for deployment cilium-test-1/echo-same-node to become ready...
|
||||||
|
__ [kubernetes] Waiting for deployment cilium-test-1/client3 to become ready...
|
||||||
|
__ [kubernetes] Waiting for deployment cilium-test-1/echo-other-node to become ready...
|
||||||
|
__ [kubernetes] Waiting for pod cilium-test-1/client2-66475877c6-gpdkz to reach DNS server on cilium-test-1/echo-same-node-6c98489c8d-547mc pod...
|
||||||
|
__ [kubernetes] Waiting for pod cilium-test-1/client3-795488bf5-xrlbp to reach DNS server on cilium-test-1/echo-same-node-6c98489c8d-547mc pod...
|
||||||
|
__ [kubernetes] Waiting for pod cilium-test-1/client-645b68dcf7-ps276 to reach DNS server on cilium-test-1/echo-same-node-6c98489c8d-547mc pod...
|
||||||
|
__ [kubernetes] Waiting for pod cilium-test-1/client2-66475877c6-gpdkz to reach DNS server on cilium-test-1/echo-other-node-6d774d44c4-gzkmd pod...
|
||||||
|
__ [kubernetes] Waiting for pod cilium-test-1/client3-795488bf5-xrlbp to reach DNS server on cilium-test-1/echo-other-node-6d774d44c4-gzkmd pod...
|
||||||
|
__ [kubernetes] Waiting for pod cilium-test-1/client-645b68dcf7-ps276 to reach DNS server on cilium-test-1/echo-other-node-6d774d44c4-gzkmd pod...
|
||||||
|
__ [kubernetes] Waiting for pod cilium-test-1/client2-66475877c6-gpdkz to reach default/kubernetes service...
|
||||||
|
__ [kubernetes] Waiting for pod cilium-test-1/client3-795488bf5-xrlbp to reach default/kubernetes service...
|
||||||
|
__ [kubernetes] Waiting for pod cilium-test-1/client-645b68dcf7-ps276 to reach default/kubernetes service...
|
||||||
|
__ [kubernetes] Waiting for Service cilium-test-1/echo-other-node to become ready...
|
||||||
|
__ [kubernetes] Waiting for Service cilium-test-1/echo-other-node to be synchronized by Cilium pod kube-system/cilium-6824w
|
||||||
|
__ [kubernetes] Waiting for Service cilium-test-1/echo-other-node to be synchronized by Cilium pod kube-system/cilium-jc4fx
|
||||||
|
__ [kubernetes] Waiting for Service cilium-test-1/echo-same-node to become ready...
|
||||||
|
__ [kubernetes] Waiting for Service cilium-test-1/echo-same-node to be synchronized by Cilium pod kube-system/cilium-6824w
|
||||||
|
__ [kubernetes] Waiting for Service cilium-test-1/echo-same-node to be synchronized by Cilium pod kube-system/cilium-jc4fx
|
||||||
|
__ [kubernetes] Waiting for NodePort 192.168.66.166:32391 (cilium-test-1/echo-other-node) to become ready...
|
||||||
|
__ [kubernetes] Waiting for NodePort 192.168.66.166:32055 (cilium-test-1/echo-same-node) to become ready...
|
||||||
|
__ [kubernetes] Waiting for NodePort 192.168.66.172:32391 (cilium-test-1/echo-other-node) to become ready...
|
||||||
|
__ [kubernetes] Waiting for NodePort 192.168.66.172:32055 (cilium-test-1/echo-same-node) to become ready...
|
||||||
|
__ [kubernetes] Waiting for NodePort 192.168.66.167:32391 (cilium-test-1/echo-other-node) to become ready...
|
||||||
|
__ [kubernetes] Waiting for NodePort 192.168.66.167:32055 (cilium-test-1/echo-same-node) to become ready...
|
||||||
|
__ [kubernetes] Waiting for NodePort 192.168.66.168:32391 (cilium-test-1/echo-other-node) to become ready...
|
||||||
|
__ [kubernetes] Waiting for NodePort 192.168.66.168:32055 (cilium-test-1/echo-same-node) to become ready...
|
||||||
|
__ [kubernetes] Waiting for NodePort 192.168.66.169:32391 (cilium-test-1/echo-other-node) to become ready...
|
||||||
|
__ [kubernetes] Waiting for NodePort 192.168.66.169:32055 (cilium-test-1/echo-same-node) to become ready...
|
||||||
|
__ [kubernetes] Waiting for NodePort 192.168.66.170:32391 (cilium-test-1/echo-other-node) to become ready...
|
||||||
|
__ [kubernetes] Waiting for NodePort 192.168.66.170:32055 (cilium-test-1/echo-same-node) to become ready...
|
||||||
|
__ [kubernetes] Waiting for DaemonSet cilium-test-1/host-netns-non-cilium to become ready...
|
||||||
|
__ [kubernetes] Waiting for DaemonSet cilium-test-1/host-netns to become ready...
|
||||||
|
__ Skipping IPCache check
|
||||||
|
Enabling Hubble telescope...
|
||||||
|
__ Unable to contact Hubble Relay, disabling Hubble telescope and flow validation: rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing: dial tcp [::1]:4245: connect: connection refused"
|
||||||
|
Expose Relay locally with:
|
||||||
|
cilium hubble enable
|
||||||
|
cilium hubble port-forward&
|
||||||
|
__ Cilium version: 1.17.5
|
||||||
|
[cilium-test-1] Running 123 tests ...
|
||||||
|
[=] [cilium-test-1] Test [no-policies] [1/123]
|
||||||
|
[...]
|
||||||
|
[=] [cilium-test-1] Test [check-log-errors] [123/123]
|
||||||
|
.................................................
|
||||||
|
__ [cilium-test-1] All 73 tests (739 actions) successful, 50 tests skipped, 1 scenarios skipped.
|
||||||
|
```
|
||||||
|
|
||||||
|
⌛ Ce test de connectivité peut prendre jusqu’à **30 minutes**.
|
||||||
|
|
||||||
|
---
|
||||||
|
## Conclusion
|
||||||
|
|
||||||
|
🚀 Notre cluster Kubernetes hautement disponible est prêt !
|
||||||
|
|
||||||
|
Dans cet article, nous avons vu comment **créer manuellement un cluster Kubernetes** dans mon homelab à l’aide de `kubeadm`, sur un ensemble de 6 machines Ubuntu (3 masters et 3 workers) préalablement déployées avec Terraform sur Proxmox.
|
||||||
|
|
||||||
|
Nous avons suivi les étapes suivantes :
|
||||||
|
- Préparation des nœuds avec les outils, modules noyau et runtime nécessaires
|
||||||
|
- Installation des paquets Kubernetes
|
||||||
|
- Initialisation du cluster depuis le premier nœud master
|
||||||
|
- Ajout des autres nœuds du plan de contrôle et des workers
|
||||||
|
- Vérification de l’état et du bon fonctionnement du cluster
|
||||||
|
|
||||||
|
Cette approche manuelle permet de mieux comprendre comment un cluster Kubernetes est construit en interne. C’est une excellente base avant de passer à l’automatisation dans les prochains articles, en utilisant des outils comme Ansible.
|
||||||
|
|
||||||
|
Restez connectés, la suite sera axée sur l’automatisation de tout ça !
|
content/post/8-create-manual-kubernetes-cluster-kubeadm.md
@@ -0,0 +1,635 @@
|
|||||||
|
---
|
||||||
|
slug: create-manual-kubernetes-cluster-kubeadm
|
||||||
|
title: Create a Highly Available Kubernetes Cluster with kubeadm on VMs
|
||||||
|
description: Step-by-step guide to manually build a highly available Kubernetes cluster on virtual machines using kubeadm.
|
||||||
|
date: 2025-07-18
|
||||||
|
draft: false
|
||||||
|
tags:
|
||||||
|
- kubernetes
|
||||||
|
- kubeadm
|
||||||
|
- high-availability
|
||||||
|
categories:
|
||||||
|
- homelab
|
||||||
|
---
|
||||||
|
|
||||||
|
## Intro
|
||||||
|
|
||||||
|
In this [previous article]({{< ref "post/7-terraform-create-proxmox-module" >}}), I explained how to deploy VMs using a **Terraform** module with **Proxmox** and ended up with 6 VMs, 3 masters and 3 workers nodes, based on [cloud-init template]({{< ref "post/1-proxmox-cloud-init-vm-template" >}}).
|
||||||
|
|
||||||
|
Now that the infrastructure is ready, let’s move on to the next step: **manually building a Kubernetes cluster** in my homelab using `kubeadm`, highly available using stacked `etcd`.
|
||||||
|
|
||||||
|
In this post, I’ll walk through each step of the installation process of a Kubernetes cluster. I will not rely on automation tools to configure the nodes for now, to better understand what are the steps involved in a Kubernetes cluster bootstrapping. Automation will be covered in future posts.
|
||||||
|
|
||||||
|
---
|
||||||
|
## What is Kubernetes
|
||||||
|
|
||||||
|
Kubernetes is an open-source platform for orchestrating containers across a group of machines. It handles the deployment, scaling, and health of containerized applications, allowing you to focus on building your services rather than managing infrastructure details.
|
||||||
|
|
||||||
|
A Kubernetes cluster is made up of two main types of nodes: control plane (masters) nodes and worker nodes. The control plane is responsible for the overall management of the cluster, it makes decisions about scheduling, monitoring, and responding to changes in the system. The worker nodes are where your applications actually run, inside containers managed by Kubernetes.
|
||||||
|
|
||||||
|
In this post, we’ll manually set up a Kubernetes cluster with 3 control plane nodes (masters) and 3 workers. This structure reflects a highly available and production-like setup, even though the goal here is mainly to learn and understand how the components fit together.
|
||||||
|
|
||||||
|
The official documentation can be found [here](https://kubernetes.io/docs/setup/production-environment/tools/kubeadm/), I will use the version **v1.32**.
|
||||||
|
|
||||||
|
---
|
||||||
|
## Prepare the Nodes
|
||||||
|
|
||||||
|
I will perform the following steps on all 6 VMs (masters and workers).
|
||||||
|
|
||||||
|
### Hostname
|
||||||
|
|
||||||
|
Each VM has a unique **hostname** and all nodes must **resolve** each other.
|
||||||
|
|
||||||
|
The hostname is set upon VM creation with cloud-init. But for demonstration purposes, I'll set it manually:
|
||||||
|
```bash
|
||||||
|
sudo hostnamectl set-hostname <hostname>
|
||||||
|
```
|
||||||
|
|
||||||
|
On my infrastructure, the nodes resolve each other's hostnames through my DNS server for that domain (`lab.vezpi.me`). In case you don't have a DNS server, you can hardcode the node IPs in each `/etc/hosts` file:
|
||||||
|
```bash
|
||||||
|
192.168.66.168 apex-worker
|
||||||
|
192.168.66.167 apex-master
|
||||||
|
192.168.66.166 zenith-master
|
||||||
|
192.168.66.170 vertex-worker
|
||||||
|
192.168.66.169 vertex-master
|
||||||
|
192.168.66.172 zenith-worker
|
||||||
|
```
|
||||||
|
|
||||||
|
### OS Updates
|
||||||
|
|
||||||
|
My VMs are running **Ubuntu 24.04.2 LTS**. Cloud-init handles the updates after provisioning in that case, but let's make sure everything is up to date and install the packages needed to add the Kubernetes repository:
|
||||||
|
```bash
|
||||||
|
sudo apt update && sudo apt upgrade -y
|
||||||
|
sudo apt install -y apt-transport-https ca-certificates curl gpg
|
||||||
|
```
|
||||||
|
|
||||||
|
### Swap
|
||||||
|
|
||||||
|
The default behavior of a `kubelet` is to fail to start if **swap memory** is detected on a node. This means that swap should either be disabled or tolerated by `kubelet`.
|
||||||
|
|
||||||
|
My VMs are not using swap, but here is how to disable it:
|
||||||
|
```bash
|
||||||
|
sudo swapoff -a
|
||||||
|
sudo sed -i '/ swap / s/^/#/' /etc/fstab
|
||||||
|
```
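If you would rather keep swap and make `kubelet` tolerate it instead, here is a minimal sketch (an assumption on my part, not what I do in this lab): pass `--fail-swap-on=false` through `KUBELET_EXTRA_ARGS`, which the Debian/Ubuntu packages read from `/etc/default/kubelet`. This only makes sense once `kubelet` is installed later in this post:

```bash
# Sketch only: let kubelet start even when swap is detected
echo 'KUBELET_EXTRA_ARGS=--fail-swap-on=false' | sudo tee /etc/default/kubelet
sudo systemctl restart kubelet
```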
|
||||||
|
|
||||||
|
### Firewall
|
||||||
|
|
||||||
|
For this lab, I will just disable the local firewall (don't do that in production):
|
||||||
|
```bash
|
||||||
|
sudo systemctl disable --now ufw
|
||||||
|
```
|
||||||
|
|
||||||
|
For production, you want to allow the nodes to talk to each other on these ports (a `ufw` sketch follows the tables below):
|
||||||
|
#### Control plane
|
||||||
|
|Protocol|Direction|Port Range|Purpose|Used By|
|
||||||
|
|---|---|---|---|---|
|
||||||
|
|TCP|Inbound|6443|Kubernetes API server|All|
|
||||||
|
|TCP|Inbound|2379-2380|etcd server client API|kube-apiserver, etcd|
|
||||||
|
|TCP|Inbound|10250|Kubelet API|Self, Control plane|
|
||||||
|
|TCP|Inbound|10259|kube-scheduler|Self|
|
||||||
|
|TCP|Inbound|10257|kube-controller-manager|Self|
|
||||||
|
|
||||||
|
#### Worker
|
||||||
|
| Protocol | Direction | Port Range | Purpose | Used By |
|
||||||
|
| -------- | --------- | ----------- | ------------------ | -------------------- |
|
||||||
|
| TCP | Inbound | 10250 | Kubelet API | Self, Control plane |
|
||||||
|
| TCP | Inbound | 10256 | kube-proxy | Self, Load balancers |
|
||||||
|
| TCP | Inbound | 30000-32767 | NodePort Services† | All |
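As an illustration, here is a rough `ufw` sketch for a control plane node, assuming the nodes all sit on the `192.168.66.0/24` subnet used in this lab; adjust the source range and use the worker table for the worker nodes:

```bash
# Control plane ports from the table above, restricted to the node subnet
sudo ufw allow proto tcp from 192.168.66.0/24 to any port 6443        # Kubernetes API server
sudo ufw allow proto tcp from 192.168.66.0/24 to any port 2379:2380   # etcd server client API
sudo ufw allow proto tcp from 192.168.66.0/24 to any port 10250       # Kubelet API
sudo ufw allow proto tcp from 192.168.66.0/24 to any port 10257       # kube-controller-manager
sudo ufw allow proto tcp from 192.168.66.0/24 to any port 10259       # kube-scheduler
sudo ufw enable
```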
|
||||||
|
|
||||||
|
### Kernel Modules and Settings
|
||||||
|
|
||||||
|
Kubernetes needs 2 kernel modules:
|
||||||
|
- **overlay**: for facilitating the layering of one filesystem on top of another
|
||||||
|
- **br_netfilter**: for enabling bridge network connections
|
||||||
|
|
||||||
|
Let's enable them:
|
||||||
|
```bash
|
||||||
|
cat <<EOF | sudo tee /etc/modules-load.d/k8s.conf
|
||||||
|
overlay
|
||||||
|
br_netfilter
|
||||||
|
EOF
|
||||||
|
|
||||||
|
sudo modprobe overlay
|
||||||
|
sudo modprobe br_netfilter
|
||||||
|
```
|
||||||
|
|
||||||
|
Some kernel settings related to network are also needed:
|
||||||
|
```bash
|
||||||
|
cat <<EOF | sudo tee /etc/sysctl.d/k8s.conf
|
||||||
|
net.bridge.bridge-nf-call-iptables = 1
|
||||||
|
net.bridge.bridge-nf-call-ip6tables = 1
|
||||||
|
net.ipv4.ip_forward = 1
|
||||||
|
EOF
|
||||||
|
|
||||||
|
sudo sysctl --system
|
||||||
|
```
|
||||||
|
|
||||||
|
### Container Runtime
|
||||||
|
|
||||||
|
You need to install a **container runtime** into each node in the cluster so that Pods can run there. I will use `containerd`:
|
||||||
|
```bash
|
||||||
|
sudo apt install -y containerd
|
||||||
|
```
|
||||||
|
|
||||||
|
Create the default configuration:
|
||||||
|
```bash
|
||||||
|
sudo mkdir -p /etc/containerd
|
||||||
|
containerd config default | sudo tee /etc/containerd/config.toml > /dev/null
|
||||||
|
```
|
||||||
|
|
||||||
|
Enable `systemd` *cgroup* driver:
|
||||||
|
```bash
|
||||||
|
sudo sed -i 's/^\(\s*SystemdCgroup\s*=\s*\)false/\1true/' /etc/containerd/config.toml
|
||||||
|
```
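To double-check the change before restarting the service:

```bash
# Should show SystemdCgroup = true under the runc runtime options
grep -n 'SystemdCgroup' /etc/containerd/config.toml
```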
|
||||||
|
|
||||||
|
Restart and enable the `containerd` service:
|
||||||
|
```bash
|
||||||
|
sudo systemctl restart containerd
|
||||||
|
sudo systemctl enable containerd
|
||||||
|
```
|
||||||
|
|
||||||
|
### Kubernetes Packages
|
||||||
|
|
||||||
|
Last step: install the Kubernetes packages. I start by adding the repository and its signing key.
|
||||||
|
|
||||||
|
Add the key:
|
||||||
|
```bash
|
||||||
|
curl -fsSL https://pkgs.k8s.io/core:/stable:/v1.32/deb/Release.key | sudo gpg --dearmor -o /etc/apt/keyrings/kubernetes-apt-keyring.gpg
|
||||||
|
```
|
||||||
|
|
||||||
|
Add the repository:
|
||||||
|
```bash
|
||||||
|
echo 'deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://pkgs.k8s.io/core:/stable:/v1.32/deb/ /' | sudo tee /etc/apt/sources.list.d/kubernetes.list
|
||||||
|
```
|
||||||
|
|
||||||
|
Finally I can install the needed packages:
|
||||||
|
- `kubeadm`: the command to bootstrap the cluster.
|
||||||
|
- `kubelet`: the component that runs on all of the machines in your cluster and does things like starting pods and containers.
|
||||||
|
- `kubectl`: the command line util to talk to your cluster.
|
||||||
|
|
||||||
|
On the nodes, update the `apt` package index, install `kubelet` and `kubeadm`, and pin their version:
|
||||||
|
```bash
|
||||||
|
sudo apt-get update
|
||||||
|
sudo apt-get install -y kubelet kubeadm
|
||||||
|
sudo apt-mark hold kubelet kubeadm
|
||||||
|
```
|
||||||
|
|
||||||
|
ℹ️ I will not manage the cluster from my nodes, so I install `kubectl` on my LXC controller instead:
|
||||||
|
```bash
|
||||||
|
sudo apt-get update
|
||||||
|
sudo apt-get install -y kubectl
|
||||||
|
sudo apt-mark hold kubectl
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
## Initialize the Cluster
|
||||||
|
|
||||||
|
Once all nodes are prepared, it’s time to initialize the Kubernetes control plane on the **first master node**.
|
||||||
|
|
||||||
|
### Bootstrap the Cluster
|
||||||
|
|
||||||
|
Run the following command to bootstrap the cluster:
|
||||||
|
```bash
|
||||||
|
sudo kubeadm init \
|
||||||
|
--control-plane-endpoint "k8s-lab.lab.vezpi.me:6443" \
|
||||||
|
--upload-certs \
|
||||||
|
--pod-network-cidr=10.10.0.0/16
|
||||||
|
```
|
||||||
|
|
||||||
|
**Explanation**:
|
||||||
|
- `--control-plane-endpoint`: DNS name for your control plane.
|
||||||
|
- `--upload-certs`: Upload the certificates that should be shared across all masters of the cluster.
|
||||||
|
- `--pod-network-cidr`: Subnet for the CNI.
|
||||||
|
|
||||||
|
This step will:
|
||||||
|
- Initialize the `etcd` database and control plane components.
|
||||||
|
- Set up RBAC and bootstrap tokens.
|
||||||
|
- Output two important `kubeadm join` commands: one for **workers**, and one for **additional control-plane nodes**.
|
||||||
|
|
||||||
|
ℹ️ The DNS name `k8s-lab.lab.vezpi.me` is handled in my homelab by **Unbound DNS**; it resolves to my **OPNsense** interface, where a **HAProxy** service listens on port 6443 and load balances across the 3 control plane nodes.
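On OPNsense this is configured through the HAProxy plugin GUI, but conceptually it boils down to a TCP frontend and backend roughly equivalent to this `haproxy.cfg` fragment (a sketch, reusing the control plane IPs from the `/etc/hosts` example above):

```plaintext
frontend k8s-api
    bind *:6443
    mode tcp
    default_backend k8s-control-plane

backend k8s-control-plane
    mode tcp
    balance roundrobin
    option tcp-check
    server apex-master   192.168.66.167:6443 check
    server vertex-master 192.168.66.169:6443 check
    server zenith-master 192.168.66.166:6443 check
```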
|
||||||
|
|
||||||
|
You’ll also see a message instructing you to set up your `kubectl` access.
|
||||||
|
|
||||||
|
```plaintext
|
||||||
|
I0718 07:18:29.306814 14724 version.go:261] remote version is much newer: v1.33.3; falling back to: stable-1.32
|
||||||
|
[init] Using Kubernetes version: v1.32.7
|
||||||
|
[preflight] Running pre-flight checks
|
||||||
|
[preflight] Pulling images required for setting up a Kubernetes cluster
|
||||||
|
[preflight] This might take a minute or two, depending on the speed of your internet connection
|
||||||
|
[preflight] You can also perform this action beforehand using 'kubeadm config images pull'
|
||||||
|
W0718 07:18:29.736833 14724 checks.go:846] detected that the sandbox image "registry.k8s.io/pause:3.8" of the container runtime is inconsistent with that used by kubeadm.It is recommended to use "registry.k8s.io/pause:3.10" as the CRI sandbox image.
|
||||||
|
[certs] Using certificateDir folder "/etc/kubernetes/pki"
|
||||||
|
[certs] Generating "ca" certificate and key
|
||||||
|
[certs] Generating "apiserver" certificate and key
|
||||||
|
[certs] apiserver serving cert is signed for DNS names [apex-master k8s-lab.lab.vezpi.me kubernetes kubernetes.default kubernetes.default.svc kubernetes.default.svc.cluster.local] and IPs [10.96.0.1 192.168.66.167]
|
||||||
|
[certs] Generating "apiserver-kubelet-client" certificate and key
|
||||||
|
[certs] Generating "front-proxy-ca" certificate and key
|
||||||
|
[certs] Generating "front-proxy-client" certificate and key
|
||||||
|
[certs] Generating "etcd/ca" certificate and key
|
||||||
|
[certs] Generating "etcd/server" certificate and key
|
||||||
|
[certs] etcd/server serving cert is signed for DNS names [apex-master localhost] and IPs [192.168.66.167 127.0.0.1 ::1]
|
||||||
|
[certs] Generating "etcd/peer" certificate and key
|
||||||
|
[certs] etcd/peer serving cert is signed for DNS names [apex-master localhost] and IPs [192.168.66.167 127.0.0.1 ::1]
|
||||||
|
[certs] Generating "etcd/healthcheck-client" certificate and key
|
||||||
|
[certs] Generating "apiserver-etcd-client" certificate and key
|
||||||
|
[certs] Generating "sa" key and public key
|
||||||
|
[kubeconfig] Using kubeconfig folder "/etc/kubernetes"
|
||||||
|
[kubeconfig] Writing "admin.conf" kubeconfig file
|
||||||
|
[kubeconfig] Writing "super-admin.conf" kubeconfig file
|
||||||
|
[kubeconfig] Writing "kubelet.conf" kubeconfig file
|
||||||
|
[kubeconfig] Writing "controller-manager.conf" kubeconfig file
|
||||||
|
[kubeconfig] Writing "scheduler.conf" kubeconfig file
|
||||||
|
[etcd] Creating static Pod manifest for local etcd in "/etc/kubernetes/manifests"
|
||||||
|
[control-plane] Using manifest folder "/etc/kubernetes/manifests"
|
||||||
|
[control-plane] Creating static Pod manifest for "kube-apiserver"
|
||||||
|
[control-plane] Creating static Pod manifest for "kube-controller-manager"
|
||||||
|
[control-plane] Creating static Pod manifest for "kube-scheduler"
|
||||||
|
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
|
||||||
|
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
|
||||||
|
[kubelet-start] Starting the kubelet
|
||||||
|
[wait-control-plane] Waiting for the kubelet to boot up the control plane as static Pods from directory "/etc/kubernetes/manifests"
|
||||||
|
[kubelet-check] Waiting for a healthy kubelet at http://127.0.0.1:10248/healthz. This can take up to 4m0s
|
||||||
|
[kubelet-check] The kubelet is healthy after 501.894876ms
|
||||||
|
[api-check] Waiting for a healthy API server. This can take up to 4m0s
|
||||||
|
[api-check] The API server is healthy after 9.030595455s
|
||||||
|
[upload-config] Storing the configuration used in ConfigMap "kubeadm-config" in the "kube-system" Namespace
|
||||||
|
[kubelet] Creating a ConfigMap "kubelet-config" in namespace kube-system with the configuration for the kubelets in the cluster
|
||||||
|
[upload-certs] Storing the certificates in Secret "kubeadm-certs" in the "kube-system" Namespace
|
||||||
|
[upload-certs] Using certificate key:
|
||||||
|
70614009469f9fc7a97c392253492c509f1884281f59ccd7725b3200e3271794
|
||||||
|
[mark-control-plane] Marking the node apex-master as control-plane by adding the labels: [node-role.kubernetes.io/control-plane node.kubernetes.io/exclude-from-external-load-balancers]
|
||||||
|
[mark-control-plane] Marking the node apex-master as control-plane by adding the taints [node-role.kubernetes.io/control-plane:NoSchedule]
|
||||||
|
[bootstrap-token] Using token: 8etamd.g8whseg60kg09nu1
|
||||||
|
[bootstrap-token] Configuring bootstrap tokens, cluster-info ConfigMap, RBAC Roles
|
||||||
|
[bootstrap-token] Configured RBAC rules to allow Node Bootstrap tokens to get nodes
|
||||||
|
[bootstrap-token] Configured RBAC rules to allow Node Bootstrap tokens to post CSRs in order for nodes to get long term certificate credentials
|
||||||
|
[bootstrap-token] Configured RBAC rules to allow the csrapprover controller automatically approve CSRs from a Node Bootstrap Token
|
||||||
|
[bootstrap-token] Configured RBAC rules to allow certificate rotation for all node client certificates in the cluster
|
||||||
|
[bootstrap-token] Creating the "cluster-info" ConfigMap in the "kube-public" namespace
|
||||||
|
[kubelet-finalize] Updating "/etc/kubernetes/kubelet.conf" to point to a rotatable kubelet client certificate and key
|
||||||
|
[addons] Applied essential addon: CoreDNS
|
||||||
|
[addons] Applied essential addon: kube-proxy
|
||||||
|
|
||||||
|
Your Kubernetes control-plane has initialized successfully!
|
||||||
|
|
||||||
|
To start using your cluster, you need to run the following as a regular user:
|
||||||
|
|
||||||
|
mkdir -p $HOME/.kube
|
||||||
|
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
|
||||||
|
sudo chown $(id -u):$(id -g) $HOME/.kube/config
|
||||||
|
|
||||||
|
Alternatively, if you are the root user, you can run:
|
||||||
|
|
||||||
|
export KUBECONFIG=/etc/kubernetes/admin.conf
|
||||||
|
|
||||||
|
You should now deploy a pod network to the cluster.
|
||||||
|
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
|
||||||
|
https://kubernetes.io/docs/concepts/cluster-administration/addons/
|
||||||
|
|
||||||
|
You can now join any number of control-plane nodes running the following command on each as root:
|
||||||
|
|
||||||
|
kubeadm join k8s-lab.lab.vezpi.me:6443 --token 8etamd.g8whseg60kg09nu1 \
|
||||||
|
--discovery-token-ca-cert-hash sha256:65c4da3121f57d2e67ea6c1c1349544c9e295d78790b199b5c3be908ffe5ed6c \
|
||||||
|
--control-plane --certificate-key 70614009469f9fc7a97c392253492c509f1884281f59ccd7725b3200e3271794
|
||||||
|
|
||||||
|
Please note that the certificate-key gives access to cluster sensitive data, keep it secret!
|
||||||
|
As a safeguard, uploaded-certs will be deleted in two hours; If necessary, you can use
|
||||||
|
"kubeadm init phase upload-certs --upload-certs" to reload certs afterward.
|
||||||
|
|
||||||
|
Then you can join any number of worker nodes by running the following on each as root:
|
||||||
|
|
||||||
|
kubeadm join k8s-lab.lab.vezpi.me:6443 --token 8etamd.g8whseg60kg09nu1 \
|
||||||
|
--discovery-token-ca-cert-hash sha256:65c4da3121f57d2e67ea6c1c1349544c9e295d78790b199b5c3be908ffe5ed6c
|
||||||
|
```
|
||||||
|
### Configure `kubectl`
|
||||||
|
|
||||||
|
If you want to manage your cluster from your master node, you can simply copy paste from the output of the `kubeadm init` command:
|
||||||
|
```bash
|
||||||
|
mkdir -p $HOME/.kube
|
||||||
|
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
|
||||||
|
sudo chown $(id -u):$(id -g) $HOME/.kube/config
|
||||||
|
```
|
||||||
|
|
||||||
|
If you prefer to control the cluster from elsewhere, in my case from my LXC bastion:
|
||||||
|
```bash
|
||||||
|
mkdir -p $HOME/.kube
|
||||||
|
rsync --rsync-path="sudo rsync" <master-node>:/etc/kubernetes/admin.conf $HOME/.kube/config
|
||||||
|
```
|
||||||
|
|
||||||
|
Verify your access:
|
||||||
|
```bash
|
||||||
|
kubectl get nodes
|
||||||
|
```
|
||||||
|
|
||||||
|
ℹ️ You should see only the first master listed (in `NotReady` state until the CNI is deployed).
|
||||||
|
|
||||||
|
### Install the CNI Plugin Cilium
|
||||||
|
|
||||||
|
From the [Cilium documentation](https://docs.cilium.io/en/stable/gettingstarted/k8s-install-default/), there are 2 common ways to install the CNI: using the **Cilium CLI** or **Helm**. For this lab I will use the CLI tool.
|
||||||
|
|
||||||
|
#### Install the Cilium CLI
|
||||||
|
|
||||||
|
The Cilium CLI can be used to install Cilium, inspect the state of a Cilium installation, and enable/disable various features (e.g. `clustermesh`, `Hubble`). Install it on your controller where `kubectl` is installed:
|
||||||
|
```bash
|
||||||
|
CILIUM_CLI_VERSION=$(curl -s https://raw.githubusercontent.com/cilium/cilium-cli/main/stable.txt)
|
||||||
|
curl -L --fail --remote-name-all https://github.com/cilium/cilium-cli/releases/download/${CILIUM_CLI_VERSION}/cilium-linux-amd64.tar.gz{,.sha256sum}
|
||||||
|
sha256sum --check cilium-linux-amd64.tar.gz.sha256sum
|
||||||
|
sudo tar xzvfC cilium-linux-amd64.tar.gz /usr/local/bin
|
||||||
|
rm cilium-linux-amd64.tar.gz{,.sha256sum}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Install Cilium
|
||||||
|
|
||||||
|
Install Cilium into the Kubernetes cluster pointed to by your current `kubectl` context:
|
||||||
|
```bash
|
||||||
|
cilium install
|
||||||
|
```
|
||||||
|
```plaintext
|
||||||
|
__ Using Cilium version 1.17.5
|
||||||
|
__ Auto-detected cluster name: kubernetes
|
||||||
|
__ Auto-detected kube-proxy has been installed
|
||||||
|
```
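For reference, the Helm route mentioned earlier would look roughly like this (not used in this lab; repository URL and chart name as documented by Cilium):

```bash
helm repo add cilium https://helm.cilium.io/
helm repo update
# Match the version picked by the CLI above
helm install cilium cilium/cilium --version 1.17.5 --namespace kube-system
```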
|
||||||
|
#### Validate the Installation
|
||||||
|
|
||||||
|
To validate that Cilium has been properly installed:
|
||||||
|
```bash
|
||||||
|
cilium status --wait
|
||||||
|
```
|
||||||
|
```plaintext
|
||||||
|
/__\
|
||||||
|
/__\__/__\ Cilium: OK
|
||||||
|
\__/__\__/ Operator: OK
|
||||||
|
/__\__/__\ Envoy DaemonSet: OK
|
||||||
|
\__/__\__/ Hubble Relay: disabled
|
||||||
|
\__/ ClusterMesh: disabled
|
||||||
|
|
||||||
|
DaemonSet cilium Desired: 1, Ready: 1/1, Available: 1/1
|
||||||
|
DaemonSet cilium-envoy Desired: 1, Ready: 1/1, Available: 1/1
|
||||||
|
Deployment cilium-operator Desired: 1, Ready: 1/1, Available: 1/1
|
||||||
|
Containers: cilium Running: 1
|
||||||
|
cilium-envoy Running: 1
|
||||||
|
cilium-operator Running: 1
|
||||||
|
clustermesh-apiserver
|
||||||
|
hubble-relay
|
||||||
|
Cluster Pods: 0/2 managed by Cilium
|
||||||
|
Helm chart version: 1.17.5
|
||||||
|
Image versions cilium quay.io/cilium/cilium:v1.17.5@sha256:baf8541723ee0b72d6c489c741c81a6fdc5228940d66cb76ef5ea2ce3c639ea6: 1
|
||||||
|
cilium-envoy quay.io/cilium/cilium-envoy:v1.32.6-1749271279-0864395884b263913eac200ee2048fd985f8e626@sha256:9f69e290a7ea3d4edf9192acd81694089af048ae0d8a67fb63bd62dc1d72203e: 1
|
||||||
|
cilium-operator quay.io/cilium/operator-generic:v1.17.5@sha256:f954c97eeb1b47ed67d08cc8fb4108fb829f869373cbb3e698a7f8ef1085b09e: 1
|
||||||
|
```
|
||||||
|
|
||||||
|
Once installed, the master node should transition to `Ready` status:
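This is the same `kubectl get nodes` check as earlier, repeated here so the output below is easy to reproduce:

```bash
kubectl get nodes
```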
|
||||||
|
```plaintext
|
||||||
|
NAME STATUS ROLES AGE VERSION
|
||||||
|
apex-master Ready control-plane 99m v1.32.7
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
## Join Additional Nodes
|
||||||
|
|
||||||
|
After initializing the first control plane node, you can now join the remaining nodes to the cluster.
|
||||||
|
|
||||||
|
There are two types of join commands:
|
||||||
|
- One for joining **control-plane (master) nodes**
|
||||||
|
- One for joining **worker nodes**
|
||||||
|
|
||||||
|
These commands were displayed at the end of the `kubeadm init` output. If you didn’t copy them, you can regenerate them.
|
||||||
|
|
||||||
|
⚠️ The certificates and the decryption key expire after two hours.
|
||||||
|
|
||||||
|
### Additional Masters
|
||||||
|
|
||||||
|
You can now join any number of control plane nodes by running the join command printed by `kubeadm init`:
|
||||||
|
```bash
|
||||||
|
sudo kubeadm join <control-plane-endpoint> --token <token> --discovery-token-ca-cert-hash <discovery-token-ca-cert-hash> --control-plane --certificate-key <certificate-key>
|
||||||
|
```
|
||||||
|
```plaintext
|
||||||
|
[preflight] Running pre-flight checks
|
||||||
|
[preflight] Reading configuration from the "kubeadm-config" ConfigMap in namespace "kube-system"...
|
||||||
|
[preflight] Use 'kubeadm init phase upload-config --config your-config.yaml' to re-upload it.
|
||||||
|
[preflight] Running pre-flight checks before initializing the new control plane instance
|
||||||
|
[preflight] Pulling images required for setting up a Kubernetes cluster
|
||||||
|
[preflight] This might take a minute or two, depending on the speed of your internet connection
|
||||||
|
[preflight] You can also perform this action beforehand using 'kubeadm config images pull'
|
||||||
|
W0718 09:27:32.248290 12043 checks.go:846] detected that the sandbox image "registry.k8s.io/pause:3.8" of the container runtime is inconsistent with that used by kubeadm.It is recommended to use "registry.k8s.io/pause:3.10" as the CRI sandbox image.
|
||||||
|
[download-certs] Downloading the certificates in Secret "kubeadm-certs" in the "kube-system" Namespace
|
||||||
|
[download-certs] Saving the certificates to the folder: "/etc/kubernetes/pki"
|
||||||
|
[certs] Using certificateDir folder "/etc/kubernetes/pki"
|
||||||
|
[certs] Generating "etcd/server" certificate and key
|
||||||
|
[certs] etcd/server serving cert is signed for DNS names [localhost vertex-master] and IPs [192.168.66.169 127.0.0.1 ::1]
|
||||||
|
[certs] Generating "etcd/peer" certificate and key
|
||||||
|
[certs] etcd/peer serving cert is signed for DNS names [localhost vertex-master] and IPs [192.168.66.169 127.0.0.1 ::1]
|
||||||
|
[certs] Generating "apiserver-etcd-client" certificate and key
|
||||||
|
[certs] Generating "etcd/healthcheck-client" certificate and key
|
||||||
|
[certs] Generating "apiserver" certificate and key
|
||||||
|
[certs] apiserver serving cert is signed for DNS names [k8s-lab.lab.vezpi.me kubernetes kubernetes.default kubernetes.default.svc kubernetes.default.svc.cluster.local vertex-master] and IPs [10.96.0.1 192.168.66.169]
|
||||||
|
[certs] Generating "apiserver-kubelet-client" certificate and key
|
||||||
|
[certs] Generating "front-proxy-client" certificate and key
|
||||||
|
[certs] Valid certificates and keys now exist in "/etc/kubernetes/pki"
|
||||||
|
[certs] Using the existing "sa" key
|
||||||
|
[kubeconfig] Generating kubeconfig files
|
||||||
|
[kubeconfig] Using kubeconfig folder "/etc/kubernetes"
|
||||||
|
[kubeconfig] Writing "admin.conf" kubeconfig file
|
||||||
|
[kubeconfig] Writing "controller-manager.conf" kubeconfig file
|
||||||
|
[kubeconfig] Writing "scheduler.conf" kubeconfig file
|
||||||
|
[control-plane] Using manifest folder "/etc/kubernetes/manifests"
|
||||||
|
[control-plane] Creating static Pod manifest for "kube-apiserver"
|
||||||
|
[control-plane] Creating static Pod manifest for "kube-controller-manager"
|
||||||
|
[control-plane] Creating static Pod manifest for "kube-scheduler"
|
||||||
|
[check-etcd] Checking that the etcd cluster is healthy
|
||||||
|
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
|
||||||
|
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
|
||||||
|
[kubelet-start] Starting the kubelet
|
||||||
|
[kubelet-check] Waiting for a healthy kubelet at http://127.0.0.1:10248/healthz. This can take up to 4m0s
|
||||||
|
[kubelet-check] The kubelet is healthy after 501.761616ms
|
||||||
|
[kubelet-start] Waiting for the kubelet to perform the TLS Bootstrap
|
||||||
|
[etcd] Announced new etcd member joining to the existing etcd cluster
|
||||||
|
[etcd] Creating static Pod manifest for "etcd"
|
||||||
|
{"level":"warn","ts":"2025-07-18T09:27:36.040077Z","logger":"etcd-client","caller":"v3@v3.5.16/retry_interceptor.go:63","msg":"retrying of unary invoker failed","target":"etcd-endpoints://0xc00037ab40/192.168.66.167:2379","attempt":0,"error":"rpc error: code = FailedPrecondition desc = etcdserver: can only promote a learner member which is in sync with leader"}
|
||||||
|
[...]
|
||||||
|
{"level":"warn","ts":"2025-07-18T09:27:44.976805Z","logger":"etcd-client","caller":"v3@v3.5.16/retry_interceptor.go:63","msg":"retrying of unary invoker failed","target":"etcd-endpoints://0xc00037ab40/192.168.66.167:2379","attempt":0,"error":"rpc error: code = FailedPrecondition desc = etcdserver: can only promote a learner member which is in sync with leader"}
|
||||||
|
[etcd] Waiting for the new etcd member to join the cluster. This can take up to 40s
|
||||||
|
[mark-control-plane] Marking the node vertex-master as control-plane by adding the labels: [node-role.kubernetes.io/control-plane node.kubernetes.io/exclude-from-external-load-balancers]
|
||||||
|
[mark-control-plane] Marking the node vertex-master as control-plane by adding the taints [node-role.kubernetes.io/control-plane:NoSchedule]
|
||||||
|
|
||||||
|
This node has joined the cluster and a new control plane instance was created:
|
||||||
|
|
||||||
|
* Certificate signing request was sent to apiserver and approval was received.
|
||||||
|
* The Kubelet was informed of the new secure connection details.
|
||||||
|
* Control plane label and taint were applied to the new node.
|
||||||
|
* The Kubernetes control plane instances scaled up.
|
||||||
|
* A new etcd member was added to the local/stacked etcd cluster.
|
||||||
|
|
||||||
|
To start administering your cluster from this node, you need to run the following as a regular user:
|
||||||
|
|
||||||
|
mkdir -p $HOME/.kube
|
||||||
|
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
|
||||||
|
sudo chown $(id -u):$(id -g) $HOME/.kube/config
|
||||||
|
|
||||||
|
Run 'kubectl get nodes' to see this node join the cluster.
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Regenerate Certificates
|
||||||
|
|
||||||
|
If the certificates have expired, you will see an error like this when running `kubeadm join`:
|
||||||
|
```plaintext
|
||||||
|
[download-certs] Downloading the certificates in Secret "kubeadm-certs" in the "kube-system" Namespace
|
||||||
|
error execution phase control-plane-prepare/download-certs: error downloading certs: error downloading the secret: Secret "kubeadm-certs" was not found in the "kube-system" Namespace. This Secret might have expired. Please, run `kubeadm init phase upload-certs --upload-certs` on a control plane to generate a new one
|
||||||
|
```
|
||||||
|
|
||||||
|
If so, re-upload the certificates and generate a new decryption key by running the following command on a control plane node that has already joined the cluster:
|
||||||
|
```bash
|
||||||
|
sudo kubeadm init phase upload-certs --upload-certs
|
||||||
|
```
|
||||||
|
```plaintext
|
||||||
|
I0718 09:26:12.448472 18624 version.go:261] remote version is much newer: v1.33.3; falling back to: stable-1.32
|
||||||
|
[upload-certs] Storing the certificates in Secret "kubeadm-certs" in the "kube-system" Namespace
|
||||||
|
[upload-certs] Using certificate key:
|
||||||
|
7531149107ebc3caf4990f94d19824aecf39d93b84ee1b9c86aee84c04e76656
|
||||||
|
```
|
||||||
|
#### Generate Token
|
||||||
|
|
||||||
|
Paired with the certificate key, you'll also need a new **token**. This command prints the complete control plane `join` command:
|
||||||
|
```bash
|
||||||
|
sudo kubeadm token create --print-join-command --certificate-key <certificate-key>
|
||||||
|
```
|
||||||
|
|
||||||
|
Run the printed command on the nodes you want to add to the Kubernetes cluster as masters.
|
||||||
|
|
||||||
|
### Join Workers
|
||||||
|
|
||||||
|
You can join any number of worker nodes by running the following:
|
||||||
|
```bash
|
||||||
|
sudo kubeadm join k8s-lab.lab.vezpi.me:6443 --token 8etamd.g8whseg60kg09nu1 \
|
||||||
|
--discovery-token-ca-cert-hash sha256:65c4da3121f57d2e67ea6c1c1349544c9e295d78790b199b5c3be908ffe5ed6c
|
||||||
|
```
|
||||||
|
```plaintext
|
||||||
|
[preflight] Running pre-flight checks
|
||||||
|
[preflight] Reading configuration from the "kubeadm-config" ConfigMap in namespace "kube-system"...
|
||||||
|
[preflight] Use 'kubeadm init phase upload-config --config your-config.yaml' to re-upload it.
|
||||||
|
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
|
||||||
|
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
|
||||||
|
[kubelet-start] Starting the kubelet
|
||||||
|
[kubelet-check] Waiting for a healthy kubelet at http://127.0.0.1:10248/healthz. This can take up to 4m0s
|
||||||
|
[kubelet-check] The kubelet is healthy after 506.731798ms
|
||||||
|
[kubelet-start] Waiting for the kubelet to perform the TLS Bootstrap
|
||||||
|
|
||||||
|
This node has joined the cluster:
|
||||||
|
* Certificate signing request was sent to apiserver and a response was received.
|
||||||
|
* The Kubelet was informed of the new secure connection details.
|
||||||
|
|
||||||
|
Run 'kubectl get nodes' on the control-plane to see this node join the cluster.
|
||||||
|
```
|
||||||
|
|
||||||
|
Again, if you missed the output of `kubeadm init`, you can generate a new token along with the full `join` command:
|
||||||
|
```bash
|
||||||
|
sudo kubeadm token create --print-join-command
|
||||||
|
```
|
||||||
|
|
||||||
|
Run the printed command on the nodes you want to add to the Kubernetes cluster as workers.
|
||||||
|
|
||||||
|
### Verify Cluster
|
||||||
|
|
||||||
|
From your controller, verify that all the nodes have joined the cluster and are in the `Ready` state:
|
||||||
|
```bash
|
||||||
|
kubectl get node
|
||||||
|
```
|
||||||
|
```plaintext
|
||||||
|
NAME STATUS ROLES AGE VERSION
|
||||||
|
apex-master Ready control-plane 154m v1.32.7
|
||||||
|
apex-worker Ready <none> 5m14s v1.32.7
|
||||||
|
vertex-master Ready control-plane 26m v1.32.7
|
||||||
|
vertex-worker Ready <none> 3m39s v1.32.7
|
||||||
|
zenith-master Ready control-plane 23m v1.32.7
|
||||||
|
zenith-worker Ready <none> 3m26s v1.32.7
|
||||||
|
```
|
||||||
|
|
||||||
|
To validate that your cluster has proper network connectivity:
|
||||||
|
```bash
|
||||||
|
cilium connectivity test
|
||||||
|
```
|
||||||
|
```plaintext
|
||||||
|
__ Monitor aggregation detected, will skip some flow validation steps
|
||||||
|
[kubernetes] Creating namespace cilium-test-1 for connectivity check...
|
||||||
|
__ [kubernetes] Deploying echo-same-node service...
|
||||||
|
__ [kubernetes] Deploying DNS test server configmap...
|
||||||
|
__ [kubernetes] Deploying same-node deployment...
|
||||||
|
__ [kubernetes] Deploying client deployment...
|
||||||
|
__ [kubernetes] Deploying client2 deployment...
|
||||||
|
__ [kubernetes] Deploying client3 deployment...
|
||||||
|
__ [kubernetes] Deploying echo-other-node service...
|
||||||
|
__ [kubernetes] Deploying other-node deployment...
|
||||||
|
__ [host-netns] Deploying kubernetes daemonset...
|
||||||
|
__ [host-netns-non-cilium] Deploying kubernetes daemonset...
|
||||||
|
__ Skipping tests that require a node Without Cilium
|
||||||
|
[kubernetes] Waiting for deployment cilium-test-1/client to become ready...
|
||||||
|
__ [kubernetes] Waiting for deployment cilium-test-1/client2 to become ready...
|
||||||
|
__ [kubernetes] Waiting for deployment cilium-test-1/echo-same-node to become ready...
|
||||||
|
__ [kubernetes] Waiting for deployment cilium-test-1/client3 to become ready...
|
||||||
|
__ [kubernetes] Waiting for deployment cilium-test-1/echo-other-node to become ready...
|
||||||
|
__ [kubernetes] Waiting for pod cilium-test-1/client2-66475877c6-gpdkz to reach DNS server on cilium-test-1/echo-same-node-6c98489c8d-547mc pod...
|
||||||
|
__ [kubernetes] Waiting for pod cilium-test-1/client3-795488bf5-xrlbp to reach DNS server on cilium-test-1/echo-same-node-6c98489c8d-547mc pod...
|
||||||
|
__ [kubernetes] Waiting for pod cilium-test-1/client-645b68dcf7-ps276 to reach DNS server on cilium-test-1/echo-same-node-6c98489c8d-547mc pod...
|
||||||
|
__ [kubernetes] Waiting for pod cilium-test-1/client2-66475877c6-gpdkz to reach DNS server on cilium-test-1/echo-other-node-6d774d44c4-gzkmd pod...
|
||||||
|
__ [kubernetes] Waiting for pod cilium-test-1/client3-795488bf5-xrlbp to reach DNS server on cilium-test-1/echo-other-node-6d774d44c4-gzkmd pod...
|
||||||
|
__ [kubernetes] Waiting for pod cilium-test-1/client-645b68dcf7-ps276 to reach DNS server on cilium-test-1/echo-other-node-6d774d44c4-gzkmd pod...
|
||||||
|
__ [kubernetes] Waiting for pod cilium-test-1/client2-66475877c6-gpdkz to reach default/kubernetes service...
|
||||||
|
__ [kubernetes] Waiting for pod cilium-test-1/client3-795488bf5-xrlbp to reach default/kubernetes service...
|
||||||
|
__ [kubernetes] Waiting for pod cilium-test-1/client-645b68dcf7-ps276 to reach default/kubernetes service...
|
||||||
|
__ [kubernetes] Waiting for Service cilium-test-1/echo-other-node to become ready...
|
||||||
|
__ [kubernetes] Waiting for Service cilium-test-1/echo-other-node to be synchronized by Cilium pod kube-system/cilium-6824w
|
||||||
|
__ [kubernetes] Waiting for Service cilium-test-1/echo-other-node to be synchronized by Cilium pod kube-system/cilium-jc4fx
|
||||||
|
__ [kubernetes] Waiting for Service cilium-test-1/echo-same-node to become ready...
|
||||||
|
__ [kubernetes] Waiting for Service cilium-test-1/echo-same-node to be synchronized by Cilium pod kube-system/cilium-6824w
|
||||||
|
__ [kubernetes] Waiting for Service cilium-test-1/echo-same-node to be synchronized by Cilium pod kube-system/cilium-jc4fx
|
||||||
|
__ [kubernetes] Waiting for NodePort 192.168.66.166:32391 (cilium-test-1/echo-other-node) to become ready...
|
||||||
|
__ [kubernetes] Waiting for NodePort 192.168.66.166:32055 (cilium-test-1/echo-same-node) to become ready...
|
||||||
|
__ [kubernetes] Waiting for NodePort 192.168.66.172:32391 (cilium-test-1/echo-other-node) to become ready...
|
||||||
|
__ [kubernetes] Waiting for NodePort 192.168.66.172:32055 (cilium-test-1/echo-same-node) to become ready...
|
||||||
|
__ [kubernetes] Waiting for NodePort 192.168.66.167:32391 (cilium-test-1/echo-other-node) to become ready...
|
||||||
|
__ [kubernetes] Waiting for NodePort 192.168.66.167:32055 (cilium-test-1/echo-same-node) to become ready...
|
||||||
|
__ [kubernetes] Waiting for NodePort 192.168.66.168:32391 (cilium-test-1/echo-other-node) to become ready...
|
||||||
|
__ [kubernetes] Waiting for NodePort 192.168.66.168:32055 (cilium-test-1/echo-same-node) to become ready...
|
||||||
|
__ [kubernetes] Waiting for NodePort 192.168.66.169:32391 (cilium-test-1/echo-other-node) to become ready...
|
||||||
|
__ [kubernetes] Waiting for NodePort 192.168.66.169:32055 (cilium-test-1/echo-same-node) to become ready...
|
||||||
|
__ [kubernetes] Waiting for NodePort 192.168.66.170:32391 (cilium-test-1/echo-other-node) to become ready...
|
||||||
|
__ [kubernetes] Waiting for NodePort 192.168.66.170:32055 (cilium-test-1/echo-same-node) to become ready...
|
||||||
|
__ [kubernetes] Waiting for DaemonSet cilium-test-1/host-netns-non-cilium to become ready...
|
||||||
|
__ [kubernetes] Waiting for DaemonSet cilium-test-1/host-netns to become ready...
|
||||||
|
__ Skipping IPCache check
|
||||||
|
Enabling Hubble telescope...
|
||||||
|
__ Unable to contact Hubble Relay, disabling Hubble telescope and flow validation: rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing: dial tcp [::1]:4245: connect: connection refused"
|
||||||
|
Expose Relay locally with:
|
||||||
|
cilium hubble enable
|
||||||
|
cilium hubble port-forward&
|
||||||
|
__ Cilium version: 1.17.5
|
||||||
|
[cilium-test-1] Running 123 tests ...
|
||||||
|
[=] [cilium-test-1] Test [no-policies] [1/123]
|
||||||
|
[...]
|
||||||
|
[=] [cilium-test-1] Test [check-log-errors] [123/123]
|
||||||
|
.................................................
|
||||||
|
__ [cilium-test-1] All 73 tests (739 actions) successful, 50 tests skipped, 1 scenarios skipped.
|
||||||
|
```
|
||||||
|
|
||||||
|
⌛ This connectivity test could take up to 30 minutes.
|
||||||
|
|
||||||
|
---
|
||||||
|
## Conclusion
|
||||||
|
|
||||||
|
🚀 Our highly available Kubernetes cluster is ready!
|
||||||
|
|
||||||
|
In this post, we walked through the **manual creation of a Kubernetes cluster** in my homelab using `kubeadm`, on top of 6 Ubuntu VMs (3 masters and 3 workers) previously provisioned with Terraform on Proxmox.
|
||||||
|
|
||||||
|
We went step by step:
|
||||||
|
- Preparing the nodes with the required tools, kernel modules, and container runtime
|
||||||
|
- Installing the Kubernetes packages
|
||||||
|
- Bootstrapping the cluster from the first master node
|
||||||
|
- Joining additional control-plane and worker nodes
|
||||||
|
- Verifying that the cluster is healthy and ready
|
||||||
|
|
||||||
|
This manual approach helps to demystify how Kubernetes clusters are built behind the scenes. It’s a solid foundation before automating the process in future posts using tools like Ansible.
|
||||||
|
|
||||||
|
Stay tuned, next time we’ll look into automating all of this!
|
||||||
|
|
||||||
|
|
@@ -0,0 +1,634 @@
|
|||||||
|
---
|
||||||
|
slug: expose-kubernetes-pods-externally-ingress-tls
|
||||||
|
title: Exposer des Pods Kubernetes en externe avec Ingress et TLS
|
||||||
|
description: Découvrez comment exposer des pods Kubernetes en externe avec Services, Ingress et TLS grâce à BGP, NGINX et Cert-Manager dans un homelab.
|
||||||
|
date: 2025-08-19
|
||||||
|
draft: false
|
||||||
|
tags:
|
||||||
|
- kubernetes
|
||||||
|
- helm
|
||||||
|
- bgp
|
||||||
|
- opnsense
|
||||||
|
- cilium
|
||||||
|
- nginx-ingress-controller
|
||||||
|
- cert-manager
|
||||||
|
categories:
|
||||||
|
- homelab
|
||||||
|
---
|
||||||
|
|
||||||
|
## Intro
|
||||||
|
|
||||||
|
Après avoir construit mon propre cluster Kubernetes dans mon homelab avec `kubeadm` dans [cet article]({{< ref "post/8-create-manual-kubernetes-cluster-kubeadm" >}}), mon prochain défi est d’exposer un pod simple à l’extérieur, accessible via une URL et sécurisé avec un certificat TLS validé par Let’s Encrypt.
|
||||||
|
|
||||||
|
Pour y parvenir, j’ai besoin de configurer plusieurs composants :
|
||||||
|
- **Service** : Expose le pod à l’intérieur du cluster et fournit un point d’accès.
|
||||||
|
- **Ingress** : Définit des règles de routage pour exposer des services HTTP(S) à l’extérieur.
|
||||||
|
- **Ingress Controller** : Surveille les ressources Ingress et gère réellement le routage du trafic.
|
||||||
|
- **Certificats TLS** : Sécurisent le trafic en HTTPS grâce à des certificats délivrés par Let’s Encrypt.
|
||||||
|
|
||||||
|
Cet article vous guide pas à pas pour comprendre comment fonctionne l’accès externe dans Kubernetes dans un environnement homelab.
|
||||||
|
|
||||||
|
C'est parti.
|
||||||
|
|
||||||
|
---
|
||||||
|
## Helm
|
||||||
|
|
||||||
|
J’utilise **Helm**, le gestionnaire de paquets de facto pour Kubernetes, afin d’installer des composants externes comme l’Ingress Controller ou cert-manager.
|
||||||
|
|
||||||
|
### Pourquoi Helm
|
||||||
|
|
||||||
|
Helm simplifie le déploiement et la gestion des applications Kubernetes. Au lieu d’écrire et de maintenir de longs manifestes YAML, Helm permet d’installer des applications en une seule commande, en s’appuyant sur des charts versionnés et configurables.
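Pour donner une idée du workflow (les noms de dépôt, de chart et de release ci-dessous sont de simples exemples), une utilisation typique de Helm ressemble à ceci :

```bash
# Add a chart repository and refresh the local index
helm repo add example https://charts.example.org
helm repo update

# Install a release from a chart, overriding values on the command line
helm install my-release example/some-chart \
  --namespace demo \
  --create-namespace \
  --set replicaCount=2

# Upgrade, roll back or remove the release later on
helm upgrade my-release example/some-chart --set replicaCount=3
helm rollback my-release 1
helm uninstall my-release --namespace demo
```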
|
||||||
|
|
||||||
|
### Installer Helm
|
||||||
|
|
||||||
|
J’installe Helm sur mon hôte bastion LXC, qui dispose déjà d’un accès au cluster Kubernetes :
|
||||||
|
```bash
|
||||||
|
curl https://baltocdn.com/helm/signing.asc | gpg --dearmor | sudo tee /usr/share/keyrings/helm.gpg > /dev/null
|
||||||
|
echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/helm.gpg] https://baltocdn.com/helm/stable/debian/ all main" | sudo tee /etc/apt/sources.list.d/helm-stable-debian.list
|
||||||
|
sudo apt update
|
||||||
|
sudo apt install helm
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
## Services Kubernetes
|
||||||
|
|
||||||
|
Avant de pouvoir exposer un pod à l’extérieur, il faut d’abord le rendre accessible à l’intérieur du cluster. C’est là qu’interviennent les **Services Kubernetes**.
|
||||||
|
|
||||||
|
Les Services agissent comme un pont entre les pods et le réseau, garantissant que les applications restent accessibles même si les pods sont réordonnés ou redéployés.
|
||||||
|
|
||||||
|
Il existe plusieurs types de Services Kubernetes, chacun avec un objectif différent :
|
||||||
|
- **ClusterIP** expose le Service sur une IP interne au cluster, uniquement accessible depuis l’intérieur.
|
||||||
|
- **NodePort** expose le Service sur un port statique de l’IP de chaque nœud, accessible depuis l’extérieur du cluster.
|
||||||
|
- **LoadBalancer** expose le Service sur une IP externe, généralement via une intégration cloud (ou via BGP dans un homelab).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Exposer un Service `LoadBalancer` avec BGP
|
||||||
|
|
||||||
|
Au départ, j’ai envisagé d’utiliser **MetalLB** pour exposer les adresses IP des services sur mon réseau local. C’est ce que j’utilisais auparavant quand je dépendais de la box de mon FAI comme routeur principal. Mais après avoir lu cet article, [Use Cilium BGP integration with OPNsense](https://devopstales.github.io/kubernetes/cilium-opnsense-bgp/), je réalise que je peux obtenir le même résultat (voire mieux) en utilisant **BGP** avec mon routeur **OPNsense** et **Cilium**, mon CNI.
|
||||||
|
|
||||||
|
### Qu’est-ce que BGP ?
|
||||||
|
|
||||||
|
BGP (_Border Gateway Protocol_) est un protocole de routage utilisé pour échanger des routes entre systèmes. Dans un homelab Kubernetes, BGP permet à tes nœuds Kubernetes d’annoncer directement leurs IPs à ton routeur ou firewall. Ton routeur sait alors exactement comment atteindre les adresses IP gérées par ton cluster.
|
||||||
|
|
||||||
|
Au lieu que MetalLB gère l’allocation d’IP et les réponses ARP, tes nœuds disent directement à ton routeur : « Hé, c’est moi qui possède l’adresse 192.168.1.240 ».
|
||||||
|
|
||||||
|
### L’approche MetalLB classique
|
||||||
|
|
||||||
|
Sans BGP, MetalLB en mode Layer 2 fonctionne comme ceci :
|
||||||
|
- Il assigne une adresse IP `LoadBalancer` (par exemple `192.168.1.240`) depuis un pool.
|
||||||
|
- Un nœud répond aux requêtes ARP pour cette IP sur ton LAN.
|
||||||
|
|
||||||
|
Oui, MetalLB peut aussi fonctionner avec BGP, mais pourquoi l’utiliser si mon CNI (Cilium) le gère déjà nativement ?
|
||||||
|
|
||||||
|
### BGP avec Cilium
|
||||||
|
|
||||||
|
Avec Cilium + BGP, tu obtiens :
|
||||||
|
- L’agent Cilium du nœud annonce les IPs `LoadBalancer` via BGP.
|
||||||
|
- Ton routeur apprend ces routes et les envoie au bon nœud.
|
||||||
|
- Plus besoin de MetalLB.
|
||||||
|
|
||||||
|
### Configuration BGP
|
||||||
|
|
||||||
|
BGP est désactivé par défaut, aussi bien sur OPNsense que sur Cilium. Activons-le des deux côtés.
|
||||||
|
|
||||||
|
#### Sur OPNsense
|
||||||
|
|
||||||
|
D’après la [documentation officielle OPNsense](https://docs.opnsense.org/manual/dynamic_routing.html#bgp-section), l’activation de BGP nécessite d’installer un plugin.
|
||||||
|
|
||||||
|
Va dans `System` > `Firmware` > `Plugins` et installe le plugin **os-frr** :
|
||||||
|

|
||||||
|
Installer le plugin `os-frr` dans OPNsense
|
||||||
|
|
||||||
|
Une fois installé, active le plugin dans `Routing` > `General` :
|
||||||
|

|
||||||
|
Activer le routage dans OPNsense
|
||||||
|
|
||||||
|
Ensuite, rends-toi dans la section **BGP**. Dans l’onglet **General** :
|
||||||
|
- Coche la case pour activer BGP.
|
||||||
|
- Définis ton **ASN BGP**. J’ai choisi `64512`, le premier ASN privé de la plage réservée (voir [ASN table](https://en.wikipedia.org/wiki/Autonomous_system_\(Internet\)#ASN_Table)) :
|
||||||
|

|
||||||
|
|
||||||
|
Ajoute ensuite tes voisins BGP. Je ne fais le peering qu’avec mes **nœuds workers** (puisque seuls eux hébergent des workloads). Pour chaque voisin :
|
||||||
|
- Mets l’IP du nœud dans `Peer-IP`.
|
||||||
|
- Utilise `64513` comme **Remote AS** (celui de Cilium).
|
||||||
|
- Configure `Update-Source Interface` sur `Lab`.
|
||||||
|
- Coche `Next-Hop-Self`.
|
||||||
|

|
||||||
|
|
||||||
|
Voici la liste de mes voisins une fois configurés :
|
||||||
|

|
||||||
|
Liste des voisins BGP
|
||||||
|
|
||||||
|
N’oublie pas la règle firewall pour autoriser BGP (port `179/TCP`) depuis le VLAN **Lab** vers le firewall :
|
||||||
|

|
||||||
|
Autoriser TCP/179 de Lab vers OPNsense
|
||||||
|
|
||||||
|
#### Dans Cilium
|
||||||
|
|
||||||
|
J’ai déjà Cilium installé et je n’ai pas trouvé comment activer BGP avec la CLI, donc je l’ai simplement réinstallé avec l’option BGP :
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cilium uninstall
|
||||||
|
cilium install --set bgpControlPlane.enabled=true
|
||||||
|
```
|
||||||
|
|
||||||
|
Je configure uniquement les **nœuds workers** pour établir le peering BGP en les labellisant avec un `nodeSelector` :
|
||||||
|
```bash
|
||||||
|
kubectl label node apex-worker node-role.kubernetes.io/worker=""
|
||||||
|
kubectl label node vertex-worker node-role.kubernetes.io/worker=""
|
||||||
|
kubectl label node zenith-worker node-role.kubernetes.io/worker=""
|
||||||
|
```
|
||||||
|
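Les labels apparaissent ensuite dans la colonne `ROLES` de `kubectl get nodes` :

```bash
kubectl get nodes
```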
```plaintext
|
||||||
|
NAME STATUS ROLES AGE VERSION
|
||||||
|
apex-master Ready control-plane 5d4h v1.32.7
|
||||||
|
apex-worker Ready worker 5d1h v1.32.7
|
||||||
|
vertex-master Ready control-plane 5d1h v1.32.7
|
||||||
|
vertex-worker Ready worker 5d1h v1.32.7
|
||||||
|
zenith-master Ready control-plane 5d1h v1.32.7
|
||||||
|
zenith-worker Ready worker 5d1h v1.32.7
|
||||||
|
```
|
||||||
|
|
||||||
|
Pour la configuration BGP complète, j’ai besoin de :
|
||||||
|
- **CiliumBGPClusterConfig** : paramètres BGP pour le cluster Cilium, incluant son ASN local et son pair.
|
||||||
|
- **CiliumBGPPeerConfig** : définit les timers, le redémarrage gracieux et les routes annoncées.
|
||||||
|
- **CiliumBGPAdvertisement** : indique quels services Kubernetes annoncer via BGP.
|
||||||
|
- **CiliumLoadBalancerIPPool** : définit la plage d’IPs attribuées aux services `LoadBalancer`.
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
---
|
||||||
|
apiVersion: cilium.io/v2alpha1
|
||||||
|
kind: CiliumBGPClusterConfig
|
||||||
|
metadata:
|
||||||
|
name: bgp-cluster
|
||||||
|
spec:
|
||||||
|
nodeSelector:
|
||||||
|
matchLabels:
|
||||||
|
node-role.kubernetes.io/worker: "" # Only for worker nodes
|
||||||
|
bgpInstances:
|
||||||
|
- name: "cilium-bgp-cluster"
|
||||||
|
localASN: 64513 # Cilium ASN
|
||||||
|
peers:
|
||||||
|
- name: "pfSense-peer"
|
||||||
|
peerASN: 64512 # OPNsense ASN
|
||||||
|
peerAddress: 192.168.66.1 # OPNsense IP
|
||||||
|
peerConfigRef:
|
||||||
|
name: "bgp-peer"
|
||||||
|
---
|
||||||
|
apiVersion: cilium.io/v2alpha1
|
||||||
|
kind: CiliumBGPPeerConfig
|
||||||
|
metadata:
|
||||||
|
name: bgp-peer
|
||||||
|
spec:
|
||||||
|
timers:
|
||||||
|
holdTimeSeconds: 9
|
||||||
|
keepAliveTimeSeconds: 3
|
||||||
|
gracefulRestart:
|
||||||
|
enabled: true
|
||||||
|
restartTimeSeconds: 15
|
||||||
|
families:
|
||||||
|
- afi: ipv4
|
||||||
|
safi: unicast
|
||||||
|
advertisements:
|
||||||
|
matchLabels:
|
||||||
|
advertise: "bgp"
|
||||||
|
---
|
||||||
|
apiVersion: cilium.io/v2alpha1
|
||||||
|
kind: CiliumBGPAdvertisement
|
||||||
|
metadata:
|
||||||
|
name: bgp-advertisement
|
||||||
|
labels:
|
||||||
|
advertise: bgp
|
||||||
|
spec:
|
||||||
|
advertisements:
|
||||||
|
- advertisementType: "Service"
|
||||||
|
service:
|
||||||
|
addresses:
|
||||||
|
- LoadBalancerIP
|
||||||
|
selector:
|
||||||
|
matchExpressions:
|
||||||
|
- { key: somekey, operator: NotIn, values: [ never-used-value ] }
|
||||||
|
---
|
||||||
|
apiVersion: "cilium.io/v2alpha1"
|
||||||
|
kind: CiliumLoadBalancerIPPool
|
||||||
|
metadata:
|
||||||
|
name: "dmz"
|
||||||
|
spec:
|
||||||
|
blocks:
|
||||||
|
- start: "192.168.55.20" # LB Range Start IP
|
||||||
|
stop: "192.168.55.250" # LB Range End IP
|
||||||
|
```
|
||||||
|
|
||||||
|
Applique la configuration :
|
||||||
|
```bash
|
||||||
|
kubectl apply -f bgp.yaml
|
||||||
|
|
||||||
|
ciliumbgpclusterconfig.cilium.io/bgp-cluster created
|
||||||
|
ciliumbgppeerconfig.cilium.io/bgp-peer created
|
||||||
|
ciliumbgpadvertisement.cilium.io/bgp-advertisement created
|
||||||
|
ciliumloadbalancerippool.cilium.io/dmz created
|
||||||
|
```
|
||||||
|
|
||||||
|
Si tout fonctionne, tu devrais voir les sessions BGP **établies** avec tes workers :
|
||||||
|
```bash
|
||||||
|
cilium bgp peers
|
||||||
|
|
||||||
|
Node Local AS Peer AS Peer Address Session State Uptime Family Received Advertised
|
||||||
|
apex-worker 64513 64512 192.168.66.1 established 6m30s ipv4/unicast 1 2
|
||||||
|
vertex-worker 64513 64512 192.168.66.1 established 7m9s ipv4/unicast 1 2
|
||||||
|
zenith-worker 64513 64512 192.168.66.1 established 6m13s ipv4/unicast 1 2
|
||||||
|
```
|
||||||
|
|
||||||
|
### Déployer un Service `LoadBalancer` avec BGP
|
||||||
|
|
||||||
|
Validons rapidement que la configuration fonctionne en déployant un `Deployment` de test et un `Service` de type `LoadBalancer` :
|
||||||
|
```yaml
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: test-lb
|
||||||
|
spec:
|
||||||
|
type: LoadBalancer
|
||||||
|
ports:
|
||||||
|
- port: 80
|
||||||
|
targetPort: 80
|
||||||
|
protocol: TCP
|
||||||
|
name: http
|
||||||
|
selector:
|
||||||
|
svc: test-lb
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: nginx
|
||||||
|
spec:
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
svc: test-lb
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
svc: test-lb
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: web
|
||||||
|
image: nginx
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
ports:
|
||||||
|
- containerPort: 80
|
||||||
|
readinessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /
|
||||||
|
port: 80
|
||||||
|
```
|
||||||
|
|
||||||
|
Vérifions si le service obtient une IP externe :
|
||||||
|
```bash
|
||||||
|
kubectl get services test-lb
|
||||||
|
|
||||||
|
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
|
||||||
|
test-lb LoadBalancer 10.100.167.198 192.168.55.20 80:31350/TCP 169m
|
||||||
|
```
|
||||||
|
|
||||||
|
Le service a récupéré la première IP du pool défini : `192.168.55.20`.
|
||||||
|
|
||||||
|
Depuis n’importe quel appareil du LAN, on peut tester l’accès sur le port 80 :
|
||||||
|

|
||||||
|
|
||||||
|
✅ Notre pod est joignable via une IP `LoadBalancer` routée en BGP. Première étape réussie !
|
||||||
|
|
||||||
|
---
|
||||||
|
## Kubernetes Ingress
|
||||||
|
|
||||||
|
Nous avons réussi à exposer un pod en externe en utilisant un service `LoadBalancer` et une adresse IP attribuée via BGP. Cette approche fonctionne très bien pour les tests, mais elle ne passe pas à l’échelle.
|
||||||
|
|
||||||
|
Imagine avoir 10, 20 ou 50 services différents. Est-ce que je voudrais vraiment allouer 50 adresses IP et encombrer mon firewall ainsi que mes tables de routage avec 50 entrées BGP ? Certainement pas.
|
||||||
|
|
||||||
|
C’est là qu’intervient **Ingress**.
|
||||||
|
|
||||||
|
### Qu’est-ce qu’un Kubernetes Ingress ?
|
||||||
|
|
||||||
|
Un Kubernetes **Ingress** est un objet API qui gère **l’accès externe aux services** d’un cluster, généralement en HTTP et HTTPS, le tout via un point d’entrée unique.
|
||||||
|
|
||||||
|
Au lieu d’attribuer une IP par service, on définit des règles de routage basées sur :
|
||||||
|
- **Des noms d’hôtes** (`app1.vezpi.me`, `blog.vezpi.me`, etc.)
|
||||||
|
- **Des chemins** (`/grafana`, `/metrics`, etc.)
|
||||||
|
|
||||||
|
|
||||||
|
Avec Ingress, je peux exposer plusieurs services via la même IP et le même port (souvent 443 pour HTTPS), et Kubernetes saura comment router la requête vers le bon service backend.
|
||||||
|
|
||||||
|
Voici un exemple simple d’`Ingress`, qui route le trafic de `test.vezpi.me` vers le service `test-lb` sur le port 80 :
|
||||||
|
```yaml
|
||||||
|
---
|
||||||
|
apiVersion: networking.k8s.io/v1
|
||||||
|
kind: Ingress
|
||||||
|
metadata:
|
||||||
|
name: test-ingress
|
||||||
|
spec:
|
||||||
|
rules:
|
||||||
|
- host: test.vezpi.me
|
||||||
|
http:
|
||||||
|
paths:
|
||||||
|
- path: /
|
||||||
|
pathType: Prefix
|
||||||
|
backend:
|
||||||
|
service:
|
||||||
|
name: test-lb
|
||||||
|
port:
|
||||||
|
number: 80
|
||||||
|
```
|
||||||
|
|
||||||
|
### Ingress Controller
|
||||||
|
|
||||||
|
Un Ingress, en soi, n’est qu’un ensemble de règles de routage. Il ne traite pas réellement le trafic. Pour le rendre fonctionnel, il faut un **Ingress Controller**, qui va :
|
||||||
|
- Surveiller l’API Kubernetes pour détecter les ressources `Ingress`.
|
||||||
|
- Ouvrir les ports HTTP(S) via un service `LoadBalancer` ou `NodePort`.
|
||||||
|
- Router le trafic vers le bon `Service` selon les règles de l’Ingress.
|
||||||
|
|
||||||
|
Parmi les contrôleurs populaires, on retrouve NGINX, Traefik, HAProxy, et d’autres encore. Comme je cherchais quelque chose de simple, stable et largement adopté, j’ai choisi le **NGINX Ingress Controller**.
|
||||||
|
|
||||||
|
### Installer NGINX Ingress Controller
|
||||||
|
|
||||||
|
J’utilise Helm pour installer le contrôleur, et je définis `controller.ingressClassResource.default=true` pour que tous mes futurs ingress l’utilisent par défaut :
|
||||||
|
```bash
|
||||||
|
helm install ingress-nginx \
|
||||||
|
--repo=https://kubernetes.github.io/ingress-nginx \
|
||||||
|
--namespace=ingress-nginx \
|
||||||
|
--create-namespace ingress-nginx \
|
||||||
|
--set controller.ingressClassResource.default=true \
|
||||||
|
--set controller.config.strict-validate-path-type=false
|
||||||
|
```
|
||||||
|
|
||||||
|
Le contrôleur est déployé et expose un service `LoadBalancer`. Dans mon cas, il récupère la deuxième adresse IP disponible dans la plage BGP :
|
||||||
|
```bash
|
||||||
|
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE SELECTOR
|
||||||
|
ingress-nginx-controller LoadBalancer 10.106.236.13 192.168.55.21 80:31195/TCP,443:30974/TCP 75s app.kubernetes.io/component=controller,app.kubernetes.io/instance=ingress-nginx,app.kubernetes.io/name=ingress-nginx
|
||||||
|
```
|
||||||
|
|
||||||
|
### Réserver une IP statique pour le contrôleur
|
||||||
|
|
||||||
|
Je veux m’assurer que l’Ingress Controller reçoive toujours la même adresse IP. Pour cela, j’ai créé deux pools d’IP Cilium distincts :
|
||||||
|
- Un réservé pour l’Ingress Controller avec une seule IP.
|
||||||
|
- Un pour tout le reste.
|
||||||
|
```yaml
|
||||||
|
---
|
||||||
|
# Pool for Ingress Controller
|
||||||
|
apiVersion: cilium.io/v2alpha1
|
||||||
|
kind: CiliumLoadBalancerIPPool
|
||||||
|
metadata:
|
||||||
|
name: ingress-nginx
|
||||||
|
spec:
|
||||||
|
blocks:
|
||||||
|
- cidr: 192.168.55.55/32
|
||||||
|
serviceSelector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: ingress-nginx
|
||||||
|
app.kubernetes.io/component: controller
|
||||||
|
---
|
||||||
|
# Default pool for other services
|
||||||
|
apiVersion: cilium.io/v2alpha1
|
||||||
|
kind: CiliumLoadBalancerIPPool
|
||||||
|
metadata:
|
||||||
|
name: default
|
||||||
|
spec:
|
||||||
|
blocks:
|
||||||
|
- start: 192.168.55.100
|
||||||
|
stop: 192.168.55.250
|
||||||
|
serviceSelector:
|
||||||
|
matchExpressions:
|
||||||
|
- key: app.kubernetes.io/name
|
||||||
|
operator: NotIn
|
||||||
|
values:
|
||||||
|
- ingress-nginx
|
||||||
|
```
|
||||||
|
|
||||||
|
Après avoir remplacé le pool partagé par ces deux pools, l’Ingress Controller reçoit bien l’IP dédiée `192.168.55.55`, et le service `test-lb` obtient `192.168.55.100` comme prévu :
|
||||||
|
```bash
|
||||||
|
NAMESPACE NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
|
||||||
|
default test-lb LoadBalancer 10.100.167.198 192.168.55.100 80:31350/TCP 6h34m
|
||||||
|
ingress-nginx ingress-nginx-controller LoadBalancer 10.106.236.13 192.168.55.55 80:31195/TCP,443:30974/TCP 24m
|
||||||
|
```
|
||||||
|
### Associer un Service à un Ingress
|
||||||
|
|
||||||
|
Maintenant, connectons un service à ce contrôleur.
|
||||||
|
|
||||||
|
Je commence par mettre à jour le service `LoadBalancer` d’origine pour le convertir en `ClusterIP` (puisque c’est désormais l’Ingress Controller qui l’exposera en externe) :
|
||||||
|
```yaml
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: test-lb
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- port: 80
|
||||||
|
targetPort: 80
|
||||||
|
protocol: TCP
|
||||||
|
name: http
|
||||||
|
selector:
|
||||||
|
svc: test-lb
|
||||||
|
---
|
||||||
|
apiVersion: networking.k8s.io/v1
|
||||||
|
kind: Ingress
|
||||||
|
metadata:
|
||||||
|
name: test-ingress
|
||||||
|
spec:
|
||||||
|
rules:
|
||||||
|
- host: test.vezpi.me
|
||||||
|
http:
|
||||||
|
paths:
|
||||||
|
- path: /
|
||||||
|
pathType: Prefix
|
||||||
|
backend:
|
||||||
|
service:
|
||||||
|
name: test-lb
|
||||||
|
port:
|
||||||
|
number: 80
|
||||||
|
```
|
||||||
|
|
||||||
|
Ensuite, j’applique le manifeste `Ingress` pour exposer le service en HTTP.
|
||||||
|
|
||||||
|
Comme j’utilise le plugin **Caddy** dans OPNsense, j’ai encore besoin d’un routage local de type Layer 4 pour rediriger le trafic de `test.vezpi.me` vers l’adresse IP de l’Ingress Controller (`192.168.55.55`). Je crée donc une nouvelle règle dans le plugin Caddy.
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
Puis je teste l’accès dans le navigateur :
|
||||||
|

|
||||||
|
Test d’un Ingress en HTTP
|
||||||
|
|
||||||
|
✅ Mon pod est désormais accessible via son URL HTTP en utilisant un Ingress. Deuxième étape complétée !
|
||||||
|
|
||||||
|
---
|
||||||
|
## Connexion sécurisée avec TLS
|
||||||
|
|
||||||
|
Exposer des services en HTTP simple est suffisant pour des tests, mais en pratique nous voulons presque toujours utiliser **HTTPS**. Les certificats TLS chiffrent le trafic et garantissent l’authenticité ainsi que la confiance pour les utilisateurs.
|
||||||
|
|
||||||
|
### Cert-Manager
|
||||||
|
|
||||||
|
Pour automatiser la gestion des certificats dans Kubernetes, nous utilisons **Cert-Manager**. Il peut demander, renouveler et gérer les certificats TLS sans intervention manuelle.
|
||||||
|
|
||||||
|
#### Installer Cert-Manager
|
||||||
|
|
||||||
|
Nous le déployons avec Helm dans le cluster :
|
||||||
|
```bash
|
||||||
|
helm repo add jetstack https://charts.jetstack.io
|
||||||
|
helm repo update
|
||||||
|
helm install cert-manager jetstack/cert-manager \
|
||||||
|
--namespace cert-manager \
|
||||||
|
--create-namespace \
|
||||||
|
--set crds.enabled=true
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Configurer Cert-Manager
|
||||||
|
|
||||||
|
Ensuite, nous configurons un **ClusterIssuer** pour Let’s Encrypt. Cette ressource indique à Cert-Manager comment demander des certificats :
|
||||||
|
```yaml
|
||||||
|
---
|
||||||
|
apiVersion: cert-manager.io/v1
|
||||||
|
kind: ClusterIssuer
|
||||||
|
metadata:
|
||||||
|
name: letsencrypt-staging
|
||||||
|
spec:
|
||||||
|
acme:
|
||||||
|
server: https://acme-staging-v02.api.letsencrypt.org/directory
|
||||||
|
email: <email>
|
||||||
|
privateKeySecretRef:
|
||||||
|
name: letsencrypt-staging-key
|
||||||
|
solvers:
|
||||||
|
- http01:
|
||||||
|
ingress:
|
||||||
|
ingressClassName: nginx
|
||||||
|
```
|
||||||
|
|
||||||
|
ℹ️ Ici, je définis le serveur **staging** de Let’s Encrypt ACME pour les tests. Les certificats de staging ne sont pas reconnus par les navigateurs, mais ils évitent d’atteindre les limites strictes de Let’s Encrypt lors du développement.
|
||||||
|
|
||||||
|
Appliquez-le :
|
||||||
|
```bash
|
||||||
|
kubectl apply -f clusterissuer.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
Vérifiez si votre `ClusterIssuer` est `Ready` :
|
||||||
|
```bash
|
||||||
|
kubectl get clusterissuers.cert-manager.io
|
||||||
|
NAME READY AGE
|
||||||
|
letsencrypt-staging True 14m
|
||||||
|
```
|
||||||
|
|
||||||
|
S’il ne devient pas `Ready`, utilisez `kubectl describe` sur la ressource pour le diagnostiquer.
|
||||||
|
|
||||||
|
### Ajouter TLS dans un Ingress
|
||||||
|
|
||||||
|
Nous pouvons maintenant sécuriser notre service avec TLS en ajoutant une section `tls` dans la spécification `Ingress` et en référençant le `ClusterIssuer` :
|
||||||
|
```yaml
|
||||||
|
---
|
||||||
|
apiVersion: networking.k8s.io/v1
|
||||||
|
kind: Ingress
|
||||||
|
metadata:
|
||||||
|
name: test-ingress-https
|
||||||
|
annotations:
|
||||||
|
nginx.ingress.kubernetes.io/rewrite-target: /
|
||||||
|
cert-manager.io/cluster-issuer: letsencrypt-staging
|
||||||
|
spec:
|
||||||
|
tls:
|
||||||
|
- hosts:
|
||||||
|
- test.vezpi.me
|
||||||
|
secretName: test-vezpi-me-tls
|
||||||
|
rules:
|
||||||
|
- host: test.vezpi.me
|
||||||
|
http:
|
||||||
|
paths:
|
||||||
|
- path: /
|
||||||
|
pathType: Prefix
|
||||||
|
backend:
|
||||||
|
service:
|
||||||
|
name: test-lb
|
||||||
|
port:
|
||||||
|
number: 80
|
||||||
|
```
|
||||||
|
|
||||||
|
En arrière-plan, Cert-Manager suit ce flux pour émettre le certificat :
|
||||||
|
- Détecte l’`Ingress` avec `tls` et le `ClusterIssuer`.
|
||||||
|
- Crée un CRD **Certificate** décrivant le certificat souhaité + l’emplacement du Secret.
|
||||||
|
- Crée un CRD **Order** pour représenter une tentative d’émission avec Let’s Encrypt.
|
||||||
|
- Crée un CRD **Challenge** (par ex. validation HTTP-01).
|
||||||
|
- Met en place un Ingress/Pod temporaire pour résoudre le challenge.
|
||||||
|
- Crée un CRD **CertificateRequest** et envoie le CSR à Let’s Encrypt.
|
||||||
|
- Reçoit le certificat signé et le stocke dans un Secret Kubernetes.
|
||||||
|
- L’Ingress utilise automatiquement ce Secret pour servir en HTTPS.
|
||||||
|
|
||||||
|
✅ Une fois ce processus terminé, votre Ingress est sécurisé avec un certificat TLS.
|
||||||
|

|
||||||
|
|
||||||
|
### Passer aux certificats de production
|
||||||
|
|
||||||
|
Une fois que le staging fonctionne, nous pouvons passer au serveur **production** ACME pour obtenir un certificat Let’s Encrypt reconnu :
|
||||||
|
```yaml
|
||||||
|
---
|
||||||
|
apiVersion: cert-manager.io/v1
|
||||||
|
kind: ClusterIssuer
|
||||||
|
metadata:
|
||||||
|
name: letsencrypt
|
||||||
|
spec:
|
||||||
|
acme:
|
||||||
|
server: https://acme-v02.api.letsencrypt.org/directory
|
||||||
|
email: <email>
|
||||||
|
privateKeySecretRef:
|
||||||
|
name: letsencrypt-key
|
||||||
|
solvers:
|
||||||
|
- http01:
|
||||||
|
ingress:
|
||||||
|
ingressClassName: nginx
|
||||||
|
```
|
||||||
|
|
||||||
|
Mettez à jour l’`Ingress` pour pointer vers le nouveau `ClusterIssuer` :
|
||||||
|
```yaml
|
||||||
|
---
|
||||||
|
apiVersion: networking.k8s.io/v1
|
||||||
|
kind: Ingress
|
||||||
|
metadata:
|
||||||
|
name: test-ingress-https
|
||||||
|
annotations:
|
||||||
|
cert-manager.io/cluster-issuer: letsencrypt
|
||||||
|
spec:
|
||||||
|
tls:
|
||||||
|
- hosts:
|
||||||
|
- test.vezpi.me
|
||||||
|
secretName: test-vezpi-me-tls
|
||||||
|
rules:
|
||||||
|
- host: test.vezpi.me
|
||||||
|
http:
|
||||||
|
paths:
|
||||||
|
- path: /
|
||||||
|
pathType: Prefix
|
||||||
|
backend:
|
||||||
|
service:
|
||||||
|
name: test-lb
|
||||||
|
port:
|
||||||
|
number: 80
|
||||||
|
```
|
||||||
|
|
||||||
|
Comme le certificat de staging est encore stocké dans le Secret, je le supprime pour forcer une nouvelle demande en production :
|
||||||
|
```bash
|
||||||
|
kubectl delete secret test-vezpi-me-tls
|
||||||
|
```
|
||||||
|
|
||||||
|
🎉 Mon `Ingress` est désormais sécurisé avec un certificat TLS valide délivré par Let’s Encrypt. Les requêtes vers `https://test.vezpi.me` sont chiffrées de bout en bout et routées par le NGINX Ingress Controller jusqu’à mon pod `nginx` :
|
||||||
|

|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
## Conclusion
|
||||||
|
|
||||||
|
Dans ce parcours, je suis parti des bases, en exposant un simple pod avec un service `LoadBalancer`, puis j’ai construit étape par étape une configuration prête pour la production :
|
||||||
|
- Compréhension des **Services Kubernetes** et de leurs différents types.
|
||||||
|
- Utilisation du **BGP avec Cilium** et OPNsense pour attribuer des IP externes directement depuis mon réseau.
|
||||||
|
- Introduction des **Ingress** pour mieux passer à l’échelle, en exposant plusieurs services via un point d’entrée unique.
|
||||||
|
- Installation du **NGINX Ingress Controller** pour gérer le routage.
|
||||||
|
- Automatisation de la gestion des certificats avec **Cert-Manager**, afin de sécuriser mes services avec des certificats TLS Let’s Encrypt.
|
||||||
|
|
||||||
|
🚀 Résultat : mon pod est maintenant accessible via une véritable URL, sécurisé en HTTPS, comme n’importe quelle application web moderne.
|
||||||
|
|
||||||
|
C’est une étape importante dans mon aventure Kubernetes en homelab. Dans le prochain article, je souhaite explorer le stockage persistant et connecter mon cluster Kubernetes à mon setup **Ceph** sous **Proxmox**.
|
||||||
|
|
||||||
|
A la prochaine !
|
630
content/post/9-expose-kubernetes-pods-externally-ingress-tls.md
Normal file
@@ -0,0 +1,630 @@
|
|||||||
|
---
|
||||||
|
slug: expose-kubernetes-pods-externally-ingress-tls
|
||||||
|
title: Exposing Kubernetes Pods externally with Ingress and TLS
|
||||||
|
description: Learn how to expose Kubernetes pods externally with Services, Ingress, and TLS using BGP, NGINX, and Cert-Manager in a homelab setup.
|
||||||
|
date: 2025-08-19
|
||||||
|
draft: false
|
||||||
|
tags:
|
||||||
|
- kubernetes
|
||||||
|
- helm
|
||||||
|
- bgp
|
||||||
|
- opnsense
|
||||||
|
- cilium
|
||||||
|
- nginx-ingress-controller
|
||||||
|
- cert-manager
|
||||||
|
categories:
|
||||||
|
- homelab
|
||||||
|
---
|
||||||
|
|
||||||
|
## Intro
|
||||||
|
|
||||||
|
After building my own Kubernetes cluster in my homelab using `kubeadm` in [that post]({{< ref "post/8-create-manual-kubernetes-cluster-kubeadm" >}}), my next challenge is to expose a simple pod externally, reachable via a URL and secured with a TLS certificate issued by Let's Encrypt.
|
||||||
|
|
||||||
|
To achieve this, I need to configure several components:
|
||||||
|
- **Service**: Expose the pod inside the cluster and provide an access point.
|
||||||
|
- **Ingress**: Define routing rules to expose HTTP(S) services externally.
|
||||||
|
- **Ingress Controller**: Watches Ingress resources and handles the actual traffic routing.
|
||||||
|
- **TLS Certificates**: Secure traffic with HTTPS using certificates from Let’s Encrypt.
|
||||||
|
|
||||||
|
This post guides you through each step to understand how external access works in Kubernetes in a homelab environment.
|
||||||
|
|
||||||
|
Let’s dive in.
|
||||||
|
|
||||||
|
---
|
||||||
|
## Helm
|
||||||
|
|
||||||
|
I use **Helm**, the de facto package manager for Kubernetes, to install external components like the Ingress controller or cert-manager.
|
||||||
|
|
||||||
|
### Why Helm
|
||||||
|
|
||||||
|
Helm simplifies the deployment and management of Kubernetes applications. Instead of writing and maintaining large YAML manifests, Helm lets you install applications with a single command, using versioned and configurable charts.
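To give an idea of the workflow (the repository, chart and release names below are just examples), typical Helm usage boils down to a handful of commands:

```bash
# Add a chart repository and refresh the local index
helm repo add example https://charts.example.org
helm repo update

# Install a release from a chart, overriding values on the command line
helm install my-release example/some-chart \
  --namespace demo \
  --create-namespace \
  --set replicaCount=2

# Upgrade, roll back or remove the release later on
helm upgrade my-release example/some-chart --set replicaCount=3
helm rollback my-release 1
helm uninstall my-release --namespace demo
```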
|
||||||
|
|
||||||
|
### Install Helm
|
||||||
|
|
||||||
|
I install Helm on my LXC bastion host, which already has access to the Kubernetes cluster:
|
||||||
|
```bash
|
||||||
|
curl https://baltocdn.com/helm/signing.asc | gpg --dearmor | sudo tee /usr/share/keyrings/helm.gpg > /dev/null
|
||||||
|
echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/helm.gpg] https://baltocdn.com/helm/stable/debian/ all main" | sudo tee /etc/apt/sources.list.d/helm-stable-debian.list
|
||||||
|
sudo apt update
|
||||||
|
sudo apt install helm
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
## Kubernetes Services
|
||||||
|
|
||||||
|
Before we can expose a pod externally, we need a way to make it reachable inside the cluster. That’s where Kubernetes Services come in.
|
||||||
|
|
||||||
|
Services act as the bridge between pods and the network, making sure applications remain reachable even as pods are rescheduled.
|
||||||
|
|
||||||
|
There are several types of Kubernetes Services, each serving a different purpose:
|
||||||
|
- **ClusterIP** exposes the Service on a cluster-internal IP, only accessible inside the cluster.
|
||||||
|
- **NodePort** exposes the Service on a static port on each node’s IP, accessible from outside the cluster.
|
||||||
|
- **LoadBalancer** exposes the Service on an external IP, typically using cloud integrations (or BGP in a homelab).
|
||||||
|
|
||||||
|
---
|
||||||
|
## Expose a `LoadBalancer` Service with BGP
|
||||||
|
|
||||||
|
Initially, I considered using **MetalLB** to expose service IPs to my home network. That’s what I used in the past when relying on my ISP box as the main router. But after reading this post, [Use Cilium BGP integration with OPNsense](https://devopstales.github.io/kubernetes/cilium-opnsense-bgp/), I realized I could achieve the same (or even better) using BGP with my **OPNsense** router and **Cilium**, my CNI.
|
||||||
|
### What Is BGP?
|
||||||
|
|
||||||
|
BGP (Border Gateway Protocol) is a routing protocol used to exchange network routes between systems. In the Kubernetes homelab context, BGP allows your Kubernetes nodes to advertise IPs directly to your network router or firewall. Your router then knows how to reach the IPs managed by your cluster.
|
||||||
|
|
||||||
|
So instead of MetalLB managing IP allocation and ARP replies, your nodes directly tell your router: "Hey, I own 192.168.1.240".
|
||||||
|
### Legacy MetalLB Approach
|
||||||
|
|
||||||
|
Without BGP, MetalLB in Layer 2 mode works like this:
|
||||||
|
- Assigns a `LoadBalancer` IP (e.g., `192.168.1.240`) from a pool.
|
||||||
|
- One node responds to ARP for that IP on your LAN.
|
||||||
|
|
||||||
|
Yes, MetalLB can also work with BGP, but why use it when my CNI (Cilium) already handles BGP out of the box?
|
||||||
|
### BGP with Cilium
|
||||||
|
|
||||||
|
With Cilium + BGP, you get:
|
||||||
|
- Cilium’s agent on the node advertises LoadBalancer IPs over BGP.
|
||||||
|
- Your router learns that IP and routes to the correct node.
|
||||||
|
- No need for MetalLB.
|
||||||
|
|
||||||
|
### BGP Setup
|
||||||
|
|
||||||
|
BGP is disabled by default on both OPNsense and Cilium. Let’s enable it on both ends.
|
||||||
|
|
||||||
|
#### On OPNsense
|
||||||
|
|
||||||
|
According to the [official OPNsense documentation](https://docs.opnsense.org/manual/dynamic_routing.html#bgp-section), enabling BGP requires installing a plugin.
|
||||||
|
|
||||||
|
Head to `System` > `Firmware` > `Plugins` and install the `os-frr` plugin:
|
||||||
|

|
||||||
|
Install `os-frr` plugin in OPNsense
|
||||||
|
|
||||||
|
Once installed, enable the plugin under `Routing` > `General`:
|
||||||
|

|
||||||
|
Enable routing in OPNsense
|
||||||
|
|
||||||
|
Then navigate to the `BGP` section. In the **General** tab:
|
||||||
|
- Tick the box to enable BGP.
|
||||||
|
- Set your **BGP ASN**. I used `64512`, the first private ASN from the reserved range (see [ASN table](https://en.wikipedia.org/wiki/Autonomous_system_\(Internet\)#ASN_Table)):
|
||||||
|

|
||||||
|
General BGP configuration in OPNsense
|
||||||
|
|
||||||
|
Now create your BGP neighbors. I’m only peering with my **worker nodes** (since only they run workloads). For each neighbor:
|
||||||
|
- Set the node’s IP in `Peer-IP`
|
||||||
|
- Use `64513` as the **Remote AS** (Cilium’s ASN)
|
||||||
|
- Set `Update-Source Interface` to `Lab`
|
||||||
|
- Tick `Next-Hop-Self`:
|
||||||
|

|
||||||
|
BGP neighbor configuration in OPNsense
|
||||||
|
|
||||||
|
Here’s how my neighbors list looks once complete:
|
||||||
|

|
||||||
|
BGP neighbor list
|
||||||
|
|
||||||
|
Don’t forget to create a firewall rule allowing BGP (port `179/TCP`) from the **Lab** VLAN to the firewall:
|
||||||
|

|
||||||
|
Allow TCP/179 from Lab to OPNsense
|
||||||
|
|
||||||
|
#### In Cilium
|
||||||
|
|
||||||
|
I already have Cilium installed and couldn’t find a way to enable BGP with the CLI, so I simply reinstalled it with the BGP option:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cilium uninstall
|
||||||
|
cilium install --set bgpControlPlane.enabled=true
|
||||||
|
```
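To confirm the option actually made it into the agent configuration, something like this should show the BGP control plane flag set to true (the exact key name may vary between Cilium versions):

```bash
# Look for the BGP control plane flag in the running Cilium configuration
cilium config view | grep -i bgp
```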
|
||||||
|
|
||||||
|
I configure only worker nodes to establish BGP peering by labeling them for the `nodeSelector`:
|
||||||
|
```bash
|
||||||
|
kubectl label node apex-worker node-role.kubernetes.io/worker=""
|
||||||
|
kubectl label node vertex-worker node-role.kubernetes.io/worker=""
|
||||||
|
kubectl label node zenith-worker node-role.kubernetes.io/worker=""
|
||||||
|
```
|
||||||
|
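The labels then show up in the `ROLES` column of `kubectl get nodes`:

```bash
kubectl get nodes
```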
```plaintext
|
||||||
|
NAME STATUS ROLES AGE VERSION
|
||||||
|
apex-master Ready control-plane 5d4h v1.32.7
|
||||||
|
apex-worker Ready worker 5d1h v1.32.7
|
||||||
|
vertex-master Ready control-plane 5d1h v1.32.7
|
||||||
|
vertex-worker Ready worker 5d1h v1.32.7
|
||||||
|
zenith-master Ready control-plane 5d1h v1.32.7
|
||||||
|
zenith-worker Ready worker 5d1h v1.32.7
|
||||||
|
```
|
||||||
|
|
||||||
|
For the entire BGP configuration, I need:
|
||||||
|
- **CiliumBGPClusterConfig**: BGP settings for the Cilium cluster, including its local ASN and its peer.
|
||||||
|
- **CiliumBGPPeerConfig**: Sets BGP timers, graceful restart, and route advertisement settings.
|
||||||
|
- **CiliumBGPAdvertisement**: Defines which Kubernetes services should be advertised via BGP.
|
||||||
|
- **CiliumLoadBalancerIPPool**: Configures the range of IPs assigned to Kubernetes LoadBalancer services.
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
---
|
||||||
|
apiVersion: cilium.io/v2alpha1
|
||||||
|
kind: CiliumBGPClusterConfig
|
||||||
|
metadata:
|
||||||
|
name: bgp-cluster
|
||||||
|
spec:
|
||||||
|
nodeSelector:
|
||||||
|
matchLabels:
|
||||||
|
node-role.kubernetes.io/worker: "" # Only for worker nodes
|
||||||
|
bgpInstances:
|
||||||
|
- name: "cilium-bgp-cluster"
|
||||||
|
localASN: 64513 # Cilium ASN
|
||||||
|
peers:
|
||||||
|
- name: "pfSense-peer"
|
||||||
|
peerASN: 64512 # OPNsense ASN
|
||||||
|
peerAddress: 192.168.66.1 # OPNsense IP
|
||||||
|
peerConfigRef:
|
||||||
|
name: "bgp-peer"
|
||||||
|
---
|
||||||
|
apiVersion: cilium.io/v2alpha1
|
||||||
|
kind: CiliumBGPPeerConfig
|
||||||
|
metadata:
|
||||||
|
name: bgp-peer
|
||||||
|
spec:
|
||||||
|
timers:
|
||||||
|
holdTimeSeconds: 9
|
||||||
|
keepAliveTimeSeconds: 3
|
||||||
|
gracefulRestart:
|
||||||
|
enabled: true
|
||||||
|
restartTimeSeconds: 15
|
||||||
|
families:
|
||||||
|
- afi: ipv4
|
||||||
|
safi: unicast
|
||||||
|
advertisements:
|
||||||
|
matchLabels:
|
||||||
|
advertise: "bgp"
|
||||||
|
---
|
||||||
|
apiVersion: cilium.io/v2alpha1
|
||||||
|
kind: CiliumBGPAdvertisement
|
||||||
|
metadata:
|
||||||
|
name: bgp-advertisement
|
||||||
|
labels:
|
||||||
|
advertise: bgp
|
||||||
|
spec:
|
||||||
|
advertisements:
|
||||||
|
- advertisementType: "Service"
|
||||||
|
service:
|
||||||
|
addresses:
|
||||||
|
- LoadBalancerIP
|
||||||
|
selector:
|
||||||
|
matchExpressions:
|
||||||
|
- { key: somekey, operator: NotIn, values: [ never-used-value ] }
|
||||||
|
---
|
||||||
|
apiVersion: "cilium.io/v2alpha1"
|
||||||
|
kind: CiliumLoadBalancerIPPool
|
||||||
|
metadata:
|
||||||
|
name: "dmz"
|
||||||
|
spec:
|
||||||
|
blocks:
|
||||||
|
- start: "192.168.55.20" # LB Range Start IP
|
||||||
|
stop: "192.168.55.250" # LB Range End IP
|
||||||
|
```
|
||||||
|
|
||||||
|
Apply it:
|
||||||
|
```bash
|
||||||
|
kubectl apply -f bgp.yaml
|
||||||
|
|
||||||
|
ciliumbgpclusterconfig.cilium.io/bgp-cluster created
|
||||||
|
ciliumbgppeerconfig.cilium.io/bgp-peer created
|
||||||
|
ciliumbgpadvertisement.cilium.io/bgp-advertisement created
|
||||||
|
ciliumloadbalancerippool.cilium.io/dmz created
|
||||||
|
```
|
||||||
|
|
||||||
|
If everything works, you should see the BGP sessions **established** with your workers:
|
||||||
|
```bash
|
||||||
|
cilium bgp peers
|
||||||
|
|
||||||
|
Node Local AS Peer AS Peer Address Session State Uptime Family Received Advertised
|
||||||
|
apex-worker 64513 64512 192.168.66.1 established 6m30s ipv4/unicast 1 2
|
||||||
|
vertex-worker 64513 64512 192.168.66.1 established 7m9s ipv4/unicast 1 2
|
||||||
|
zenith-worker 64513 64512 192.168.66.1 established 6m13s ipv4/unicast 1 2
|
||||||
|
```
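On the router side, the same information can be checked from an OPNsense shell with FRR's `vtysh` (a quick sketch, assuming the default FRR setup shipped with the `os-frr` plugin):

```bash
# BGP session summary and the routes learned from the workers
vtysh -c "show ip bgp summary"
vtysh -c "show ip bgp"
```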
|
||||||
|
|
||||||
|
### Deploying a `LoadBalancer` Service with BGP
|
||||||
|
|
||||||
|
Let’s quickly validate that the setup works by deploying a test `Deployment` and `LoadBalancer` `Service`:
|
||||||
|
```yaml
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: test-lb
|
||||||
|
spec:
|
||||||
|
type: LoadBalancer
|
||||||
|
ports:
|
||||||
|
- port: 80
|
||||||
|
targetPort: 80
|
||||||
|
protocol: TCP
|
||||||
|
name: http
|
||||||
|
selector:
|
||||||
|
svc: test-lb
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: nginx
|
||||||
|
spec:
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
svc: test-lb
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
svc: test-lb
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: web
|
||||||
|
image: nginx
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
ports:
|
||||||
|
- containerPort: 80
|
||||||
|
readinessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /
|
||||||
|
port: 80
|
||||||
|
```
|
||||||
|
|
||||||
|
Check if it gets an external IP:
|
||||||
|
```bash
|
||||||
|
kubectl get services test-lb
|
||||||
|
|
||||||
|
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
|
||||||
|
test-lb LoadBalancer 10.100.167.198 192.168.55.20 80:31350/TCP 169m
|
||||||
|
```
|
||||||
|
|
||||||
|
The service got the first IP from our defined pool: `192.168.55.20`.
|
||||||
|
|
||||||
|
Now from any device on the LAN, try to reach that IP on port 80:
|
||||||
|

|
||||||
|
|
||||||
|
✅ Our pod is reachable through a BGP-routed `LoadBalancer` IP, first step successful!
|
||||||
|
|
||||||
|
---
|
||||||
|
## Kubernetes Ingress
|
||||||
|
|
||||||
|
We managed to expose a pod externally using a `LoadBalancer` service and a BGP-assigned IP address. This approach works great for testing, but it doesn't scale well.
|
||||||
|
|
||||||
|
Imagine having 10, 20, or 50 different services: would I really want to allocate 50 IP addresses and clutter my firewall and routing tables with 50 BGP entries? Definitely not.
|
||||||
|
|
||||||
|
That’s where **Ingress** kicks in.
|
||||||
|
|
||||||
|
### What Is a Kubernetes Ingress?
|
||||||
|
|
||||||
|
A Kubernetes **Ingress** is an API object that manages **external access to services** in a cluster, typically HTTP and HTTPS, all through a single entry point.
|
||||||
|
|
||||||
|
Instead of assigning one IP per service, you define routing rules based on:
|
||||||
|
- **Hostnames** (`app1.vezpi.me`, `blog.vezpi.me`, etc.)
|
||||||
|
- **Paths** (`/grafana`, `/metrics`, etc.)
|
||||||
|
|
||||||
|
With Ingress, I can expose multiple services over the same IP and port (usually 443 for HTTPS), and Kubernetes will know how to route the request to the right backend service.
|
||||||
|
|
||||||
|
Here is an example of a simple `Ingress`, routing traffic for `test.vezpi.me` to the `test-lb` service on port 80:
|
||||||
|
```yaml
|
||||||
|
---
|
||||||
|
apiVersion: networking.k8s.io/v1
|
||||||
|
kind: Ingress
|
||||||
|
metadata:
|
||||||
|
name: test-ingress
|
||||||
|
spec:
|
||||||
|
rules:
|
||||||
|
- host: test.vezpi.me
|
||||||
|
http:
|
||||||
|
paths:
|
||||||
|
- path: /
|
||||||
|
pathType: Prefix
|
||||||
|
backend:
|
||||||
|
service:
|
||||||
|
name: test-lb
|
||||||
|
port:
|
||||||
|
number: 80
|
||||||
|
```
|
||||||
|
|
||||||
|
### Ingress Controller
|
||||||
|
|
||||||
|
On its own, an Ingress is just a set of routing rules. It doesn’t actually handle traffic. To bring it to life, I need an **Ingress Controller**, which:
|
||||||
|
- Watches the Kubernetes API for `Ingress` resources.
|
||||||
|
- Opens HTTP(S) ports on a `LoadBalancer` or `NodePort` service.
|
||||||
|
- Routes traffic to the correct `Service` based on the `Ingress` rules.
|
||||||
|
|
||||||
|
Popular controllers include NGINX, Traefik, HAProxy, and more. Since I was looking for something simple, stable, and widely adopted, I picked the **NGINX Ingress Controller**.
|
||||||
|
|
||||||
|
### Install NGINX Ingress Controller
|
||||||
|
|
||||||
|
I use Helm to install the controller, and I set `controller.ingressClassResource.default=true` so that all my future ingresses use it by default:
|
||||||
|
```bash
|
||||||
|
helm install ingress-nginx \
|
||||||
|
--repo=https://kubernetes.github.io/ingress-nginx \
|
||||||
|
--namespace=ingress-nginx \
|
||||||
|
--create-namespace ingress-nginx \
|
||||||
|
--set controller.ingressClassResource.default=true \
|
||||||
|
--set controller.config.strict-validate-path-type=false
|
||||||
|
```
|
||||||
|
|
||||||
|
The controller is deployed and exposes a `LoadBalancer` service. In my setup, it picks the second available IP in the BGP range:
|
||||||
|
```bash
|
||||||
|
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE SELECTOR
|
||||||
|
ingress-nginx-controller LoadBalancer 10.106.236.13 192.168.55.21 80:31195/TCP,443:30974/TCP 75s app.kubernetes.io/component=controller,app.kubernetes.io/instance=ingress-nginx,app.kubernetes.io/name=ingress-nginx
|
||||||
|
```
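To double-check that this class is registered as the cluster default, the standard `ingressclass.kubernetes.io/is-default-class` annotation should be set to `true`. A quick sketch, assuming the class created by the chart keeps its default name `nginx`:

```bash
kubectl get ingressclass
kubectl get ingressclass nginx \
  -o jsonpath='{.metadata.annotations.ingressclass\.kubernetes\.io/is-default-class}'
```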
|
||||||
|
|
||||||
|
### Reserving a Static IP for the Controller
|
||||||
|
|
||||||
|
I want to make sure the Ingress Controller always receives the same IP address. To do this, I created two separate Cilium IP pools:
|
||||||
|
- One dedicated for the Ingress Controller with a single IP.
|
||||||
|
- One for everything else.
|
||||||
|
```yaml
|
||||||
|
---
|
||||||
|
# Pool for Ingress Controller
|
||||||
|
apiVersion: cilium.io/v2alpha1
|
||||||
|
kind: CiliumLoadBalancerIPPool
|
||||||
|
metadata:
|
||||||
|
name: ingress-nginx
|
||||||
|
spec:
|
||||||
|
blocks:
|
||||||
|
- cidr: 192.168.55.55/32
|
||||||
|
serviceSelector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: ingress-nginx
|
||||||
|
app.kubernetes.io/component: controller
|
||||||
|
---
|
||||||
|
# Default pool for other services
|
||||||
|
apiVersion: cilium.io/v2alpha1
|
||||||
|
kind: CiliumLoadBalancerIPPool
|
||||||
|
metadata:
|
||||||
|
name: default
|
||||||
|
spec:
|
||||||
|
blocks:
|
||||||
|
- start: 192.168.55.100
|
||||||
|
stop: 192.168.55.250
|
||||||
|
serviceSelector:
|
||||||
|
matchExpressions:
|
||||||
|
- key: app.kubernetes.io/name
|
||||||
|
operator: NotIn
|
||||||
|
values:
|
||||||
|
- ingress-nginx
|
||||||
|
```
|
||||||
|
|
||||||
|
After replacing the previous shared pool with these two, the Ingress Controller gets the desired IP `192.168.55.55`, and the `test-lb` service picks `192.168.55.100` as expected:
|
||||||
|
```bash
|
||||||
|
NAMESPACE NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
|
||||||
|
default test-lb LoadBalancer 10.100.167.198 192.168.55.100 80:31350/TCP 6h34m
|
||||||
|
ingress-nginx ingress-nginx-controller LoadBalancer 10.106.236.13 192.168.55.55 80:31195/TCP,443:30974/TCP 24m
|
||||||
|
```
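The allocation can also be verified on the Cilium side: the pools should report no conflicts and the expected number of available IPs (a quick sketch):

```bash
kubectl get ciliumloadbalancerippools.cilium.io
```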
|
||||||
|
|
||||||
|
### Associate a Service to an Ingress
|
||||||
|
|
||||||
|
Now let’s wire up a service to this controller.
|
||||||
|
|
||||||
|
First, I update the original `LoadBalancer` service and convert it into a `ClusterIP` (since the Ingress Controller will now expose it externally):
|
||||||
|
```yaml
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: test-lb
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- port: 80
|
||||||
|
targetPort: 80
|
||||||
|
protocol: TCP
|
||||||
|
name: http
|
||||||
|
selector:
|
||||||
|
svc: test-lb
|
||||||
|
---
|
||||||
|
apiVersion: networking.k8s.io/v1
|
||||||
|
kind: Ingress
|
||||||
|
metadata:
|
||||||
|
name: test-ingress
|
||||||
|
spec:
|
||||||
|
rules:
|
||||||
|
- host: test.vezpi.me
|
||||||
|
http:
|
||||||
|
paths:
|
||||||
|
- path: /
|
||||||
|
pathType: Prefix
|
||||||
|
backend:
|
||||||
|
service:
|
||||||
|
name: test-lb
|
||||||
|
port:
|
||||||
|
number: 80
|
||||||
|
```
|
||||||
|
|
||||||
|
Then I apply the `Ingress` manifest as shown earlier to expose the service over HTTP.
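Before touching the DNS or Caddy side, the wiring can be tested straight against the controller IP by overriding name resolution in curl (`192.168.55.55` being the IP reserved above):

```bash
# The Ingress should reference the nginx class and the test-lb backend
kubectl get ingress test-ingress

# Send the request to the controller while keeping the expected Host header
curl --resolve test.vezpi.me:80:192.168.55.55 -I http://test.vezpi.me/
```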
|
||||||
|
|
||||||
|
Since I'm using the Caddy plugin on OPNsense, I still need a local Layer 4 route to forward traffic for `test.vezpi.me` to the NGINX Ingress Controller IP (`192.168.55.55`). I simply create a new rule in the Caddy plugin.
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
Now let’s test it in the browser:
|
||||||
|

|
||||||
|
Test Ingress on HTTP
|
||||||
|
|
||||||
|
✅ Our pod is now reachable on its HTTP URL using an Ingress. Second step complete!
|
||||||
|
|
||||||
|
---
|
||||||
|
## Secure Connection with TLS
|
||||||
|
|
||||||
|
Exposing services over plain HTTP is fine for testing, but in practice we almost always want **HTTPS**. TLS certificates encrypt traffic and provide authenticity and trust to users.
|
||||||
|
|
||||||
|
### Cert-Manager
|
||||||
|
|
||||||
|
To automate certificate management in Kubernetes, we use **Cert-Manager**. It can request, renew, and manage TLS certificates without manual intervention.
|
||||||
|
|
||||||
|
#### Install Cert-Manager
|
||||||
|
|
||||||
|
We deploy it with Helm on the cluster:
|
||||||
|
```bash
|
||||||
|
helm repo add jetstack https://charts.jetstack.io
|
||||||
|
helm repo update
|
||||||
|
helm install cert-manager jetstack/cert-manager \
|
||||||
|
--namespace cert-manager \
|
||||||
|
--create-namespace \
|
||||||
|
--set crds.enabled=true
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Setup Cert-Manager
|
||||||
|
|
||||||
|
Next, we configure a **ClusterIssuer** for Let’s Encrypt. This resource tells Cert-Manager how to request certificates:
|
||||||
|
```yaml
|
||||||
|
---
|
||||||
|
apiVersion: cert-manager.io/v1
|
||||||
|
kind: ClusterIssuer
|
||||||
|
metadata:
|
||||||
|
name: letsencrypt-staging
|
||||||
|
spec:
|
||||||
|
acme:
|
||||||
|
server: https://acme-staging-v02.api.letsencrypt.org/directory
|
||||||
|
email: <email>
|
||||||
|
privateKeySecretRef:
|
||||||
|
name: letsencrypt-staging-key
|
||||||
|
solvers:
|
||||||
|
- http01:
|
||||||
|
ingress:
|
||||||
|
ingressClassName: nginx
|
||||||
|
```
|
||||||
|
|
||||||
|
ℹ️ Here I define the **staging** Let’s Encrypt ACME server for testing purposes. Staging certificates are not trusted by browsers, but they prevent hitting Let’s Encrypt’s strict rate limits during development.
|
||||||
|
|
||||||
|
Apply it:
|
||||||
|
```bash
|
||||||
|
kubectl apply -f clusterissuer.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
Verify if your `ClusterIssuer` is `Ready`:
|
||||||
|
```bash
|
||||||
|
kubectl get clusterissuers.cert-manager.io
|
||||||
|
NAME READY AGE
|
||||||
|
letsencrypt-staging True 14m
|
||||||
|
```
|
||||||
|
|
||||||
|
If it doesn’t become `Ready`, use `kubectl describe` on the resource to troubleshoot.
|
||||||
|
|
||||||
|
### Add TLS in an Ingress
|
||||||
|
|
||||||
|
Now we can secure our service with TLS by adding a `tls` section in the `Ingress` spec and referencing the `ClusterIssuer`:
|
||||||
|
```yaml
|
||||||
|
---
|
||||||
|
apiVersion: networking.k8s.io/v1
|
||||||
|
kind: Ingress
|
||||||
|
metadata:
|
||||||
|
name: test-ingress-https
|
||||||
|
annotations:
|
||||||
|
nginx.ingress.kubernetes.io/rewrite-target: /
|
||||||
|
cert-manager.io/cluster-issuer: letsencrypt-staging
|
||||||
|
spec:
|
||||||
|
tls:
|
||||||
|
- hosts:
|
||||||
|
- test.vezpi.me
|
||||||
|
secretName: test-vezpi-me-tls
|
||||||
|
rules:
|
||||||
|
- host: test.vezpi.me
|
||||||
|
http:
|
||||||
|
paths:
|
||||||
|
- path: /
|
||||||
|
pathType: Prefix
|
||||||
|
backend:
|
||||||
|
service:
|
||||||
|
name: test-lb
|
||||||
|
port:
|
||||||
|
number: 80
|
||||||
|
```
|
||||||
|
|
||||||
|
Behind the scenes, Cert-Manager goes through this workflow to issue the certificate:
|
||||||
|
- Detects the `Ingress` with `tls` and the `ClusterIssuer`.
|
||||||
|
- Creates a Certificate CRD that describes the desired cert + Secret storage.
|
||||||
|
- Creates an Order CRD to represent one issuance attempt with Let’s Encrypt.
|
||||||
|
- Creates a Challenge CRD (e.g., HTTP-01 validation).
|
||||||
|
- Provisions a temporary solver Ingress/Pod to solve the challenge.
|
||||||
|
- Creates a CertificateRequest CRD and sends the CSR to Let’s Encrypt.
|
||||||
|
- Receives the signed certificate and stores it in a Kubernetes Secret.
|
||||||
|
- The Ingress automatically uses the Secret to serve HTTPS.
|
||||||
|
|
||||||
|
✅ Once this process completes, your Ingress is secured with a TLS certificate.
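To follow the issuance while it happens, the intermediate resources can be listed directly; these are the standard cert-manager kinds, and the `Certificate` created from the Ingress is normally named after the Secret:

```bash
# Issuance pipeline: Certificate -> CertificateRequest -> Order -> Challenge
kubectl get certificate,certificaterequest,order,challenge

# Detailed status and events for the certificate itself
kubectl describe certificate test-vezpi-me-tls
```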

![Pasted_image_20250903215034.png](img/pasted-image-20250903215034.png)

### Switch to Production Certificates

Once staging works, we can safely switch to the **production** ACME server to get a trusted certificate from Let’s Encrypt:
```yaml
---
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
  name: letsencrypt
spec:
  acme:
    server: https://acme-v02.api.letsencrypt.org/directory
    email: <email>
    privateKeySecretRef:
      name: letsencrypt-key
    solvers:
      - http01:
          ingress:
            ingressClassName: nginx
```
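
Apply it the same way as the staging issuer. The filename below is only an example for wherever you saved this manifest:

```bash
# Filename is an example, use the file you saved the production issuer in
kubectl apply -f clusterissuer-production.yaml

# Both issuers should now report Ready
kubectl get clusterissuers.cert-manager.io
```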

Update the `Ingress` to reference the new `ClusterIssuer`:
```yaml
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: test-ingress-https
  annotations:
    cert-manager.io/cluster-issuer: letsencrypt
spec:
  tls:
    - hosts:
        - test.vezpi.me
      secretName: test-vezpi-me-tls
  rules:
    - host: test.vezpi.me
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: test-lb
                port:
                  number: 80
```

Since the staging certificate is still stored in the Secret, I delete it to trigger a fresh request against production:
```bash
kubectl delete secret test-vezpi-me-tls
```
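
Cert-Manager notices the missing Secret and re-runs the issuance, this time against the production issuer. A quick way to follow it, assuming the `Certificate` keeps the same name as the Secret:

```bash
# Wait for the Certificate to become Ready again
kubectl get certificate test-vezpi-me-tls --watch

# Confirm the Secret has been recreated with the new certificate
kubectl get secret test-vezpi-me-tls
```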

🎉 My `Ingress` is now secured with a valid TLS certificate from Let’s Encrypt. Requests to `https://test.vezpi.me` are served over HTTPS, with TLS terminated by the NGINX Ingress Controller and the traffic routed to my `nginx` pod:
![Pasted_image_20250903215856.png](img/pasted-image-20250903215856.png)


---
## Conclusion

In this journey, I started from the basics, exposing a single pod with a `LoadBalancer` service, and step by step built a production-ready setup:
- Learned about **Kubernetes Services** and their different types.
- Used **BGP with Cilium** and OPNsense to assign external IPs directly from my network.
- Introduced **Ingress** to scale better, exposing multiple services through a single entry point.
- Installed the **NGINX Ingress Controller** to handle routing.
- Automated certificate management with **Cert-Manager**, securing my services with Let’s Encrypt TLS certificates.

🚀 The result: my pod is now reachable at a real URL, secured with HTTPS, just like any modern web application.

This is a huge milestone in my homelab Kubernetes journey. In the next article, I want to explore persistent storage and connect my Kubernetes cluster to my **Ceph** setup on **Proxmox**.

```diff
@@ -1,5 +1,5 @@
 ---
-title: Random post
+title: Playground
 description:
 date: 2025-06-25
 draft: true
@@ -9,4 +9,10 @@ categories:
 
 Hi there, how are you ?
 
-I'm testing
+I'm ==testing==
+
+## Emoji
+
+🚀💡🔧🔁⚙️📝📌✅⚠️🍒❌ℹ️⌛🚨🎉
+
+[post]({{< ref "post/0-template" >}})
```
````diff
@@ -1,43 +0,0 @@
----
-slug:
-title: Template
-description:
-date:
-draft: true
-tags:
-categories:
----
-## Develop a Terraform Module
-
-In the final step of this article, I will show you how you can transform this piece of code in a reusable Terraform module.
-
-### What is a Terraform Module?
-
-Terraform modules are reusable components that let you organize and simplify your infrastructure code by grouping related resources into a single unit. Instead of repeating the same configuration across multiple places, you can define it once in a module and use it wherever needed, just like calling a function in programming.
-
-Modules can be local (within your project) or remote (from the Terraform Registry or a Git repository), making it easy to share and standardize infrastructure patterns across teams or projects. By using modules, you make your code more readable, maintainable, and scalable.
-
-### Terraform Code
-
-We will now transform the Terraform code above by creating our own module called `pve_vm
-
-> 📌 Reminder, you can find all the code I have written in my [Homelab repo](https://git.vezpi.me/Vezpi/Homelab/), the following code is located [here](https://git.vezpi.me/Vezpi/Homelab/src/commit/22f64034175a6a4642a2c7b6656688f16ece5ba1/terraform/projects/simple-vm). Don't forget to match your variables with your environment!
-#### Code Structure
-
-```plaintext
-terraform
-|-- modules
-|   `-- pve_vm
-|       |-- main.tf
-|       |-- provider.tf
-|       `-- variables.tf
-`-- projects
-    `-- simple-vm-with-module
-        |-- credentials.auto.tfvars
-        |-- main.tf
-        |-- provider.tf
-        `-- variables.tf
-```
-
-#### Module
-
````
```diff
@@ -2,7 +2,7 @@
 set -e
 
 # Configuration
-REPO_URL="${REPO_URL:-https://git.vezpi.me/Vezpi/blog.git}"
+REPO_URL="${REPO_URL:-https://git.vezpi.com/Vezpi/blog.git}"
 URL="${URL:-blog.vezpi.com}"
 BRANCH="${BRANCH:-preview}"
 CLONE_DIR="${CLONE_DIR:-/blog}"
@@ -14,6 +14,9 @@ if [ "$BRANCH" = "preview" ]; then
   DRAFTS="--buildDrafts --buildFuture"
 fi
 
+# Clean blog dir
+rm -rf "$CLONE_DIR"
+
 # Clone repo
 echo "- Cloning $REPO_URL (branch: $BRANCH)..."
 git clone --recurse-submodules --branch "$BRANCH" "$REPO_URL" "$CLONE_DIR"
```
`hugo.yaml` (14 lines changed):

```diff
@@ -38,12 +38,6 @@ languages:
         weight: 20
         params:
           icon: "brand-github"
-      - identifier: "gitea"
-        name: "Gitea"
-        url: "https://git.vezpi.me/Vezpi/blog"
-        weight: 30
-        params:
-          icon: "brand-git"
       - identifier: "linkedin"
         name: "LinkedIn"
         url: "https://www.linkedin.com/in/etiennegirault/"
@@ -90,16 +84,10 @@ languages:
         weight: 20
         params:
           icon: "brand-github"
-      - identifier: "gitea"
-        name: "Gitea"
-        url: "https://git.vezpi.me/Vezpi/blog"
-        weight: 30
-        params:
-          icon: "brand-git"
       - identifier: "linkedin"
         name: "LinkedIn"
         url: "https://www.linkedin.com/in/etiennegirault/"
-        weight: 40
+        weight: 30
         params:
           icon: "brand-linkedin"
```
`layouts/index.html` (new file, 30 lines):

```diff
@@ -0,0 +1,30 @@
+{{ define "main" }}
+<header class="homepage-header">
+    {{ with .Title }}
+    <h1 class="article-title">{{ . }}</h1>
+    {{ end }}
+    {{ with .Content }}
+    <div class="article-subtitle">
+        {{ . }}
+    </div>
+    {{ end }}
+</header>
+
+{{ $pages := where .Site.RegularPages "Type" "in" .Site.Params.mainSections }}
+{{ $notHidden := where .Site.RegularPages "Params.hidden" "!=" true }}
+{{ $filtered := ($pages | intersect $notHidden) }}
+{{ $pag := .Paginate ($filtered) }}
+
+<section class="article-list">
+    {{ range $index, $element := $pag.Pages }}
+        {{ partial "article-list/default" . }}
+    {{ end }}
+</section>
+
+{{- partial "pagination.html" . -}}
+{{- partial "footer/footer" . -}}
+{{ end }}
+
+{{ define "right-sidebar" }}
+    {{ partial "sidebar/right.html" (dict "Context" . "Scope" "homepage") }}
+{{ end }}
```
```diff
@@ -1,4 +1,7 @@
-<script defer src="https://analytics.vezpi.me/script.js" data-website-id="e50e5843-1039-4bc8-a3f6-80f60e25ea38"></script>
+<script defer src="https://analytics.vezpi.com/script.js" data-website-id="e50e5843-1039-4bc8-a3f6-80f60e25ea38"></script>
+{{ if or (eq .Kind "taxonomy") (eq .Kind "term") }}
+<meta name="robots" content="noindex,follow">
+{{ end }}
 {{- if .Params.keywords }}
 <meta name="keywords" content="{{ delimit .Params.keywords ", " }}">
 {{- else if .Params.tags }}
```
```diff
@@ -8,7 +8,7 @@
 <header>
     {{ with .Site.Params.sidebar.avatar }}
         {{ if (default true .enabled) }}
-            <figure class="site-avatar">
+            <figure class="site-avatar" style="margin-left: auto; margin-right: auto;">
                 <a href="{{ .Site.BaseURL | relLangURL }}">
                     {{ if not .local }}
                         <img src="{{ .src }}" width="300" height="300" class="site-logo" loading="lazy" alt="Avatar">
@@ -31,12 +31,13 @@
         {{ end }}
     {{ end }}
 
-    <div class="site-meta">
+    <div class="site-meta" style="text-align: center;">
         <h1 class="site-name"><a href="{{ .Site.BaseURL | relLangURL }}">{{ .Site.Title }}</a></h1>
-        <h2 class="site-description">{{ .Site.Params.sidebar.subtitle }}</h2>
+        <h2 class="site-description" style="font-size: 1.3rem;">{{ .Site.Params.sidebar.subtitle }}</h2>
     </div>
 </header>
 
+{{- $page := . -}}
 {{- with .Site.Menus.social -}}
     <ol class="menu-social">
         {{ range . }}
@@ -54,6 +55,16 @@
             </a>
         </li>
         {{ end }}
+        {{- $currentLang := $page.Language.Lang -}}
+        {{- range $page.AllTranslations }}
+        {{- if ne .Language.Lang $currentLang }}
+        <li class="lang-toggle-icon">
+            <a href="{{ .Permalink }}" title="Switch to {{ .Language.Lang }}">
+                {{ partial "helper/icon" (printf "toggle_to_%s" .Language.Lang) }}
+            </a>
+        </li>
+        {{- end }}
+        {{- end }}
     </ol>
 {{- end -}}
 
@@ -76,22 +87,9 @@
 {{ end }}
 <li class="menu-bottom-section">
     <ol class="menu">
-        {{- $currentLang := .Language.Lang -}}
-        {{- range .Site.Home.AllTranslations }}
-        {{- if ne .Language.Lang $currentLang }}
-        <li id="i18n-switch">
-            <a href="{{ .Permalink }}" title="{{ .Language.LanguageName }}">
-                {{ partial "helper/icon" "language" }}
-                {{ .Language.LanguageName }}
-            </a>
-        </li>
-        {{- end }}
-        {{- end }}
-
         {{ if (default false .Site.Params.colorScheme.toggle) }}
         <li id="dark-mode-toggle">
             {{ partial "helper/icon" "moon" }}
-            {{ partial "helper/icon" "brightness-up" }}
             <span>{{ T "darkMode" }}</span>
         </li>
         {{ end }}
```
New binary files added under `static/img/` (screenshots for the Gitea/ntfy, Gotify, Home Assistant, Node-RED, OPNsense and Kubernetes posts); the diff viewer only reports their image dimensions and file sizes, omitted here.