Compare commits

1 commit

Commit 0b5d653420: Remove public, private and bin folders (2025-05-07 21:54:44 +02:00)
Checks failed: Deploy / Deploy (push), failing after 5s
172 changed files with 11820 additions and 12116 deletions


@@ -0,0 +1,33 @@
name: Deploy
on: [push]
jobs:
Deploy:
runs-on: ubuntu
env:
BLOG_FOLDER: /blog
container:
volumes:
- /appli/data/blog:/blog
steps:
- name: Check out repository
run: |
cd ${BLOG_FOLDER}
git config --global user.name "Gitea Actions"
git config --global user.email "actions@local"
git config --global --add safe.directory ${BLOG_FOLDER}
git submodule update --init --recursive
git fetch origin
git reset --hard origin/main
- name: Download Hugo
run: |
curl -s https://api.github.com/repos/gohugoio/hugo/releases/latest | grep -oP 'https://[^"]+hugo_extended_[^"]+_Linux-64bit.tar.gz' | head -n 1 | xargs -n 1 curl -L -o hugo.tar.gz
tar -xzvf hugo.tar.gz -C ${BLOG_FOLDER}/
- name: Generate the static files with Hugo
run: |
rm -f ${BLOG_FOLDER}/content/posts/template.md
rm -rf ${BLOG_FOLDER}/private ${BLOG_FOLDER}/public
mkdir -p ${BLOG_FOLDER}/private ${BLOG_FOLDER}/public
${BLOG_FOLDER}/bin/hugo -D -b https://blog-dev.vezpi.me -s ${BLOG_FOLDER} -d ${BLOG_FOLDER}/private
${BLOG_FOLDER}/bin/hugo -s ${BLOG_FOLDER} -d ${BLOG_FOLDER}/public
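
To sanity-check the `Download Hugo` step outside the runner, the same pipeline can be run by hand; a minimal sketch reusing the release URL pattern from the workflow above (the version check at the end is only for illustration):

```bash
# Resolve the latest hugo_extended Linux 64-bit release, same pipeline as the workflow
url=$(curl -s https://api.github.com/repos/gohugoio/hugo/releases/latest \
  | grep -oP 'https://[^"]+hugo_extended_[^"]+_Linux-64bit.tar.gz' | head -n 1)
echo "Resolved: ${url}"
curl -L -o hugo.tar.gz "${url}"
tar -xzvf hugo.tar.gz hugo   # extract only the binary
./hugo version               # confirm the extracted binary runs
```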


@@ -1,245 +0,0 @@
name: Blog Deployment
on:
push:
branches:
- preview
schedule:
- cron: '0 3 * * 5'
env:
DOCKER_IMAGE: vezpi-blog
jobs:
Check-Rebuild:
runs-on: docker
defaults:
run:
shell: sh
outputs:
latest_hugo_version: ${{ steps.get_latest.outputs.version }}
current_hugo_version: ${{ steps.get_current.outputs.version }}
newer_version_available: ${{ steps.compare.outputs.version }}
current_docker_image: ${{ steps.current_docker.outputs.image }}
docker_folder_changed: ${{ steps.docker_folder.outputs.changed }}
dev_lock_present: ${{ steps.check_dev_lock.outputs.locked }}
steps:
- name: Checkout Repository
run: git clone --branch preview https://${{ secrets.REPO_TOKEN }}@git.vezpi.com/Vezpi/blog.git .
- name: Check Latest Hugo Version
id: get_latest
run: |
apk add curl
latest_version=$(curl -s https://api.github.com/repos/gohugoio/hugo/releases/latest | grep tag_name | sed -E 's/.*"v([^"]+)".*/\1/')
echo "version=$latest_version" | tee -a $GITEA_OUTPUT
- name: Check Current Hugo Version
id: get_current
run: |
current_version=$(docker image ls ${DOCKER_IMAGE} --format '{{.Tag}}' | head -n1)
echo "version=$current_version" | tee -a $GITEA_OUTPUT
- name: Compare Current and Latest Hugo Versions
id: compare
run: |
if [ "${{ steps.get_latest.outputs.version }}" != "${{ steps.get_current.outputs.version }}" ]; then
new_version_available=true
echo "New version available: ${{ steps.get_latest.outputs.version }}"
else
new_version_available=false
echo "Current version is the latest: ${{ steps.get_latest.outputs.version }}"
fi
echo "version=$new_version_available" | tee -a $GITEA_OUTPUT
- name: Get Current Docker Image ID
id: current_docker
run: |
current_image=$(docker image ls ${DOCKER_IMAGE}:latest --format '{{.ID}}' | head -n1)
echo "image=$current_image" | tee -a $GITEA_OUTPUT
- name: Check Changes in the Docker Folder
id: docker_folder
run: |
if git diff --name-only origin/main | grep -q '^docker/';
then
docker_folder_changed=true
echo "Change detected in the /docker folder"
else
docker_folder_changed=false
echo "No change in the /docker folder"
fi
echo "changed=$docker_folder_changed" | tee -a $GITEA_OUTPUT
- name: Check for .dev-lock file
id: check_dev_lock
run: |
if [ -f .dev-lock ]; then
echo "locked=true" | tee -a $GITEA_OUTPUT
else
echo "locked=false" | tee -a $GITEA_OUTPUT
fi
Build:
needs: Check-Rebuild
if: needs.Check-Rebuild.outputs.newer_version_available == 'true' || needs.Check-Rebuild.outputs.docker_folder_changed == 'true'
runs-on: docker
defaults:
run:
shell: sh
steps:
- name: Checkout Repository
run: git clone --branch preview https://${{ secrets.REPO_TOKEN }}@git.vezpi.com/Vezpi/blog.git .
- name: Build Docker Image
run: |
cd docker
docker build \
--build-arg HUGO_VERSION=${{ needs.Check-Rebuild.outputs.latest_hugo_version }} \
--tag ${DOCKER_IMAGE}:${{ needs.Check-Rebuild.outputs.latest_hugo_version }} \
.
docker tag ${DOCKER_IMAGE}:${{ needs.Check-Rebuild.outputs.latest_hugo_version }} ${DOCKER_IMAGE}:latest
Deploy-Staging:
needs: [Check-Rebuild, Build]
if: always() && needs.Check-Rebuild.result == 'success' && (needs.Build.result == 'skipped' || needs.Build.result == 'success')
runs-on: docker
container:
volumes:
- /appli/docker/blog:/blog
defaults:
run:
shell: sh
env:
CONTAINER_NAME: blog_staging
steps:
- name: Launch Blog Deployment
run: |
cd /blog
docker compose down ${CONTAINER_NAME}
docker compose up -d ${CONTAINER_NAME}
sleep 5
echo "- Displaying container logs"
docker compose logs ${CONTAINER_NAME}
Test-Staging:
needs: Deploy-Staging
runs-on: ubuntu
env:
URL: "https://blog-dev.vezpi.com/en/"
steps:
- name: Check HTTP Response
run: |
code=$(curl -s -o /dev/null -w "%{http_code}" "$URL")
echo "HTTP response code: $code"
if [ "$code" -ne 200 ]; then
echo "❌ Service is not healthy (HTTP $code)"
exit 1
else
echo "✅ Service is healthy"
fi
Merge:
needs: [Check-Rebuild, Test-Staging]
if: needs.Test-Staging.result == 'success' && needs.Check-Rebuild.outputs.dev_lock_present == 'false'
runs-on: ubuntu
steps:
- name: Checkout Repository
uses: actions/checkout@v4
with:
fetch-depth: 0
ref: main
- name: Merge preview Branch on main
run: |
git merge --ff-only origin/preview
git push origin main
Deploy-Production:
needs: Merge
runs-on: docker
container:
volumes:
- /appli/docker/blog:/blog
defaults:
run:
shell: sh
env:
CONTAINER_NAME: blog_production
steps:
- name: Launch Blog Deployment
run: |
cd /blog
docker compose down ${CONTAINER_NAME}
docker compose up -d ${CONTAINER_NAME}
sleep 5
echo "- Displaying container logs"
docker compose logs ${CONTAINER_NAME}
Test-Production:
needs: Deploy-Production
runs-on: ubuntu
env:
URL: "https://blog.vezpi.com/en/"
steps:
- name: Check HTTP Response
run: |
code=$(curl -s -o /dev/null -w "%{http_code}" "$URL")
echo "HTTP response code: $code"
if [ "$code" -ne 200 ]; then
echo "❌ Service is not healthy (HTTP $code)"
exit 1
else
echo "✅ Service is healthy"
fi
Clean:
needs: [Check-Rebuild, Build, Test-Production]
runs-on: docker
defaults:
run:
shell: sh
steps:
- name: Remove Old Docker Image
run: |
docker image rm ${{ needs.Check-Rebuild.outputs.current_docker_image }} --force
Notify:
needs: [Check-Rebuild, Build, Deploy-Staging, Test-Staging, Merge, Deploy-Production, Test-Production, Clean]
runs-on: ubuntu
if: always() && needs.Check-Rebuild.outputs.dev_lock_present == 'false'
env:
NTFY_URL: https://ntfy.vezpi.com
NTFY_TOPIC: blog
NTFY_TOKEN: ${{ secrets.NTFY_CREDENTIALS }}
steps:
- name: Notify Workflow Result
run: |
if [[
"${{ needs.Check-Rebuild.result }}" == "success" &&
("${{ needs.Build.result }}" == "success" || "${{ needs.Build.result }}" == "skipped") &&
"${{ needs.Deploy-Staging.result }}" == "success" &&
"${{ needs.Test-Staging.result }}" == "success" &&
"${{ needs.Merge.result }}" == "success" &&
"${{ needs.Deploy-Production.result }}" == "success" &&
"${{ needs.Test-Production.result }}" == "success" &&
("${{ needs.Clean.result }}" == "success" || "${{ needs.Clean.result }}" == "skipped")
]]; then
curl -H "Priority: min" \
-H "Tags: white_check_mark" \
-d "Blog workflow completed successfully." \
-u ${NTFY_TOKEN} \
${NTFY_URL}/${NTFY_TOPIC}
else
curl -H "Priority: high" \
-H "Tags: x" \
-H "Actions: view, View Run, ${{ gitea.server_url }}/${{ gitea.repository }}/actions/runs/${{ gitea.run_number }}, clear=true; \
view, Verify Blog, https://blog.vezpi.com, clear=true" \
-d "Blog workflow failed!" \
-u ${NTFY_TOKEN} \
${NTFY_URL}/${NTFY_TOPIC}
fi
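
The `Notify` job above can also be exercised outside the workflow by replaying its `curl` call against the same ntfy instance; a minimal sketch (the credentials placeholder stands in for the `NTFY_CREDENTIALS` secret):

```bash
# Replay the success notification sent by the Notify job (replace the placeholder credentials)
NTFY_URL="https://ntfy.vezpi.com"
NTFY_TOPIC="blog"
curl -H "Priority: min" \
     -H "Tags: white_check_mark" \
     -d "Blog workflow completed successfully." \
     -u "<user>:<password>" \
     "${NTFY_URL}/${NTFY_TOPIC}"
```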

.gitignore vendored, 3 lines changed

@@ -1,5 +1,2 @@
public
private
LICENSE
README.md
hugo

.gitmodules vendored, 6 lines changed

@@ -1,3 +1,9 @@
[submodule "themes/hugo-coder"]
path = themes/hugo-coder
url = https://github.com/luizdepra/hugo-coder.git
[submodule "themes/PaperMod"]
path = themes/PaperMod
url = https://github.com/adityatelange/hugo-PaperMod.git
[submodule "themes/stack"]
path = themes/stack
url = https://github.com/CaiJimmy/hugo-theme-stack.git
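
Entries like the ones above are normally created with `git submodule add`; a short sketch using the stack theme URL and path taken from this `.gitmodules` file:

```bash
# Register the stack theme as a submodule (URL and path taken from .gitmodules above)
git submodule add https://github.com/CaiJimmy/hugo-theme-stack.git themes/stack
git submodule update --init --recursive   # fetch all registered themes
```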

.hugo_build.lock, new empty file

archetypes/default.md, new file (5 lines)

@@ -0,0 +1,5 @@
+++
title = '{{ replace .File.ContentBaseName "-" " " | title }}'
date = {{ .Date }}
draft = true
+++
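
For reference, Hugo fills this archetype when a new piece of content is created; a sketch with a hypothetical post name (the generated date is the actual creation timestamp, shown here only as an illustration):

```bash
# Create a post from the archetype above (post name is hypothetical)
hugo new content posts/my-first-post.md
# Resulting front matter, with the title derived from the file name:
# +++
# title = 'My First Post'
# date = 2025-05-07T21:54:44+02:00   # illustrative timestamp
# draft = true
# +++
```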

Image file changed (108 KiB before and after)

Image file changed (50 KiB before and after)

Image file changed (69 KiB before and after)

Image file changed (16 KiB before and after)


@@ -0,0 +1 @@
<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="icon icon-tabler icons-tabler-outline icon-tabler-brand-git"><path stroke="none" d="M0 0h24v24H0z" fill="none"/><path d="M16 12m-1 0a1 1 0 1 0 2 0a1 1 0 1 0 -2 0" /><path d="M12 8m-1 0a1 1 0 1 0 2 0a1 1 0 1 0 -2 0" /><path d="M12 16m-1 0a1 1 0 1 0 2 0a1 1 0 1 0 -2 0" /><path d="M12 15v-6" /><path d="M15 11l-2 -2" /><path d="M11 7l-1.9 -1.9" /><path d="M13.446 2.6l7.955 7.954a2.045 2.045 0 0 1 0 2.892l-7.955 7.955a2.045 2.045 0 0 1 -2.892 0l-7.955 -7.955a2.045 2.045 0 0 1 0 -2.892l7.955 -7.955a2.045 2.045 0 0 1 2.892 0z" /></svg>



@@ -1 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="icon icon-tabler icons-tabler-outline icon-tabler-message-language"><path stroke="none" d="M0 0h24v24H0z" fill="none"/><path d="M4 21v-13a3 3 0 0 1 3 -3h10a3 3 0 0 1 3 3v6a3 3 0 0 1 -3 3h-9l-4 4" /><path d="M10 14v-4a2 2 0 1 1 4 0v4" /><path d="M14 12h-4" /></svg>



@@ -1 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="icon icon-tabler icons-tabler-outline icon-tabler-moon"><path stroke="none" d="M0 0h24v24H0z" fill="none"/><path d="M12 3c.132 0 .263 0 .393 0a7.5 7.5 0 0 0 7.92 12.446a9 9 0 1 1 -8.313 -12.454z" /></svg>



@@ -1 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="icon icon-tabler icons-tabler-outline icon-tabler-refresh"><path stroke="none" d="M0 0h24v24H0z" fill="none"/><path d="M20 11a8.1 8.1 0 0 0 -15.5 -2m-.5 -4v4h4" /><path d="M4 13a8.1 8.1 0 0 0 15.5 2m.5 4v-4h-4" /></svg>



@@ -1 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="icon icon-tabler icons-tabler-outline icon-tabler-stopwatch"><path stroke="none" d="M0 0h24v24H0z" fill="none"/><path d="M5 13a7 7 0 1 0 14 0a7 7 0 0 0 -14 0z" /><path d="M14.5 10.5l-2.5 2.5" /><path d="M17 8l1 -1" /><path d="M14 3h-4" /></svg>


File diff suppressed because one or more lines are too long (image, 90 KiB)

File diff suppressed because one or more lines are too long (image, 90 KiB)


@@ -1,11 +0,0 @@
.homepage-header {
text-align: center;
}
.lang-toggle-icon {
margin-left: auto;
svg {
width: 64px;
height: 24px;
}
}


@@ -1,6 +0,0 @@
---
title: Bienvenue sur Vezpi Lab
description: Ici les derniers articles
---
Ce blog partage mes projets et expériences dans mon homelab.
Vous trouverez ci-dessous les derniers articles.


@@ -1,6 +0,0 @@
---
title: Welcome to Vezpi Lab
description: Here are the latest articles
---
This blog shares projects and experiments from my homelab.
Below you'll find the latest articles.


@@ -1,20 +0,0 @@
---
title: A propos de moi
description: Qui suis-je
translationKey: about
menu:
main:
name: A Propos de Moi
weight: 10
params:
icon: user
---
Salut ! Moi c'est Etienne, j'adore l'**automatisation** et je suis un amateur de projets **homelab**. Je suis un expert Linux et je travaille comme **Senior Cloud DevOps Engineer** chez Capgemini.
Motivé par la passion, j'aime explorer de nouvelles technologies, comprendre comment elles fonctionnent et les expérimenter chez moi, juste pour le plaisir. Mon lab est passé d'un simple espace de bidouille à un véritable terrain de jeu pour expérimenter la virtualisation, l'orchestration de conteneurs, le réseau, et bien plus encore.
Ce blog est ma façon de documenter ce que je construis, casse (volontairement !), répare et surtout, ce que j'**apprends**. C'est une référence personnelle, mais aussi un moyen de partager avec la communauté, de m'open source, au cas où quelqu'un d'autre suivrait le même chemin et trouverait mon expérience utile.
Même si je suis français, j'écris mes notes personnelles en anglais pour perfectionner l'utilisation de cette langue, mais j'essaie tout de même de les traduire dans ma langue maternelle.
Si vous travaillez sur un projet intéressant, si vous avez des questions ou si vous souhaitez proposer de nouvelles idées, **n'hésitez pas à me contacter** !


@@ -1,20 +1,10 @@
---
title: About myself
title: About
description: Who I am
translationKey: about
menu:
main:
name: About Me
weight: 10
params:
icon: user
---
Hi! I'm Etienne, an **automation** lover and **homelab** projects enthusiast. I am a Linux SME and work as a **Senior Cloud DevOps Engineer** at Capgemini.
Driven by passion, I enjoy exploring new technologies, understanding how they work and experiment those at home, just for the joy of it. My lab has grown from a simple tinkering space into a full playground for experimenting with virtualization, container orchestration, networking, and much more.
This blog is my way of documenting what I build, break (on purpose!), fix, and above all, **learn**. It's a personal reference, but also a way to give back to the community, I open source myself, in case someone else is on a similar path and finds value in my experience.
Even though I'm French, I write my personal notes in English to perfect my use of this language, but I still try to translate them into my mother tongue.
If you're working on something cool, have questions, or want to bring new ideas on the table, **feel free to reach out**!
Hello world !


@@ -1,221 +0,0 @@
---
title: Bienvenue dans mon Homelab
layout: page
description: L'histoire derrière mon projet de homelab, d'un Raspberry Pi à un mini datacenter, où j'expérimente Proxmox, Kubernetes, l'automatisation et plus encore.
showToc: true
menu:
main:
name: Homelab
weight: 20
params:
icon: flask
---
## Intro
Mon aventure homelab a commencé en 2013 avec un modeste Raspberry Pi, le tout premier modèle. J'avais besoin d'une machine bon marché pour mes premiers pas dans le monde de Linux. Elle m'a beaucoup aidé à m'initier à cette technologie et m'a servi de NAS de base, merci Vezpibox (nom pourri, je sais).
En 2015, je suis passé à un Raspberry Pi 2, à la recherche de meilleures performances pour exécuter plusieurs applications comme XBMC (l'ancien nom de Kodi), CouchPotato, SickBeard... 😇
En 2018, le besoin de plus de RAM m'a conduit à un Raspberry Pi 3, me permettant d'exécuter encore plus d'applications. Mes trois petites machines fonctionnaient harmonieusement ensemble, dans un bordel bien ordonné.
Enfin, en 2019, mon nouveau travail m'a fait expérimenter la virtualisation, avec les machines virtuelles et surtout Docker. Je voulais essayer ça à la maison, et je suis passé à la vitesse supérieure avec un mini-PC compact mais assez puissant qui a posé les bases de mon homelab.
---
## Pourquoi un Homelab ?
Je voulais mon propre terrain de jeu, un espace où je pouvais construire mais aussi casser des choses sans risques, apprendre à les réparer et mieux comprendre leur fonctionnement.
Mon unique serveur était génial, mais tester quoi que ce soit de risqué dessus était devenu problématique. Il exécutait des services critiques comme la domotique ou le DNS, lorsqu'il était en panne, tout était en panne. Le serveur était devenu indispensable, et croyez-moi, ne pas avoir de lumières ni d'internet était un incident majeur pour ma famille. Plus aussi fun.
Le premier grand défi que je me suis lancé a été de créer un cluster Kubernetes. Bien sûr, j'aurais pu en exécuter un sur un seul nœud, mais à quoi bon un cluster avec un seul nœud ? On pourrait dire qu'utiliser Kubernetes pour contrôler mes volets est overkill, et vous auriez raison. Mais ce n'était pas le sujet.
Je voulais aussi créer de nouvelles machines virtuelles à volonté, les reconstruire de zéro et appliquer les principes de l'Infrastructure as Code. J'aurais pu faire tout cela dans le cloud, mais je voulais un contrôle total.
Au départ, mon objectif était d'assurer la haute disponibilité de mes services existants. Un seul serveur ne suffisait pas. J'avais donc besoin d'un deuxième nœud. Mais dans la plupart des configurations haute disponibilité, trois nœuds constituent le compromis idéal. Et c'est ainsi que j'ai pu construire ce qui allait devenir mon homelab.
---
## Conception du Lab
Tout d'abord, il me fallait définir les fonctions de mon homelab. Je souhaitais qu'il héberge mes services existants de manière fiable, mais cela ne suffisait pas, je voulais un véritable terrain de jeu, capable de simuler un environnement d'entreprise plus complexe.
### Blueprint
Cela impliquait :
- **Haute disponibilité** : Trois nœuds pour garantir qu'aucun point de défaillance ne puisse tout interrompre.
- **Stockage distribué** : Redondance des données entre les nœuds, non seulement pour garantir la disponibilité, mais aussi pour apprendre le fonctionnement des systèmes de stockage d'entreprise.
- **Segmentation du réseau** : Plusieurs VLAN pour imiter les topologies de réseau réelles, isoler les services et pratiquer la mise en réseau avancée.
### Contraintes
Bien sûr, la réalité ne correspond pas toujours aux ambitions. Voici ce à quoi je me suis heurté :
- **Espace** : Mon lab devait tenir dans un petit espace de service cachée au milieu de mon appartement. Pas vraiment une salle de serveurs.
- **Bruit** : Le silence était crucial. Ce n'était pas un endroit caché dans un garage ou un sous-sol, mais en plein cœur de notre espace de vie.
- **Consommation électrique** : Fonctionnant 24/7, la consommation électrique devait être maîtrisée. Je ne pouvais pas me permettre de tripler ma facture d'électricité juste pour bricoler des machines virtuelles.
- **Budget** : Je n'allais pas dépenser des milliers d'euros pour du matériel professionnel. L'équilibre résidait dans la recherche d'un équipement d'occasion fiable et abordable.
- **Température** : Franchement, je n'y avais pas pensé… Les mini-PC ne chauffent pas beaucoup, mais le matériel réseau ? C'est une autre histoire. Leçon apprise.
En un mot, je souhaitais construire un mini datacenter dans un placard.
---
## Présentation de l'Infrastructure
Décomposons les composants de mon homelab.
### Rack
Que serait un datacenter sans rack ? Honnêtement, je ne pensais pas qu'un rack pourrait tenir dans mon espace limité, jusqu'à ce que je découvre le [DeskPi RackMate T1](https://deskpi.com/products/deskpi-rackmate-t1-2).
Ce produit était parfait. Sa taille était idéale, sa qualité de fabrication impressionnante et sa conception modulaire m'a permis d'ajouter des accessoires supplémentaires, comme une multiprise et des étagères, pour compléter l'installation.
### Serveurs
J'avais déjà un serveur qui constituait la pierre angulaire de mon homelab et je souhaitais le conserver. Cependant, il présentait deux inconvénients majeurs :
- **Interface réseau unique** : Je voulais au moins deux cartes réseau pour la segmentation et la redondance du réseau.
- **Matériel vieillissant** : Il avait plus de cinq ans et ses options de compatibilité devenaient limitées.
Pour la carte réseau manquante, j'ai envisagé un adaptateur USB, mais j'ai finalement trouvé une meilleure solution : utiliser le port M.2 interne, initialement prévu pour un module Wi-Fi, pour connecter un adaptateur 2,5 Gbit/s. C'était la solution idéale.
Concernant le matériel, mon serveur actuel était équipé d'un Ryzen 3 2200G AM4 avec 16 Go de RAM DDR4. Pour garantir la cohérence et simplifier la compatibilité, j'ai décidé de conserver le socket AM4 pour tous les nœuds.
Les spécifications des deux nœuds supplémentaires étaient claires : un socket AM4 pour la cohérence, une faible consommation d'énergie, deux cartes réseau dont au moins une à 2,5 Gbit/s, et des options de stockage suffisantes, dont au moins un emplacement M.2 NVMe et une baie pour lecteur 2,5 pouces. L'AM4 étant un peu ancien, les modèles plus récents étaient exclus, ce qui était une bonne nouvelle pour mon budget, car j'ai pu acheter des mini-PC d'occasion.
Voici les spec de mes nœuds :
| **Node** | **Vertex** | **Apex** | **Zenith** |
| --------- | ----------------------- | ----------------------- | ------------------------ |
| **Model** | ASRock DeskMini A300 | Minisforum HM50 | T-bao MN57 |
| **CPU** | AMD Ryzen 3 2200G 4C/4T | AMD Ryzen 5 4500U 6C/6T | AMD Ryzen 7 5700U 8C/16T |
| **TDP** | 65W | 15W | 15W |
| **RAM** | 16GB | 16GB | 32GB |
| **NIC** | 1Gbps (+ 2.5Gbps) | 1Gbps + 2.5Gbps | 1Gbps + 2.5Gbps |
| **M.2** | 2 | 1 | 1 |
| **2,5"** | 2 | 2 | 1 |
Chaque nœud a la même configuration de disque : un SSD de 256 Go dans la baie 2,5" pour le système d'exploitation et un disque NVMe de 1 To pour le stockage des données.
### Réseau
Pour le réseau, j'avais deux objectifs principaux : implémenter des VLAN pour la segmentation du réseau et gérer mon pare-feu pour un contrôle plus précis. Mes nœuds étant équipés de cartes réseau 2,5 Gbit/s, j'avais besoin de switchs capables de gérer ces débits, ainsi que de quelques ports Power over Ethernet (PoE) pour mon antenne Zigbee et ses futures fonctionnalités.
Au départ, j'étais attiré par le matériel MikroTik, idéal pour apprendre, mais la disposition de leurs switchs ne correspondait pas vraiment à ma configuration. En revanche, la gamme UniFi d'Ubiquiti était la solution de facilité, avec son interface utilisateur élégante et son esthétique matérielle impressionnante.
Pour le routeur, je ne voulais pas de passerelle UniFi. Je voulais quelque chose de plus personnalisable, avec lequel je pouvais bidouiller. Après quelques recherches, j'ai opté pour OPNsense plutôt que pfSense. Il paraît que c'est un peu plus adapté aux débutants, et jusqu'à présent, je ne l'ai pas regretté.
Voici la configuration réseau finale :
- **Routeur :** OPNsense fonctionnant sur un boîtier Topton sans ventilateur avec un processeur Intel N100, 16 Go de RAM et 4 ports 2,5 Gbit/s.
- **Switch :** [UniFi Switch Lite 16 PoE](https://eu.store.ui.com/eu/en/category/switching-utility/products/usw-lite-16-poe), 8 ports PoE 1 Gbit/s et 8 ports non PoE.
- **Switch :** [UniFi Flex Mini 2,5 G](https://eu.store.ui.com/eu/en/category/switching-utility/products/usw-flex-2-5g-5), 5 ports 2,5 Gbit/s, dont un port PoE entrant.
- **Point d'accès :** [UniFi U7 Pro Wall](https://eu.store.ui.com/eu/en/category/all-wifi/products/u7-pro-wall), Wi-Fi 7, 2,5 Gbit/s PoE+ entrant.
### Stockage
Bien que je n'aie pas besoin d'un stockage important, il me fallait une configuration flexible pour stocker les datas de mon homelab, ainsi que mes médias et documents personnels.
Chaque nœud Proxmox est équipé d'un SSD SATA de 256 Go pour le système d'exploitation, les fichiers ISO et les templates VM/LXC. Pour le stockage des datas, j'ai ajouté un disque NVMe de 1 To par nœud, qui constitue la base de mon cluster Ceph. Cela me permet d'obtenir un stockage distribué, redondant et performant pour les VM et les conteneurs, ce qui permet une migration à chaud et une haute disponibilité sur l'ensemble du cluster.
À l'origine, mon premier serveur était équipé de deux disques durs de 1 To installés en interne. Comme j'avais besoin d'un emplacement pour le SSD, je les ai déplacés hors du boîtier à l'aide d'adaptateurs USB vers SATA et les ai reconnectés au même nœud. Ces disques stockent mes photos, mes documents Nextcloud et mes sauvegardes, des données moins critiques pour les performances qui n'ont pas besoin de rester sur Ceph. Ils sont servis sur le réseau à l'aide d'un serveur NFS situé dans un conteneur LXC sur ce nœud.
### Refroidissement
J'ai vite compris que mon équipement réseau transformait mon placard en mini-fournaise. Heureusement, j'ai commencé la construction en décembre, donc la chaleur n'était pas trop perceptible, mais avec l'été, elle allait forcément devenir un vrai problème.
Les options étaient limitées, impossible de convaincre ma femme que nos serveurs avaient besoin d'un système de refroidissement. De plus, il fallait que ce soit silencieux. Une combinaison difficile.
La meilleure solution que j'ai trouvée a été de percer deux trous de 40 mm au-dessus du meuble de cuisine. J'ai fait passer des tuyaux en PVC dans le mur et installé deux petits ventilateurs, chacun recouvert de mousse pour minimiser les vibrations et le bruit.
À l'intérieur du rack, j'ai également ajouté deux ventilateurs de 80 mm pour améliorer la circulation de l'air. Pour un fonctionnement silencieux, j'ai branché un contrôleur PWM pour réguler la vitesse des ventilateurs, trouvant ainsi un équilibre entre circulation d'air et silence.
### Photos
Voici à quoi ça ressemble :
![Front view of my homelab with legend](img/homelab-rack-legend.png)
![Different views of my homelab with open and closed enclosure](img/homelab-enclore-open-closed.png)
---
## Stack Logicielle
Une fois les fondations matérielles posées, l'étape suivante consistait à déterminer la partie software qui orchestrerait l'ensemble, véritable moteur de chaque expérience, déploiement et opportunité d'apprentissage.
### Hyperviseur
Au cœur de ma configuration se trouve un cluster Proxmox VE 8 à 3 nœuds, un hyperviseur basé sur KVM prenant également en charge les conteneurs LXC. Basé sur Debian, il offre des fonctionnalités essentielles telles que la migration à chaud, la haute disponibilité et l'intégration de Ceph, prêtes à l'emploi.
Pour l'instant, j'utilise principalement une seule VM et un seul conteneur LXC. La VM est en quelque sorte un clone de mon ancien serveur physique, hébergeant la plupart de mes applications sous forme de conteneurs Docker. Le conteneur LXC sert de simple jump server.
### Réseau
L'objectif de mon réseau était d'implémenter des VLAN pour la segmentation et de gérer directement les règles de pare-feu afin de simuler des configurations plus complexes.
#### Routeur et pare-feu
Au cœur de ce réseau se trouve **OPNsense**, fonctionnant dans un boîtier dédié sans ventilateur. Le routeur du FAI est en mode pont et transmet tout le trafic à OPNsense, qui gère toutes les fonctions de routage et de pare-feu. Le trafic inter-VLAN est restreint, des règles de pare-feu explicites sont obligatoires, et seul le VLAN de management a accès aux autres segments.
#### Réseau L2
Le réseau de couche 2 est géré par des **switchs UniFi**, choisis pour leur interface utilisateur épurée et leur simplicité. Le contrôleur UniFi, qui gère la configuration des appareils, fonctionne en tant que plugin sur OPNsense.
Le point d'accès diffuse deux SSID : un pour les ordinateurs et téléphones portables de ma famille (5 et 6 GHz) et l'autre uniquement en 2,4 GHz pour tout le reste (IoT, aspirateur, climatisation, imprimante, Chromecast, etc.).
Un switch UniFi 2,5 Gbit/s est dédié aux communications de Ceph, isolant le trafic de stockage pour éviter les interférences avec d'autres réseaux.
J'ai configuré **LACP** (agrégation de liens) entre le routeur et le commutateur principal à 1 Gbit/s, dans l'espoir de doubler la bande passante. En réalité : une session n'utilise qu'un seul lien, ce qui signifie qu'un téléchargement unique est toujours plafonné à 1 Gbit/s.
#### VLAN
Pour segmenter le trafic, j'ai divisé le réseau en plusieurs VLAN :
| Nom | ID | Rôle |
| --------- | ---- | ---------------------------- |
| User | 13 | Home network |
| IoT | 37 | IoT and untrusted equipments |
| DMZ | 55 | Internet facing |
| Lab | 66 | Lab network, trusted |
| Heartbeat | 77 | Proxmox cluster heartbeat |
| Mgmt | 88 | Management |
| Ceph | 99 | Ceph |
| VPN | 1337 | Wireguard network |
Chaque VLAN possède son propre pool DHCP géré par OPNsense, à l'exception des VLAN Heartbeat et Ceph.
#### DNS
Au sein d'OPNsense, le DNS est structuré en deux couches :
- ADguard Home : filtres de publicités et de traqueurs, sert chaque client du réseau sur le port DNS standard 53.
- Unbound DNS : DNS récursif, distribue uniquement le service DNS ADguard Home en interne.
#### Reverse Proxy
**Caddy** fonctionne comme plugin sur OPNsense et sert de point d'entrée principal pour le trafic web. Il achemine les requêtes en fonction des sous-domaines, gère automatiquement les certificats HTTPS et drop les accès aux services internes provenant du WAN.
La plupart des services sont toujours gérés par une instance **Traefik** exécutée sur ma VM. Dans ce cas, Caddy transfère simplement les requêtes HTTPS directement à Traefik.
Cette configuration de proxy à deux couches centralise la gestion des certificats SSL dans **Caddy** tout en préservant un routage interne flexible et dynamique avec **Traefik**.
#### VPN
Pour un accès distant sécurisé, j'ai configuré **WireGuard** sur OPNsense. Ce VPN léger fournit une connectivité chiffrée à mon lab où que je sois, permettant ainsi de gérer tous mes services sans les exposer directement à Internet.
#### Schéma Réseau
![Diagram of my home network ](img/homelab-network-schema.png)
### Application
Plongeons dans la partie fun ! Ce qui a commencé comme une modeste configuration destinée à répondre à quelques besoins personnels s'est rapidement transformé en un écosystème complet de services open source, chacun répondant à un besoin spécifique ou simplement à la curiosité.
Voici un aperçu de ce qui fonctionne actuellement dans mon homelab :
- **Home Assistant** : Plateforme centralisée pour la domotique, intégrant des appareils connectés et des routines.
- **Vaultwarden** : Alternative légère à Bitwarden pour gérer et synchroniser les mots de passe en toute sécurité.
- **Nextcloud** : Stockage cloud self-hosted.
- **Gitea** : Solution de dépôt Git pour gérer mon code et mes projets.
- **Blog** : Mon blog personnel basé sur Hugo, que vous lisez actuellement.
- **Immich** : Application de gestion de photos et de vidéos, similaire à Google Photos.
- **Jellyfin** : Serveur multimédia pour le streaming de films et de séries.
- **ARR Stack** : Outils d'acquisition multimédia automatisés. (Radarr, Sonarr, Torrent, etc.)
- **Duplicati** : Solution de sauvegarde chiffrée pour mes données et configurations importantes.
- **Prometheus** : Outil de surveillance et de collecte de métriques, utilisé avec Grafana pour les tableaux de bord.
- **Portainer** : Interface web pour la gestion des conteneurs et des stacks Docker.
- **Umami** : Analyses axées sur la confidentialité pour le suivi des visites sur mon blog.
- **phpIPAM** : Outil de gestion des adresses IP pour l'organisation de mes VLAN et sous-réseaux.
#### Docker
Docker a véritablement révolutionné mon aventure homelab. Avant les conteneurs, gérer plusieurs services sur un seul serveur était une bataille constante avec les dépendances et les conflits. Aujourd'hui, chaque service fonctionne parfaitement, géré par Docker Compose au sein d'une seule VM. Traefik gère dynamiquement le reverse proxy, simplifiant ainsi l'accès et les certificats SSL.
#### Kubernetes
Mon prochain grand défi est de faire passer l'orchestration des conteneurs au niveau supérieur. Si Docker Swarm pouvait répondre à ce besoin technique, mon objectif principal est d'acquérir une expérience pratique de Kubernetes, et il n'y a pas de meilleur moyen d'apprendre que de l'appliquer à des cas d'utilisation concrets.
---
## Derniers Mots
Merci d'avoir pris le temps de lire mon aventure homelab !
Construire et peaufiner cette configuration a été une formidable source d'apprentissage et de plaisir, et je suis toujours à la recherche de nouvelles façons de l'améliorer.
Si vous avez des idées, des commentaires, de meilleures solutions, ou si vous souhaitez simplement partager votre propre configuration, n'hésitez pas à me contacter. Envoyez-moi un message, challengez mes choix ou partagez votre histoire avec moi !


@@ -1,221 +1,49 @@
---
title: Welcome to my Homelab
layout: page
description: The story behind my homelab project, from a Raspberry Pi to a tiny datacenter, where I experiment with Proxmox, Kubernetes, automation and more.
title: "Homelab"
layout: "page"
description: "An overview of the hardware, software, and projects powering my personal homelab."
showToc: true
menu:
main:
name: Homelab
weight: 20
params:
icon: flask
---
## Intro
My homelab journey began in 2013 with a humble Raspberry Pi, the very first model. I needed a cheap machine for my first steps into the world of Linux. It helped me a lot to dive into this technology and served me as a basic NAS, thank you Vezpibox (shitty name, I know)
Welcome to my homelab — a space where I explore new technologies, break things on purpose, and learn by doing. What started as a few old machines has grown into a modest but powerful setup that I use for self-hosting, automation, testing infrastructure tools, and running personal projects.
In 2015, I upgraded to a Raspberry Pi 2, seeking better performance to run multiple applications like XBMC (the old Kodi's name), CouchPotato, SickBeard... 😇
## 1. Hardware
By 2018, the need for more RAM led me to a Raspberry Pi 3, allowing me to run even more applications. My 3 little machines were running happily together, in a quite ordered mess.
I currently run a 3-node cluster built with energy-efficient mini PCs and repurposed desktops. Here's a breakdown:
Finally, in 2019, my new job made me experiment with virtualization, with virtual machines and above all Docker. I wanted to try that at home, so I took a significant step forward with a compact yet quite powerful headless mini PC that laid the foundation of my homelab.
- **Node 1**: AMD Ryzen 4C/4T, 16GB RAM
- **Node 2**: AMD Ryzen 6C/6T, 16GB RAM
- **Node 3**: AMD Ryzen 8C/16T, 64GB RAM
- **Storage**: Ceph-based distributed storage across all nodes
- **Network**: 1Gbps LAN with 2.5Gbps NICs for Ceph replication traffic
- **Rack**: Compact 10" rack with managed switch and PDU
---
## Why a Homelab ?
## 2. Software
I wanted my own playground, a space where I could build but also safely break things, learn to fix them, and gain a deeper understanding of how they work.
- **Proxmox VE**: Used for virtualization and clustering
- **Ceph**: Distributed storage for VM disks
- **Kubernetes (K3s)**: For orchestrating containerized apps
- **Gitea**: Self-hosted Git with CI/CD via Gitea Actions
- **OPNsense**: Firewall, VLANs, and DNS (with AdGuard + Unbound)
- **Monitoring**: Grafana, Prometheus, Node Exporter
My single server was great, but testing anything risky on it became a problem. It was running critical services like home automation or DNS, when it was down, everything was down. The server had become indispensable, and believe me, having no lights or internet is a major incident in my family. Not so fun anymore.
## 3. Projects
The first big challenge I set for myself was building a Kubernetes cluster. Sure, I could run one on a single node, but what's the point of a cluster with only one node? You could argue that running Kubernetes to control my shutters is overkill, and you'd be right. But that wasn't the point.
Some of the ongoing and past projects I've worked on:
I also wanted to spawn new virtual machines at will, rebuild them from scratch, and apply Infrastructure as Code principles. I could have done all of that in the cloud, but I wanted full control.
Initially, my goal was to provide high availability for my existing services. One server wasn't enough. So, I wanted a second node. But in most HA setups, three nodes are the sweet spot. And just like that, I was on my way to building what would become my homelab.
---
## Shaping the Lab
First, I needed to define what my homelab was actually supposed to do. I wanted it to host my existing services reliably, but that wasn't enough, I wanted a true playground, capable of simulating a more complex enterprise environment.
### Blueprint
That meant:
- **High Availability:** Three nodes to ensure that no single point of failure would bring everything down.
- **Distributed Storage:** Data redundancy across nodes, not just for uptime but also to learn how enterprise-grade storage systems work.
- **Network Segmentation:** Multiple VLANs to mimic real-world network topologies, isolate services, and practice advanced networking.
### Constraints
Of course, reality doesn't always align with ambitions. Here's what I was up against:
- **Space:** My lab needed to fit in a small, hidden service enclosure in the middle of my apartment. Not exactly a server room.
- **Noise:** Silence was crucial. This wasn't going to be tucked away in a garage or basement, it was right in the middle of our living space.
- **Power Draw:** Running 24/7, the power consumption had to be kept in check. I couldn't afford to triple my electric bill just to tinker with VMs.
- **Budget:** I wasn't going to drop thousands on enterprise-grade hardware. The balance was finding reliable, second-hand gear that wouldn't break the bank.
- **Temperature**: I'm not gonna lie, I hadn't thought of it... Mini PCs don't generate much heat, but network gear? That's a different story. Lesson learned.
In a nutshell, I wanted to build a tiny datacenter in a closet.
---
## Infrastructure Overview
Lets break down the components that make up my homelab.
### Rack
What is a datacenter without a rack? Honestly, I didn't think one would fit in my limited space, until I discovered the [DeskPi RackMate T1](https://deskpi.com/products/deskpi-rackmate-t1-2).
This beauty was the perfect match. The size was spot-on, the build quality impressive, and the modular design allowed me to add some extra accessories, like a power strip and shelves, to complete the setup.
### Servers
I already had one server that served as the cornerstone of my homelab, and I wanted to keep it. But it had two major drawbacks:
- **Single Network Interface:** I wanted at least two NICs for network segmentation and redundancy.
- **Aging Hardware:** It was over five years old, and its compatibility options were becoming limited.
For the missing NIC, I considered a USB adapter but then stumbled upon a better solution: using the internal M.2 port, originally meant for a WiFi module, to connect a 2.5Gbps adapter. It was a perfect fit.
Regarding hardware, my existing server was powered by an AM4 Ryzen 3 2200G with 16GB of RAM DDR4. To keep things consistent and simplify compatibility, I decided to stick with the AM4 socket for all nodes.
The specifications for the two additional nodes were clear: an AM4 socket for consistency, low power consumption, dual NICs with at least one 2.5Gbps, and sufficient storage options, at least one M.2 NVMe slot and a 2.5" drive bay. Since AM4 is somewhat dated, newer models were off the table, which was good news for my budget, as I was able to buy second-hand mini PCs.
Here are the specs of my nodes:
| **Node** | **Vertex** | **Apex** | **Zenith** |
| --------- | ----------------------- | ----------------------- | ------------------------ |
| **Model** | ASRock DeskMini A300 | Minisforum HM50 | T-bao MN57 |
| **CPU** | AMD Ryzen 3 2200G 4C/4T | AMD Ryzen 5 4500U 6C/6T | AMD Ryzen 7 5700U 8C/16T |
| **TDP** | 65W | 15W | 15W |
| **RAM** | 16GB | 16GB | 32GB |
| **NIC** | 1Gbps (+ 2.5Gbps) | 1Gbps + 2.5Gbps | 1Gbps + 2.5Gbps |
| **M.2** | 2 | 1 | 1 |
| **2,5"** | 2 | 2 | 1 |
Each node follows the same disk layout: a 256GB SSD in the 2.5" bay for the operating system and a 1TB NVMe drive for data storage.
### Network
For the network, I had two main objectives: implement VLANs for network segmentation and manage my firewall for more granular control. Since my nodes were equipped with 2.5Gbps NICs, I needed switches that could handle those speeds, and a few Power over Ethernet (PoE) ports for my Zigbee antenna and what could come after.
Initially, I was drawn to MikroTik hardware, great for learning, but their switch layouts didn't quite align with my setup. On the other hand, Ubiquiti's UniFi line was the easy route, with their sleek UI and impressive hardware aesthetics.
For the router, I opted against the UniFi gateway. I wanted something more customizable, something I could get my hands dirty with. After some research, I settled on OPNsense over pfSense, it was said to be a bit more beginner-friendly, and so far, I haven't regretted it.
Here is the final network setup:
- **Router:** OPNsense running on a fanless Topton box with an Intel N100, 16GB RAM, and 4x 2.5Gbps ports.
- **Switch:** [UniFi Switch Lite 16 PoE](https://eu.store.ui.com/eu/en/category/switching-utility/products/usw-lite-16-poe), 8x 1Gbps PoE ports and 8x non-PoE ports.
- **Switch:** [UniFi Flex Mini 2.5G](https://eu.store.ui.com/eu/en/category/switching-utility/products/usw-flex-2-5g-5), 5x 2.5Gbps ports, with one PoE-in port.
- **Access Point:** [UniFi U7 Pro Wall](https://eu.store.ui.com/eu/en/category/all-wifi/products/u7-pro-wall), Wi-Fi 7, 2.5Gbps PoE+ in.
### Storage
While I don't have massive storage requirements, I still needed a flexible setup to store both my homelab workload and my personal media and documents.
Each Proxmox node is equipped with a **256GB SATA SSD** for the operating system, ISO files, and VM/LXC templates. For the workload storage, I added a **1TB NVMe drive** per node, which forms the basis of my **Ceph cluster**. This gives me distributed, redundant, and high-performance storage for VMs and containers, which allows live migration and high availability across the cluster.
Originally, my first server had two **1TB HDDs** installed internally. Because I needed a slot for the SSD, I moved them outside the case using **USB-to-SATA adapters** and reconnected them to the same node. These drives store my photos, Nextcloud documents and backups, less performance-critical data that doesn't need to sit on Ceph. They are served on the network using an NFS server sitting in an LXC container on that node.
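As a hedged illustration of that NFS setup (export path, subnet and options are assumptions, not the actual configuration), the LXC container could publish those drives roughly like this:
```bash
# Hypothetical NFS export from the LXC container holding the USB-attached HDDs
echo "/mnt/hdd/photos 192.168.66.0/24(rw,sync,no_subtree_check)" >> /etc/exports
exportfs -ra                # reload the export table
showmount -e localhost      # verify the share is published
```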
### Cooling
I quickly learned that my network gear was turning my closet into a mini furnace. Fortunately, I started the build in December, so the heat wasn't too noticeable, but come summer, it was bound to become a real problem.
Options were limited, there was no way I was going to convince my wife that our servers needed a cooling system. Plus, it had to be silent. Not an easy combo.
The best solution I came up with was to drill two 40mm holes above the kitchen cabinet. I ran PVC pipes through the wall and installed two small fans, each cushioned with foam to minimize vibrations and keep noise down.
Inside the rack, I also added two 80mm fans to help with airflow. To keep everything quiet, I hooked up a PWM controller to regulate fan speeds, striking a balance between airflow and silence.
### Photos
Here is what it looks like:
![Front view of my homelab with legend](img/homelab-rack-legend.png)
![Different views of my homelab with open and closed enclosure](img/homelab-enclore-open-closed.png)
---
## Software Stack
With the hardware foundation set, the next step was to decide what software would orchestrate everything, the real engine behind every experiment, deployment, and learning opportunity.
### Hypervisor
At the core of my setup is a 3-node Proxmox VE 8 cluster, a KVM-based hypervisor that also supports LXC containers. Built on Debian, it provides essential features like live migration, HA, and seamless Ceph integration right out of the box.
For now, I'm primarily running just one VM and one LXC container. The VM is essentially a clone of my old physical server, hosting most of my applications as Docker containers. The LXC container serves as a simple jump server.
### Network
The objective for my network was to implement VLANs for segmentation and manage firewall rules directly to simulate more complex setups.
#### Router and Firewall
At the heart of this network is **OPNsense**, running on a dedicated fanless box. The ISP router is in bridge mode, passing all traffic to OPNsense, which handles all routing and firewall duties. Inter-VLAN traffic is restricted, explicit firewall rules are mandatory, and only the management VLAN has access to other segments.
#### L2 Network
Layer 2 networking is managed by **UniFi switches**, chosen for their sleek UI and simplicity. The UniFi controller, which manages the devices configuration, runs as a plugin on OPNsense.
The access point broadcasts 2 SSIDs, one for my family's laptops and phones (5 and 6 GHz) and the other only in 2.4 GHz for everything else (IoT, vacuum, AC, printer, Chromecast, etc.)
A 2.5Gbps UniFi switch is dedicated to Ceph storage communications, isolating storage traffic to prevent interference with other networks.
I set up **LACP** (Link Aggregation) between the router and the main switch at 1Gbps, hoping to double bandwidth. Reality check: a single session will only use one link, meaning that a single download will still cap at 1Gbps.
#### VLANs
To segment traffic, I divided the network into several VLANs:
| Name | ID | Purpose |
| --------- | ---- | ---------------------------- |
| User | 13 | Home network |
| IoT | 37 | IoT and untrusted equipments |
| DMZ | 55 | Internet facing |
| Lab | 66 | Lab network, trusted |
| Heartbeat | 77 | Proxmox cluster heartbeat |
| Mgmt | 88 | Management |
| Ceph | 99 | Ceph |
| VPN | 1337 | Wireguard network |
Each VLAN has its own DHCP pool managed by OPNsense, except the Heartbeat and Ceph ones.
#### DNS
DNS is structured in two layers within OPNsense:
- AdGuard Home: ad and tracker filtering, serves every client on the network over plain DNS on port 53
- Unbound DNS: recursive resolver, serves only the AdGuard Home DNS service locally
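The two layers above can be traced with a couple of manual queries; a sketch where the resolver IP and the Unbound port are assumptions to adapt to the actual setup:
```bash
# Query AdGuard Home (filtering layer) on the standard DNS port 53
dig @10.0.88.1 example.com +short
# Query Unbound (recursive layer) directly, assuming it listens internally on port 5353
dig @10.0.88.1 -p 5353 example.com +short
```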
#### Reverse Proxy
**Caddy** runs as a plugin on OPNsense and acts as the main entry point for web traffic. It routes requests based on subdomains and automatically handles HTTPS certificates and drops internal service access coming from the WAN.
Most services are still managed by a **Traefik** instance running on my VM. In those cases, Caddy simply forwards HTTPS requests directly to Traefik.
This two-layer proxy setup centralizes SSL certificate management in **Caddy** while preserving flexible and dynamic routing internally with **Traefik**.
#### VPN
For secure remote access, I configured **WireGuard** on OPNsense. This lightweight VPN provides encrypted connectivity to my lab from anywhere, allowing management of all my services without exposing them all directly to the internet.
#### Network Diagram
![Diagram of my home network ](img/homelab-network-schema.png)
### Application
Let's dive into the fun part! What started as a modest setup meant to serve a few personal needs quickly turned into a full ecosystem of open source services, each solving a specific need or just satisfying curiosity.
Here's an overview of what's currently running in my homelab:
- **Home Assistant**: Central hub for home automation, integrating smart devices and routines.
- **Vaultwarden**: Lightweight alternative to Bitwarden for managing and syncing passwords securely.
- **Nextcloud**: Self-hosted cloud storage.
- **Gitea**: Git repository solution for managing my code and projects.
- **Blog**: My Hugo-based personal blog, which you are reading now.
- **Immich**: Photo and video management app, similar to Google Photos.
- **Jellyfin**: Media server for streaming movies and shows.
- **ARR Stack**: Automated media acquisition tools. (Radarr, Sonarr, Torrent, etc.)
- **Duplicati**: Encrypted backup solution for my important data and configs.
- **Prometheus**: Monitoring and metrics collection tool, used with Grafana for dashboards.
- **Portainer**: Web interface for managing Docker containers and stacks.
- **Umami**: Privacy-focused analytics for tracking visits on my blog.
- **phpIPAM**: IP address management tool for keeping my VLANs and subnets organized.
#### Docker
Docker was the real game-changer in my self-hosted journey. Before containers, managing multiple services on a single server felt like a constant battle with dependencies and conflicts. Now, every service runs neatly, managed with Docker Compose inside a single VM. Traefik dynamically handles reverse proxy, simplifying access and SSL certificates.
#### Kubernetes
My next big challenge is to take container orchestration to the next level. While Docker Swarm could meet the technical need, my primary goal is to gain hands-on experience with Kubernetes, and there's no better way to learn than by applying it to real-world use cases.
- CI/CD automation using Gitea Actions
- GitOps pipeline for Kubernetes using ArgoCD
- Hugo-based personal blog hosted with Docker
- Home automation with Zigbee2MQTT and Home Assistant
- VPN and remote access via WireGuard
- Infrastructure as Code with Terraform and Ansible
---
## Final Words
If you're curious about any part of the stack or want to know how I built something specific, feel free to check the related blog posts!
Thank you for taking the time to read through my homelab journey!
Building and refining this setup has been a great source of learning and fun, and I'm always looking for new ways to improve it.
If you've got ideas, feedback, better solutions, or just want to share your own setup, I'd love to hear from you. Drop me a message, challenge my choices, or inspire me with your story!


@@ -1,13 +0,0 @@
---
title: Recherche
slug: search
layout: search
outputs:
- html
- json
menu:
main:
weight: 30
params:
icon: search
---


@@ -1,90 +0,0 @@
---
slug: proxmox-cloud-init-vm-template
title: Proxmox - Créer un Template de VM Cloud-Init
description: Découvrez comment créer un template de VM Ubuntu réutilisable avec cloud-init dans Proxmox pour accélérer et simplifier le déploiement de machines virtuelles.
date: 2025-03-31
draft: false
tags:
- proxmox
- cloud-init
categories:
- homelab
---
## Intro
Créer un template de VM dans **Proxmox** avec **cloud-init** peut considérablement simplifier les déploiements de VM. Cet article décrit étape par étape la configuration d'un template de VM compatible **cloud-init** avec **Ubuntu** pour **Proxmox**.
Proxmox prend en charge cloud-init, un outil qui permet la configuration automatique des machines virtuelles immédiatement après leur provisionnement. Cela inclut la configuration du réseau, des clés SSH et d'autres paramètres initiaux.
Dans ce guide, nous allons créer un template de VM avec cloud-init activé, permettant ainsi un déploiement rapide de VM préconfigurées.
---
## Pourquoi Cloud-init ?
Cloud-init est un outil largement utilisé pour automatiser la configuration initiale des instances cloud. Il permet de configurer les clés SSH, le nom d'hôte, la configuration réseau et d'autres paramètres dès le premier démarrage, ce qui le rend idéal pour créer des templates de VM réutilisables en homelab ou en environnement de production.
[Documentation Proxmox Cloud-init](https://pve.proxmox.com/wiki/Cloud-Init_Support)
## Télécharger l'Image de l'OS
Tout d'abord, nous devons télécharger une image compatible cloud-init. Bien que Rocky Linux ait été initialement envisagé, le format `.img` n'était pas disponible et le format `.qcow2` posait problème. Nous allons donc utiliser l'image cloud d'Ubuntu.
Trouvez des images compatibles cloud dans le [Guide des images OpenStack](https://docs.openstack.org/image-guide/obtain-images.html).
Dans Proxmox, accédez à **Storage > ISO Images > Upload** pour uploader l'image téléchargée.
![Download window for ISO images in Proxmox](img/proxmox-download-iso-img.png)
## Créer la VM
Ensuite, on crée une VM en utilisant la ligne de commande (CLI) depuis le nœud Proxmox avec la commande suivante :
```bash
qm create 900 \
--memory 2048 \
--core 1 \
--net0 virtio,bridge=vmbr0 \
--scsihw virtio-scsi-pci \
--bios ovmf \
--machine q35 \
--efidisk0 ceph-workload:0,pre-enrolled-keys=0 \
--name ubuntu-cloud
```
Cela crée une VM avec le support UEFI, 2GB de RAM, et un seul cœur. Le paramètre `efidisk0` spécifie un disque EFI.
### Importer le Disque OS
Maintenant, on importe l'image disque téléchargée comme disque primaire :
```bash
qm set 900 --scsi0 ceph-workload:0,import-from=/var/lib/vz/template/iso/noble-server-cloudimg-amd64.img
```
### Configurer Cloud-init
On ajoute un lecteur CD cloud-init à la VM :
```bash
qm set 900 --scsi1 ceph-workload:cloudinit
```
On définit l'ordre de démarrage pour donner la priorité au disque principal par rapport au CD :
```bash
qm set 900 --boot order=scsi0
```
On ajoute un port série pour l'accès console :
```bash
qm set 900 --serial0 socket --vga serial0
```
## Convertir en Template
Après avoir configuré la VM, on fait un clic droit dessus dans l'interface Web de Proxmox et on sélectionne `Convert to template`. La création du template est alors terminée.
## Conclusion
Cette méthode permet un déploiement rapide, dans Proxmox, de VM préconfigurées avec cloud-init.
Le template peut désormais être utilisé pour générer de nouvelles instances avec des configurations personnalisées en fournissant les paramètres cloud-init nécessaires. Ceci est particulièrement utile pour déployer rapidement plusieurs instances avec des configurations de base similaires.
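À titre d'illustration de ce dernier point, un clone du template avec des paramètres cloud-init pourrait ressembler à ceci (l'ID de la nouvelle VM, son nom et le chemin de la clé SSH sont des exemples hypothétiques) :
```bash
# Cloner le template (ID 900) puis injecter les paramètres cloud-init (valeurs d'exemple)
qm clone 900 901 --name web-01 --full
qm set 901 --ciuser ubuntu \
           --sshkeys ~/.ssh/id_ed25519.pub \
           --ipconfig0 ip=dhcp
qm start 901
```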


@@ -1,224 +0,0 @@
---
slug: opnsense-crash-disk-panic
title: Mon Routeur OPNsense Crash, de la Panique à la Renaissance
description: L'histoire de comment j'ai survécu à un crash OPNsense à cause d'un disque défaillant et pourquoi un fichier XML a tout changé.
date: 2025-08-24
draft: false
tags:
- opnsense
categories:
- homelab
---
## Intro
Cette semaine, j'ai vécu mon premier vrai problème dans mon homelab, qui a fait tomber tout mon réseau à la maison.
Mon routeur OPNsense a crash et, après plusieurs tentatives de récupération ratées, j'ai finalement dû le réinstaller from scratch. Heureusement, presque toute la configuration est revenue grâce à un simple fichier XML. Dans cette histoire, je vais raconter ce qui s'est passé, ce que j'ai fait pour m'en sortir, et aussi ce que je n'aurais pas dû faire.
Ce genre d'exercice est la pire chose que vous souhaitez voir arriver, parce que ce n'est jamais amusant de voir tout exploser. Mais c'est de loin la meilleure façon d'apprendre.
## Le Calme Avant la Tempête
Ma box OPNsense tournait parfaitement depuis des mois. Routeur, pare-feu, DNS, DHCP, VLANs, VPN, reverse proxy et même contrôleur UniFi : toutes les pièces de mon homelab passent par elle. Mais pas seulement, elle fournit aussi Internet à la maison.
![Diagram of my home network ](img/homelab-network-schema.png)
Cette box est le cœur de mon réseau, sans elle, je ne peux quasiment rien faire. J'ai détaillé son fonctionnement dans ma section [Homelab]({{< ref "page/homelab" >}}). Tout “fonctionnait juste”, et je ne m'en inquiétais pas. J'étais confiant, sa sauvegarde vivait uniquement à l'intérieur de la machine…
Peut-être trop confiant.
## Le Redémarrage Inattendu
Sans prévenir, la box a redémarré toute seule, juste avant minuit. Par chance, je passais à côté de mon rack en allant me coucher. J'ai su qu'elle avait redémarré car j'ai entendu son petit bip de démarrage.
Je me suis demandé pourquoi le routeur avait redémarré sans mon accord. Dans mon lit, j'ai rapidement vérifié si Internet fonctionnait : oui. Mais aucun de mes services n'était disponible, ni la domotique, ni ce blog. J'étais fatigué, je réglerais ça le lendemain…
Au matin, en regardant les logs, j'ai trouvé le coupable :
```
panic: double fault
```
Un kernel panic. Mon routeur avait littéralement planté au niveau matériel.
## Premières Tentatives de Dépannage
Au début, l'impact semblait mineur. Un seul service ne redémarrait pas : Caddy, mon reverse proxy. Ce qui expliquait pourquoi mes services n'étaient pas accessibles.
En fouillant dans les logs, j'ai trouvé l'erreur :
```
caching certificate: decoding certificate metadata: unexpected end of JSON input
```
Un des certificats mis en cache avait été corrompu pendant le crash. En supprimant son dossier de cache, Caddy est reparti et, dun coup, tous mes services HTTPS étaient de retour.
Je pensais avoir évité le pire. Je n'ai pas cherché plus loin la cause réelle : les logs du kernel étaient pollués par une interface qui “flappait”, j'ai cru à un simple bug. À la place, je me suis lancé dans une mise à jour, ma première erreur.
Mon instance OPNsense était en version 25.1, et la 25.7 venait de sortir. Allons-y gaiement !
La mise à jour sest déroulée correctement, mais quelque chose clochait. En cherchant de nouvelles updates, jai vu une corruption dans `pkg`, la base de données du gestionnaire de paquets :
```
pkg: sqlite error while executing iterator in file pkgdb_iterator.c:1110: database disk image is malformed
```
🚨 Mon alarme interne s'est déclenchée. Jai pensé aux sauvegardes et jai immédiatement téléchargé la dernière :
![Backup configuration in OPNsense](img/opnsense-download-backup.png)
En cliquant sur le bouton `Download configuration`, jai récupéré le `config.xml` en cours dutilisation. Je pensais que ça suffirait.
## Corruption du Système de Fichiers
Jai tenté de réparer la base `pkg` de la pire façon possible : jai sauvegardé le dossier `/var/db/pkg` puis essayé de refaire un `bootstrap` :
```bash
cp -a /var/db/pkg /var/db/pkg.bak
pkg bootstrap -f
```
```
The package management tool is not yet installed on your system.
Do you want to fetch and install it now? [y/N]: y
Bootstrapping pkg from https://pkg.opnsense.org/FreeBSD:14:amd64/25.7/latest, please wait...
[...]
pkg-static: Fail to extract /usr/local/lib/libpkg.a from package: Write error
Failed to install the following 1 package(s): /tmp//pkg.pkg.scQnQs
[...]
A pre-built version of pkg could not be found for your system.
```
Jai vu un `Write error`. Je soupçonnais un problème disque. Jai lancé `fsck` et reçu un flot dincohérences :
```bash
fsck -n
```
```
[...]
INCORRECT BLOCK COUNT I=13221121 (208384 should be 208192)
INCORRECT BLOCK COUNT I=20112491 (8 should be 0)
INCORRECT BLOCK COUNT I=20352874 (570432 should be 569856)
[...]
FREE BLK COUNT(S) WRONG IN SUPERBLK
[...]
SUMMARY INFORMATION BAD
[...]
BLK(S) MISSING IN BIT MAPS
[...]
***** FILE SYSTEM IS LEFT MARKED AS DIRTY *****
```
Le système de fichiers root était en mauvais état.
Nayant que SSH et pas de console, jai forcé un `fsck` au prochain redémarrage :
```bash
sysrc fsck_y_enable="YES"
sysrc background_fsck="NO"
reboot
```
Au redémarrage, le système a été réparé suffisamment pour relancer `pkg bootstrap`. Mais la moitié des paquets système avaient disparu. Ma mise à jour précédente sur un disque corrompu mavait laissé avec un système bancal, à moitié installé, à moitié manquant.
## Quand ça empire
Jai découvert lutilitaire `opnsense-bootstrap`, censé remettre le système à plat :
- Suppression de tous les paquets installés
- Téléchargement et installation dun nouveau noyau/base 25.7
- Réinstallation des paquets standards
Parfait !
```
opnsense-bootstrap
```
```
This utility will attempt to turn this installation into the latest OPNsense 25.7 release. All packages will be deleted, the base system and kernel will be replaced, and if all went well the system will automatically reboot. Proceed with this action? [y/N]:
```
J'ai dit `y`. Ça a bien commencé, puis… plus rien. Plus de signal. Plus d'Internet. Je croyais que ce bootstrap allait me sauver. En fait, il m'a enterré.
🙈 Oups.
Après un moment, j'ai tenté de le redémarrer, mais impossible de me reconnecter en SSH. Pas le choix, j'ai dû sortir le routeur du rack, le poser sur mon bureau, brancher un écran et un clavier pour voir ce qui se passait.
## Repartir de zéro
Cétait mauvais signe :
```
Fatal error: Uncaught Error: Class "OPNsense\Core\Config" not found
in /usr/local/etc/inc/config.inc:143
```
Et les logs du bootstrap étaient pires :
```
bad dir ino … mangled entry
Input/output error
```
Le disque était en mauvais état. Je ne pouvais plus rien sauver. Il était temps de repartir de zéro. Heureusement, j'avais une sauvegarde… non ?
Jai téléchargé lISO OPNsense 25.7, créé une clé USB bootable, et réinstallé par-dessus, en laissant les paramètres par défaut.
## Le sauveur : `config.xml`
OPNsense garde toute sa configuration dans un seul fichier : `/conf/config.xml`. Ce fichier a été ma bouée de sauvetage.
J'ai copié le `config.xml` sauvegardé plus tôt sur ma clé USB. Une fois celle-ci branchée sur la machine fraîchement installée, j'ai remplacé le fichier :
```bash
mount -t msdosfs /dev/da0s1 /mnt
cp /mnt/config.xml /conf/config.xml
```
Jai remis le routeur dans le rack, croisé les doigts… *bip !* 🎉
Le DHCP m'a donné une adresse, bon signe. Je pouvais accéder à l'interface web, super. Ma configuration était là, à peu près tout sauf les plugins, comme prévu. Je ne pouvais pas les installer immédiatement, car ils nécessitaient une autre mise à jour. Mettons à jour !
Ce fichier XML à lui seul m'a permis de reconstruire mon routeur sans perdre la raison.
Sans DNS (AdGuard non installé), jai temporairement pointé le DNS pour le système vers `1.1.1.1`.
## Le Dernier Souffle
Lors de la mise à jour suivante, rebelote : erreurs, reboot, crash. La machine n'était de nouveau plus accessible…
Je pouvais officiellement déclarer mon disque NVMe mort.
🪦 Repose en paix, merci pour tes loyaux services.
Par chance, javais un NVMe Kingston 512 Go encore neuf, livré avec cette machine. Je ne l'avais jamais utilisé car j'avais préféré réutiliser celui à l'intérieur de mon serveur *Vertex*.
Jai refait linstallation d'OPNsense dessus, et cette fois tout a fonctionné : passage en 25.7.1 et réinstallation des plugins officiels que j'utilisais.
Pour les plugins custom (AdGuard Home et UniFi), il a fallu ajouter le repo tiers dans `/usr/local/etc/pkg/repos/mimugmail.conf` (documentation [ici](https://www.routerperformance.net/opnsense-repo/))
```json
mimugmail: {
url: "https://opn-repo.routerperformance.net/repo/${ABI}",
priority: 5,
enabled: yes
}
```
Après un dernier reboot, le routeur était presque prêt, mais je n'avais toujours pas de DNS. C'était à cause d'AdGuard Home, qui n'était pas configuré.
⚠️ La configuration des plugins tiers n'est pas sauvegardée dans `config.xml`.
Reconfigurer AdGuard Home n'était pas bien compliqué ; finalement mon DNS fonctionnait et tout était revenu à la normale… sauf le contrôleur UniFi.
## Leçons Apprises à la Dure
- **Les sauvegardes comptent** : je me retrouve toujours à penser que les sauvegardes ne sont pas fondamentales… jusqu'à ce qu'on ait besoin de restaurer et qu'il soit trop tard.
- **Gardez les sauvegardes hors de la machine** : j'ai eu de la chance de récupérer le `config.xml` avant que mon disque ne me lâche. J'aurais vraiment passé un mauvais moment à tout restaurer entièrement.
- **Vérifiez l'état de santé après un crash** : n'ignorez pas un kernel panic.
- **Erreurs I/O = alerte rouge** : j'ai perdu des heures à batailler avec un disque condamné.
- **Les plugins non officiels ne sont pas sauvegardés** : la configuration d'OPNsense et de ses plugins officiels est sauvegardée, ce n'est pas le cas pour les autres.
- **Mon routeur est un SPOF** (*point de défaillance unique*) : dans mon homelab, je voulais avoir le maximum d'éléments hautement disponibles, il me faut trouver une meilleure solution.
## Aller de lAvant
Je dois sérieusement repenser ma stratégie de sauvegarde. Jai toujours repoussé, jusquà ce quil soit trop tard. Ça faisait longtemps que je navais pas subi une panne matérielle. Quand ça arrive, ça pique.
Au départ, je pensais quun routeur sur son propre hardware était plus sûr. Javais tort. Je vais réfléchir à une virtualisation sous Proxmox pour lavoir en haute dispo. Un beau projet en perspective !
## Conclusion
Mon routeur OPNsense est passé dun simple redémarrage aléatoire à un disque mort, avec un vrai rollercoaster de dépannage. Au final, je suis presque content que ça soit arrivé : jai appris bien plus quavec une mise à jour sans accroc.
Si vous utilisez OPNsense (ou nimporte quel routeur), retenez ça :
**Gardez une sauvegarde hors de la machine.**
Parce que quand ça casse, et ça finira par casser, cest ce petit fichier XML qui peut sauver tout votre homelab.
Restez safe, faites des sauvegardes.


@@ -1,225 +0,0 @@
---
slug: opnsense-crash-disk-panic
title: My OPNsense Router Crash, from Panic to Reborn
description: The story of how I survived an OPNsense crash with a failing disk and why one backup XML made all the difference.
date: 2025-08-24
draft: false
tags:
- opnsense
categories:
- homelab
---
## Intro
This week, I experienced my first real problem on my homelab, which caused my whole home network to go down.
My OPNsense router crashed and, after several failed recovery attempts, I finally had to reinstall it from scratch. Luckily, almost all of the configuration came back thanks to a single XML file. In this story, I will tell you what happened, what I did to recover and what I shouldn't have done.
This kind of exercise is the worst thing you want to happen, because it's never fun when everything goes boom, but it is by far the best way to learn.
## The Calm Before the Storm
My OPNsense box had been running smoothly for months. Router, firewall, DNS, DHCP, VLANs, VPN, reverse proxy and even UniFi controller: all the pieces of my homelab run through it. Not only that, it also serves the internet at home.
![Diagram of my home network ](img/homelab-network-schema.png)
This box is the heart of my network, without it, I can hardly do anything. I have detailed how it works in my [Homelab]({{< ref "page/homelab" >}}) section. It was “just working”, and I wasn't worried about it. I felt confident, its backup was living only inside the machine...
Maybe too confident.
## The Unexpected Reboot
Out of nowhere, the box rebooted by itself just before midnight. By chance, I was just passing by my rack on my way to bed. I knew it had rebooted because I heard its little startup beep.
I wondered why the router had restarted without my consent. From my bed, I quickly checked if the internet was working, and it was. But none of my services were available, not my home automation, not even this blog. I was tired, I would fix that the next day...
In the morning, looking at the logs, I found the culprit:
```
panic: double fault
```
A kernel panic. My router had literally crashed at the hardware level.
## First Troubleshooting Attempts
At first, the impact seemed minor. Only one service wasn't coming back up: Caddy, my reverse proxy. That explained why my services weren't available.
Digging into the logs, I found the error:
```
caching certificate: decoding certificate metadata: unexpected end of JSON input
```
It turned out that one of the cached certificates had been corrupted during the crash. Deleting its cache folder fixed Caddy, and suddenly all my HTTPS services were back online.
I thought I had dodged the bullet. I didn't investigate the root cause much: the kernel logs were polluted by one of the interfaces flapping, so I assumed it was just a bug. Instead, I went ahead and checked for updates, my first mistake.
My OPNsense instance was on version 25.1, and the newer 25.7 was available. Let's upgrade it, yay!
The upgrade rolled out successfully, but something was wrong. When I checked for further updates, I saw corruption in `pkg`, the package manager database:
```
pkg: sqlite error while executing iterator in file pkgdb_iterator.c:1110: database disk image is malformed
```
🚨 My internal alarm went off. I thought about backups and immediately downloaded the latest one:
![Backup configuration in OPNsense](img/opnsense-download-backup.png)
Clicking the `Download configuration` button, I downloaded the current `config.xml` in use by the instance. I thought it would be enough.
## Filesystem Corruption
I decided to recover the `pkg` database in the worst possible way, backing up the `/var/db/pkg` folder and trying to `bootstrap` it:
```bash
cp -a /var/db/pkg /var/db/pkg.bak
pkg bootstrap -f
```
```
The package management tool is not yet installed on your system.
Do you want to fetch and install it now? [y/N]: y
Bootstrapping pkg from https://pkg.opnsense.org/FreeBSD:14:amd64/25.7/latest, please wait...
[...]
pkg-static: Fail to extract /usr/local/lib/libpkg.a from package: Write error
Failed to install the following 1 package(s): /tmp//pkg.pkg.scQnQs
[...]
A pre-built version of pkg could not be found for your system.
```
I saw a `Write error` and suspected a filesystem problem. I ran `fsck`, and the output was a flood of inconsistencies:
```bash
fsck -n
```
```
[...]
INCORRECT BLOCK COUNT I=13221121 (208384 should be 208192)
INCORRECT BLOCK COUNT I=20112491 (8 should be 0)
INCORRECT BLOCK COUNT I=20352874 (570432 should be 569856)
[...]
FREE BLK COUNT(S) WRONG IN SUPERBLK
[...]
SUMMARY INFORMATION BAD
[...]
BLK(S) MISSING IN BIT MAPS
[...]
***** FILE SYSTEM IS LEFT MARKED AS DIRTY *****
```
The root filesystem was in bad shape.
Since I only had SSH at this point and no console access, I set up a forced `fsck` for next reboot:
```bash
sysrc fsck_y_enable="YES"
sysrc background_fsck="NO"
reboot
```
On the next boot, the filesystem was repaired enough to let me bootstrap `pkg` again, but most of the system packages were gone. My earlier upgrade while the disk was dirty had left me with a half-installed, half-missing system.
## When Things Got Worse
I discovered the utility `opnsense-bootstrap`, which promises to reinstall all packages and reset the system to a clean release, exactly what I was looking for:
- Remove all installed packages.
- Fresh 25.7 base system and kernel will be downloaded and installed.
- All standard OPNsense packages will be reinstalled.
Wonderful!
```
opnsense-bootstrap
```
```
This utility will attempt to turn this installation into the latest OPNsense 25.7 release. All packages will be deleted, the base system and kernel will be replaced, and if all went well the system will automatically reboot. Proceed with this action? [y/N]:
```
I pressed `y`. It started well, but then... no more signal, no more internet. I thought this bootstrap would save me. Instead, it buried me.
🙈 Oops.
After a while, I tried to reboot, but I couldn't connect back via SSH. With no other option, I had to pull the router out of the rack, put it on my desk and plug in a screen and a keyboard to see what was going on.
## Starting Over the Hard Way
This was bad:
```
Fatal error: Uncaught Error: Class "OPNsense\Core\Config" not found
in /usr/local/etc/inc/config.inc:143
```
Checking the bootstrap logs, this was even worse:
```
bad dir ino … mangled entry
Input/output error
```
The disk was in bad shape, and at this point I couldn't save the install anymore. Time to start from scratch. Luckily, I had a backup… right?
I downloaded the latest OPNsense ISO (v25.7) and wrote it to a USB stick. I reinstalled OPNsense over the current installation, keeping everything as default.
## The Lifesaver: `config.xml`
OPNsense keeps the whole configuration in a single file: `/conf/config.xml`. That file was my lifeline.
I copied the `config.xml` file saved earlier onto the USB stick. Once it was plugged into the fresh OPNsense box, I overwrote the file:
```bash
mount -t msdosfs /dev/da0s1 /mnt
cp /mnt/config.xml /conf/config.xml
```
I placed the router back in the rack, powered it on and crossed my fingers... *beep!* 🎉
The DHCP gave me an address, good start. I could reach its URL, awesome. My configuration was there, almost everything except the plugins, as expected. I couldn't install them right away because they needed another update, so let's update!
This single XML file is the reason I could rebuild my router without losing my sanity.
DNS was down because the AdGuard Home plugin wasn't installed, so I temporarily set the system DNS to `1.1.1.1`.
## The Last Breath
During that upgrade, the system threw errors again… and then rebooted itself. Another crash, and it wouldn't turn back on...
I could officially declare my NVMe drive dead.
🪦 Rest in peace, thank you for your loyal service.
Luckily, I had a spare 512GB Kingston NVMe that came with that box. I never used it because I preferred to reuse the one inside my *Vertex* server.
I redid the same steps to reinstall OPNsense on that disk and this time everything worked: I could finally update OPNsense to 25.7.1 and reinstall all the official plugins that I was using.
To install custom plugins (AdGuard Home and UniFi), I had to add the custom repository in `/usr/local/etc/pkg/repos/mimugmail.conf` (documentation [here](https://www.routerperformance.net/opnsense-repo/)):
```json
mimugmail: {
url: "https://opn-repo.routerperformance.net/repo/${ABI}",
priority: 5,
enabled: yes
}
```
After a final reboot, the router was almost ready, but I still didn't have DNS. This was because AdGuard Home wasn't configured.
⚠️ Custom plugin configuration is not saved within the backup in `config.xml`.
Reconfiguring AdGuard Home was pretty straightforward; finally my DNS was working and everything was back to normal... except the UniFi controller.
## Lessons Learned the Hard Way
- **Backups matter**: I always find myself thinking backups are not relevant... until I need to restore and it's too late.
- **Keep backups off the box**: I was lucky to grab the `config.xml` before my disk died; otherwise I would have had a really hard time fully recovering.
- **Health check after a crash**: do not ignore a kernel panic.
- **I/O errors = red flag**: I should have stopped trying to repair. I lost hours fighting a dead disk.
- **Custom plugin configs aren't included**: OPNsense configuration and its official plugins are saved in the backup, but this is not the case for the others.
- **My router is a SPOF** (*single point of failure*): in my homelab, I wanted most of my elements to be highly available, so I need to find a better solution.
## Moving Forward
I really need to rethink my backup strategy. I'm too lazy and always put it off, until it's too late. It's been a long time since I was last hit by a hardware failure. When it strikes, it hurts.
Initially I wanted my router on its own hardware because I thought it was safer; I was dead wrong. I will look into virtualizing OPNsense on Proxmox to make it highly available, a great project ahead!
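As a first step in that direction, a minimal sketch would be to push the configuration off the box on a schedule; the destination host and path below are placeholders:
```bash
# Copy the running configuration to another machine, with a dated filename
scp /conf/config.xml backup@nas.local:/backups/opnsense/config-$(date +%F).xml
```
Run from a cron job, this keeps a dated copy of `config.xml` outside the router.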
## Conclusion
My OPNsense router went from a random reboot to a dead disk, with a rollercoaster of troubleshooting in between. In the end, I'm almost happy it happened: it taught me more than any smooth upgrade ever could.
If you run OPNsense (or any router), remember this:
**Keep a backup off the box.**
Because when things go wrong, and eventually they will, that one little XML file can save your homelab.
Stay safe, make backups.


@@ -1,148 +0,0 @@
---
slug: proxmox-cluster-networking-sdn
title: Simplifier la gestion des VLAN dans Proxmox VE avec le SDN
description: Découvrez comment centraliser la configuration des VLAN dans Proxmox VE grâce aux zones SDN et aux VNets, pour un réseau plus simple et cohérent.
date: 2025-09-12
draft: false
tags:
- proxmox
categories:
- homelab
---
## Intro
Quand jai construit mon cluster **Proxmox VE 8** pour la première fois, le réseau nétait pas ma priorité. Je voulais simplement remplacer rapidement un vieux serveur physique, alors jai donné la même configuration de base à chacun de mes trois nœuds, créé le cluster et commencé à créer des VM :
![Configuration réseau dun nœud Proxmox](img/proxmox-node-network-configuration.png)
Cela a bien fonctionné pendant un moment. Mais comme je prévois de virtualiser mon routeur **OPNsense**, jai besoin de quelque chose de plus structuré et cohérent. Cest là que la fonctionnalité **S**oftware-**D**efined **N**etworking (SDN) de Proxmox entre en jeu.
---
## Mon Réseau Homelab
Par défaut, chaque nœud Proxmox dispose de sa propre zone locale, appelée `localnetwork`, qui contient le pont Linux par défaut (`vmbr0`) comme VNet :
![Proxmox default `localnetwork` zones](img/proxmox-default-localnetwork-zone.png)
Cest suffisant pour des configurations isolées, mais rien nest coordonné au niveau du cluster.
Mon objectif est simple : déclarer les VLAN que jutilise déjà dans mon réseau, afin de pouvoir y rattacher des VM facilement depuis nimporte quel nœud.
Voici la liste des VLAN que jutilise actuellement :
| Nom | ID | Usage |
| --------- | ---- | ------------------------------ |
| Mgmt | 1 | Administration |
| User | 13 | Réseau domestique |
| IoT | 37 | IoT et équipements non fiables |
| DMZ | 55 | Services exposés à Internet |
| Lab | 66 | Réseau de lab |
| Heartbeat | 77 | Heartbeat du cluster Proxmox |
| Ceph | 99 | Stockage Ceph |
| VPN | 1337 | Réseau WireGuard |
---
## Aperçu du SDN Proxmox
Le Software-Defined Networking de Proxmox permet de définir des zones et réseaux virtuels à léchelle du cluster. Au lieu de répéter la configuration des VLAN sur chaque nœud, le SDN offre une vue centralisée et assure la cohérence.
En interne, Proxmox repose essentiellement sur les fonctionnalités réseau standard de Linux, ce qui évite dajouter des dépendances externes et garantit la stabilité.
Les configurations SDN sont stockées dans `/etc/pve/sdn` et répliquées sur lensemble du cluster. Les changements sont appliqués de manière atomique (on prépare les modifications puis on clique sur `Apply`), ce qui rend les déploiements plus sûrs.
### Zones
Une **Zone** définit un domaine réseau séparé. Les zones peuvent couvrir certains nœuds et contenir des **VNets**.
Proxmox prend en charge plusieurs types de zones :
- **Simple** : pont isolé (bridge) avec routage L3/NAT
- **VLAN** : segmentation classique via VLAN
- **QinQ** : empilement de VLAN (IEEE 802.1ad)
- **VXLAN** : réseau L2 via encapsulation UDP
- **EVPN** : VXLAN avec BGP pour du routage L3 dynamique
Comme mon réseau domestique utilise déjà des VLAN, jai créé une **zone VLAN** appelée `homelan`, en utilisant `vmbr0` comme pont et en lappliquant à tout le cluster :
![Create a VLAN zone in the Proxmox SDN](img/proxmox-create-vlan-zone-homelan.png)
### VNets
Un **VNet** est un réseau virtuel à lintérieur dune zone. Dans une zone VLAN, chaque VNet correspond à un ID VLAN spécifique.
Jai commencé par créer `vlan55` dans la zone `homelan` pour mon réseau DMZ :
![Create a VNet for VLAN 55 in the homelan zone](img/proxmox-create-vlan-vnet-homelan.png)
Puis jai ajouté les VNets correspondant à la plupart de mes VLAN, puisque je prévois de les rattacher à une VM OPNsense :
![All my VLANs created in the Proxmox SDN](img/proxmox-sdn-all-vlan-homelan.png)
Enfin, jai appliqué la configuration dans **Datacenter → SDN** :
![Application de la configuration SDN dans Proxmox](img/proxmox-apply-sdn-homelan-configuration.png)
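Pour référence, l'équivalent en ligne de commande avec `pvesh` ressemblerait à ceci (esquisse basée sur l'API SDN, syntaxe à vérifier selon votre version de Proxmox) :
```bash
# Créer la zone VLAN "homelan" adossée au pont vmbr0
pvesh create /cluster/sdn/zones --type vlan --zone homelan --bridge vmbr0

# Créer un VNet pour le VLAN 55 dans cette zone
pvesh create /cluster/sdn/vnets --vnet vlan55 --zone homelan --tag 55

# Appliquer la configuration SDN sur l'ensemble du cluster
pvesh set /cluster/sdn
```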
---
## Test de la Configuration Réseau
Dans une vieille VM que je n'utilise plus, je remplace l'actuel `vmbr0` avec le VLAN tag 66 par mon nouveau VNet `vlan66`:
![Change the network bridge in a VM](img/proxmox-change-vm-nic-vlan-vnet.png)
Après l'avoir démarrée, la VM obtient une IP du DHCP d'OPNsense sur ce VLAN, ce qui est super. J'essaye également de pinger une autre machine et ça fonctionne :
![Ping another machine in the same VLAN](img/proxmox-console-ping-vm-vlan-66.png)
---
## Mise à jour de Cloud-Init et Terraform
Pour aller plus loin, jai mis à jour le pont réseau utilisé dans mon **template cloud-init**, dont j'avais détaillé la création dans [cet article]({{< ref "post/1-proxmox-cloud-init-vm-template" >}}).
Comme avec la VM précédente, jai remplacé `vmbr0` et le tag VLAN 66 par le nouveau VNet `vlan66`.
Jai aussi adapté mon code **Terraform** pour refléter ce changement :
![Mise à jour du code Terraform pour vlan66](img/terraform-code-update-vlan66.png)
Ensuite, jai validé quaucune régression nétait introduite en déployant une VM de test :
```bash
terraform apply -var 'vm_name=vm-test-vnet'
```
```plaintext
data.proxmox_virtual_environment_vms.template: Reading...
data.proxmox_virtual_environment_vms.template: Read complete after 0s [id=23b17aea-d9f7-4f28-847f-41bb013262ea]
[...]
Plan: 2 to add, 0 to change, 0 to destroy.
Changes to Outputs:
+ vm_ip = (known after apply)
Do you want to perform these actions?
Terraform will perform the actions described above.
Only 'yes' will be accepted to approve.
Enter a value: yes
proxmox_virtual_environment_file.cloud_config: Creating...
proxmox_virtual_environment_file.cloud_config: Creation complete after 1s [id=local:snippets/vm.cloud-config.yaml]
proxmox_virtual_environment_vm.vm: Creating...
proxmox_virtual_environment_vm.vm: Still creating... [10s elapsed]
[...]
proxmox_virtual_environment_vm.vm: Still creating... [3m0s elapsed]
proxmox_virtual_environment_vm.vm: Creation complete after 3m9s [id=119]
Apply complete! Resources: 2 added, 0 changed, 0 destroyed.
Outputs:
vm_ip = "192.168.66.181"
```
La création sest déroulée sans problème, tout est bon :
![VM déployée par Terraform sur vlan66](img/proxmox-terraform-test-deploy-vlan66.png)
---
## Conclusion
La mise en place du SDN Proxmox avec une **zone VLAN** est simple et très pratique. Au lieu de définir manuellement un VLAN sur chaque VM, je sélectionne désormais directement le bon VNet, et tout reste cohérent dans le cluster.
| Étape | Avant SDN | Après SDN |
| -------------------- | ----------------------------- | ----------------------------------- |
| Rattacher une VM | `vmbr0` + tag VLAN manuel | Sélection du VNet approprié |
| VLANs sur les nœuds  | Config répétée sur chaque nœud | Centralisée via le SDN du cluster   |
| Gestion des adresses | Manuel ou via DHCP uniquement | IPAM optionnel via sous-réseaux SDN |
Mon cluster est maintenant prêt à héberger mon **routeur OPNsense**, et cette base ouvre la voie à dautres expérimentations, comme les overlays VXLAN ou lEVPN avec BGP.
À suivre pour la prochaine étape !


@@ -1,148 +0,0 @@
---
slug: proxmox-cluster-networking-sdn
title: Simplifying VLAN Management in Proxmox VE with SDN
description: Learn how to centralize VLAN configuration in Proxmox VE using SDN zones and VNets, making VM networking easier and more consistent.
date: 2025-09-12
draft: false
tags:
- proxmox
categories:
- homelab
---
## Intro
When I first built my **Proxmox VE 8** cluster, networking wasnt my main concern. I just wanted to replace an old physical server quickly, so I gave each of my three nodes the same basic config, created the cluster, and started running VMs:
![Network configuration of a Proxmox node](img/proxmox-node-network-configuration.png)
That worked fine for a while. But as I plan to virtualize my **OPNsense** router, I need something more structured and consistent. This is where Proxmox **S**oftware-**D**efined **N**etworking (SDN) feature comes in.
---
## My Homelab Network
By default, every Proxmox node comes with its own local zone, called `localnetwork`, which contains the default Linux bridge (`vmbr0`) as a VNet:
![Proxmox default `localnetwork` zones](img/proxmox-default-localnetwork-zone.png)
Thats fine for isolated setups, but at the cluster level nothing is coordinated.
What I want is simple: declare the VLANs I already use in my network, so I can attach VMs to them easily from any node.
Heres the list of VLANs I use today:
| Name | ID | Purpose |
| --------- | ---- | ---------------------------- |
| Mgmt | 1 | Management |
| User | 13 | Home network |
| IoT | 37 | IoT and untrusted equipments |
| DMZ | 55 | Internet facing |
| Lab | 66 | Lab network |
| Heartbeat | 77 | Proxmox cluster heartbeat |
| Ceph | 99 | Ceph storage |
| VPN | 1337 | Wireguard network |
---
## Proxmox SDN Overview
Proxmox Software-Defined Networking makes it possible to define cluster-wide virtual zones and networks. Instead of repeating VLAN configs on every node, SDN gives you a central view and ensures consistency.
Under the hood, Proxmox mostly uses standard Linux networking, avoiding extra dependencies and keeping things stable.
SDN configurations are stored in `/etc/pve/sdn`, which is replicated across the cluster. Changes are applied atomically (you prepare them, then hit `Apply` once), making rollouts safer.
### Zones
A **Zone** defines a separate networking domain. Zones can span specific nodes and contain **VNets**.
Proxmox supports several zone types:
- **Simple**: Isolated Bridge. A simple layer 3 routing bridge (NAT)
- **VLAN**: Virtual LANs are the classic method of subdividing a LAN
- **QinQ**: Stacked VLAN (IEEE 802.1ad)
- **VXLAN**: Layer 2 VXLAN network via a UDP tunnel
- **EVPN**: VXLAN with BGP to establish Layer 3 routing
Since my home network already relies on VLANs, I created a **VLAN Zone** named `homelan`, using `vmbr0` as the bridge and applying it cluster-wide:
![Create a VLAN zone in the Proxmox SDN](img/proxmox-create-vlan-zone-homelan.png)
### VNets
A **VNet** is a virtual network inside a zone. In a VLAN zone, each VNet corresponds to a specific VLAN ID.
I started by creating `vlan55` in the `homelan` zone for my DMZ network:
![Create a VNet for VLAN 55 in the homelan zone](img/proxmox-create-vlan-vnet-homelan.png)
Then I added VNets for most of my VLANs, since I plan to attach them to an OPNsense VM:
![All my VLANs created in the Proxmox SDN](img/proxmox-sdn-all-vlan-homelan.png)
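Behind the scenes, these definitions end up as plain text files under `/etc/pve/sdn`, replicated across the cluster. Reconstructed from memory (exact fields may differ on your version), they look roughly like this:
```plaintext
# /etc/pve/sdn/zones.cfg
vlan: homelan
        bridge vmbr0

# /etc/pve/sdn/vnets.cfg
vnet: vlan55
        zone homelan
        tag 55
```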
Finally, I applied the configuration in **Datacenter → SDN**:
![Applying the SDN configuration in Proxmox](img/proxmox-apply-sdn-homelan-configuration.png)
---
## Test the Network Configuration
In an old VM that I don't use anymore, I swap the current `vmbr0` with VLAN tag 66 for my new VNet `vlan66`:
![Change the network bridge in a VM](img/proxmox-change-vm-nic-vlan-vnet.png)
After starting it, the VM gets an IP from the OPNsense DHCP server on that VLAN, which looks good. I also try to ping another machine and it works:
![Ping another machine in the same VLAN](img/proxmox-console-ping-vm-vlan-66.png)
---
## Update Cloud-Init Template and Terraform
To go further, I update the bridge used in my **cloud-init** template, whose creation I detailed in this [post]({{< ref "post/1-proxmox-cloud-init-vm-template" >}}). Much like with that VM, I swap the current `vmbr0` with VLAN tag 66 for my new VNet `vlan66`.
I also update the **Terraform** code to take this change into account:
![Terraform code update for vlan66](img/terraform-code-update-vlan66.png)
I quickly check that there is no regression and that I can still deploy a VM with Terraform:
```bash
terraform apply -var 'vm_name=vm-test-vnet'
```
```plaintext
data.proxmox_virtual_environment_vms.template: Reading...
data.proxmox_virtual_environment_vms.template: Read complete after 0s [id=23b17aea-d9f7-4f28-847f-41bb013262ea]
[...]
Plan: 2 to add, 0 to change, 0 to destroy.
Changes to Outputs:
+ vm_ip = (known after apply)
Do you want to perform these actions?
Terraform will perform the actions described above.
Only 'yes' will be accepted to approve.
Enter a value: yes
proxmox_virtual_environment_file.cloud_config: Creating...
proxmox_virtual_environment_file.cloud_config: Creation complete after 1s [id=local:snippets/vm.cloud-config.yaml]
proxmox_virtual_environment_vm.vm: Creating...
proxmox_virtual_environment_vm.vm: Still creating... [10s elapsed]
[...]
proxmox_virtual_environment_vm.vm: Still creating... [3m0s elapsed]
proxmox_virtual_environment_vm.vm: Creation complete after 3m9s [id=119]
Apply complete! Resources: 2 added, 0 changed, 0 destroyed.
Outputs:
vm_ip = "192.168.66.181"
```
The VM deployed without any issue, everything is OK:
![VM deployed by Terraform on vlan66](img/proxmox-terraform-test-deploy-vlan66.png)
---
## Conclusion
Setting up Proxmox SDN with a **VLAN zone** turned out to be straightforward and very useful. Instead of tagging VLANs manually per VM, I now just pick the right VNet, and everything stays consistent across the cluster.
| Step | Before SDN | After SDN |
| ----------------- | ------------------------------- | ------------------------------ |
| Attach VM to VLAN | `vmbr0` + set VLAN tag manually | Select the right VNet directly |
| VLANs on nodes | Repeated config per node | Centralized in cluster SDN |
| IP management | Manual or DHCP only | Optional IPAM via SDN subnets |
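The last row refers to SDN subnets, which are not used in this setup; as a hypothetical example, one could be attached to a VNet like this (untested sketch, parameters as I recall them from the SDN API):
```bash
# Declare a subnet with a gateway on the vlan66 VNet, then apply the SDN config
pvesh create /cluster/sdn/vnets/vlan66/subnets --subnet 192.168.66.0/24 --type subnet --gateway 192.168.66.1
pvesh set /cluster/sdn
```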
This prepares my cluster to host my **OPNsense router**, and it also sets the stage for future experiments, like trying out VXLAN overlays or EVPN with BGP.
See you next time for the next step!


@@ -1,281 +0,0 @@
---
slug: opnsense-virtualization-highly-available
title: Construire un Cluster OPNsense Hautement Disponible sur Proxmox VE
description: Une preuve de concept montrant comment virtualiser OPNsense sur Proxmox VE, configurer la haute disponibilité avec CARP et pfSync, et gérer une seule IP WAN.
date: 2025-09-29
draft: false
tags:
- opnsense
- proxmox
- high-availability
categories:
- homelab
---
## Intro
Jai récemment rencontré mon premier vrai problème, ma box **OPNsense** physique a planté à cause dun _kernel panic_. Jai détaillé ce qu'il s'est passé dans [cet article]({{< ref "post/10-opnsense-crash-disk-panic" >}}).
Cette panne ma fait repenser mon installation. Un seul pare-feu est un point de défaillance unique, donc pour améliorer la résilience jai décidé de prendre une nouvelle approche : **virtualiser OPNsense**.
Évidemment, faire tourner une seule VM ne suffirait pas. Pour obtenir une vraie redondance, il me faut deux instances OPNsense en **Haute Disponibilité**, lune active et lautre en attente.
Avant de déployer ça sur mon réseau, jai voulu valider lidée dans mon homelab. Dans cet article, je vais détailler la preuve de concept : déployer deux VM OPNsense dans un cluster **Proxmox VE** et les configurer pour fournir un pare-feu hautement disponible.
---
## Infrastructure Actuelle
Au sommet de mon installation, mon modem FAI, une _Freebox_ en mode bridge, est relié directement à l'interface `igc0` de ma box OPNsense, qui sert d'interface **WAN**. Sur `igc1`, le **LAN** est connecté à mon switch principal via un port trunk, avec le VLAN 1 comme VLAN natif pour mon réseau de management.
Ce switch relie également mes trois nœuds Proxmox, chacun sur un port trunk avec le même VLAN natif. Chaque nœud dispose de deux cartes réseau : une pour le trafic général, et lautre dédiée au réseau de stockage Ceph, connecté à un switch séparé de 2,5 Gbps.
Depuis le crash dOPNsense, jai simplifié larchitecture en supprimant le lien LACP, qui napportait pas de réelle valeur :
![Current homelab network diagram](img/homelan-current-physical-layout.png)
Jusquà récemment, le réseau Proxmox de mon cluster était très basique : chaque nœud était configuré individuellement sans véritable logique commune. Cela a changé après la découverte du SDN Proxmox, qui ma permis de centraliser les définitions de VLAN sur lensemble du cluster. Jai décrit cette étape dans [cet article]({{< ref "post/11-proxmox-cluster-networking-sdn" >}}).
---
## Preuve de Concept
Place au lab. Voici les étapes principales :
1. Ajouter quelques VLANs dans mon homelab
2. Créer un faux routeur FAI
3. Construire deux VMs OPNsense
4. Configurer la haute disponibilité
5. Tester la bascule
![Diagram of the POC for OPNsense high availability](img/poc-opnsense-diagram.png)
### Ajouter des VLANs dans mon homelab
Pour cette expérimentation, je crée trois nouveaux VLANs :
- **VLAN 101** : _POC WAN_
- **VLAN 102** : _POC LAN_
- **VLAN 103** : _POC pfSync_
Dans linterface Proxmox, je vais dans `Datacenter` > `SDN` > `VNets` et je clique sur `Create` :
![Create POC VLANs in the Proxmox SDN](img/proxmox-sdn-create-poc-vlans.png)
Une fois les trois VLANs créés, japplique la configuration.
Jajoute ensuite ces trois VLANs dans mon contrôleur UniFi. Ici, seul lID et le nom sont nécessaires, le contrôleur se charge de les propager via les trunks connectés à mes nœuds Proxmox VE.
### Créer une VM “Fausse Box FAI”
Pour simuler mon modem FAI actuel, jai créé une VM appelée `fake-freebox`. Cette VM route le trafic entre les réseaux _POC WAN_ et _Lab_, et fait tourner un serveur DHCP qui ne délivre quun seul bail, exactement comme ma vraie Freebox en mode bridge.
Cette VM dispose de 2 cartes réseau, que je configure avec Netplan :
- `eth0` (_POC WAN_ VLAN 101) : adresse IP statique `10.101.0.254/24`
- `enp6s19` (Lab VLAN 66) : adresse IP obtenue en DHCP depuis mon routeur OPNsense actuel, en amont
```yaml
network:
version: 2
ethernets:
eth0:
addresses:
- 10.101.0.254/24
enp6s19:
dhcp4: true
```
Jactive ensuite le routage IP pour permettre à cette VM de router le trafic :
```bash
echo "net.ipv4.ip_forward=1" | sudo tee -a /etc/sysctl.conf
sudo sysctl -p
```
Puis je configure du masquage (NAT) afin que les paquets sortant via le réseau Lab ne soient pas rejetés par mon OPNsense actuel :
```bash
sudo iptables -t nat -A POSTROUTING -o enp6s19 -j MASQUERADE
sudo apt install iptables-persistent -y
sudo netfilter-persistent save
```
Jinstalle `dnsmasq` comme serveur DHCP léger :
```bash
sudo apt install dnsmasq -y
```
Dans `/etc/dnsmasq.conf`, je configure un bail unique (`10.101.0.150`) et je pointe le DNS vers mon OPNsense actuel, sur le VLAN _Lab_ :
```
interface=eth0
bind-interfaces
dhcp-range=10.101.0.150,10.101.0.150,255.255.255.0,12h
dhcp-option=3,10.101.0.254 # default gateway = this VM
dhcp-option=6,192.168.66.1 # DNS server
```
Je redémarre le service `dnsmasq` pour appliquer la configuration :
```bash
sudo systemctl restart dnsmasq
```
La VM `fake-freebox` est maintenant prête à fournir du DHCP sur le VLAN 101, avec un seul bail disponible.
### Construire les VMs OPNsense
Je commence par télécharger lISO dOPNsense et je lupload sur un de mes nœuds Proxmox :
![Upload de lISO OPNsense dans Proxmox](img/proxmox-upload-opnsense-iso.png)
#### Création de la VM
Je crée la première VM `poc-opnsense-1` avec les paramètres suivants :
- Type dOS : Linux (même si OPNsense est basé sur FreeBSD)
- Type de machine : `q35`
- BIOS : `OVMF (UEFI)`, stockage EFI sur mon pool Ceph
- Disque : 20 Gio sur Ceph
- CPU/RAM : 2 vCPU, 2 Gio de RAM
- Cartes réseau :
1. VLAN 101 (_POC WAN_)
2. VLAN 102 (_POC LAN_)
3. VLAN 103 (_POC pfSync_)
![OPNsense VM settings in Proxmox](img/proxmox-create-poc-vm-opnsense.png)
Avant de la démarrer, je clone cette VM pour préparer la seconde : `poc-opnsense-2`
Au premier démarrage, je tombe sur une erreur “access denied”. Pour corriger, jentre dans le BIOS, **Device Manager > Secure Boot Configuration**, je décoche _Attempt Secure Boot_ et je redémarre :
![Disable Secure Boot in Proxmox BIOS](img/proxmox-disable-secure-boot-option.png)
#### Installation dOPNsense
La VM démarre sur lISO, je ne touche à rien jusquà lécran de login :
![OPNsense CLI login screen in LiveCD](img/opnsense-vm-installation-welcome.png)
Je me connecte avec `installer` / `opnsense` et je lance linstallateur. Je sélectionne le disque QEMU de 20 Go comme destination et je démarre linstallation :
![Barre de progression de linstallation OPNsense](img/opnsense-vm-installation-progress-bar.png)
Une fois terminé, je retire lISO du lecteur et je redémarre la machine.
#### Configuration de Base dOPNsense
Au redémarrage, je me connecte avec `root` / `opnsense` et jarrive au menu CLI :
![Menu CLI après une installation fraîche dOPNsense](img/opnsense-vm-installation-cli-menu.png)
Avec loption 1, je réassigne les interfaces :
![Configuration des interfaces dans OPNsense via le CLI](img/opnsense-vm-installation-assign-interfaces.png)
Linterface WAN récupère bien `10.101.0.150/24` depuis la `fake-freebox`. Je configure le LAN sur `10.102.0.2/24` et jajoute un pool DHCP de `10.102.0.10` à `10.102.0.99` :
![Interface WAN OPNsense recevant une IP depuis la VM `fake-freebox`](img/opnsense-vm-installation-interfaces-configured.png)
✅ La première VM est prête, je reproduis lopération pour la seconde OPNsense `poc-opnsense-2`, qui aura lIP `10.102.0.3`.
### Configurer OPNsense en Haute Disponibilité
Avec les deux VMs OPNsense opérationnelles, il est temps de passer à la configuration via le WebGUI. Pour y accéder, jai connecté une VM Windows au VLAN _POC LAN_ et ouvert lIP de lOPNsense sur le port 443 :
![OPNsense WebGUI depuis une VM Windows](img/opnsense-vm-webgui-from-poc-lan.png)
#### Ajouter lInterface pfSync
La troisième carte réseau (`vtnet2`) est assignée à linterface _pfSync_. Ce réseau dédié permet aux deux firewalls de synchroniser leurs états via le VLAN _POC pfSync_ :
![Add pfSync interface in OPNsense](img/opnsense-vm-assign-pfsync-interface.png)
Jactive linterface sur chaque instance et je leur attribue une IP statique :
- **poc-opnsense-1** : `10.103.0.2/24`
- **poc-opnsense-2** : `10.103.0.3/24`
Puis, jajoute une règle firewall sur chaque nœud pour autoriser tout le trafic provenant de ce réseau sur linterface _pfSync_ :
![Create new firewall rule on pfSync interface to allow any traffic in that network](img/opnsense-vm-firewall-allow-pfsync.png)
#### Configurer la Haute Disponibilité
Direction `System` > `High Availability` > `Settings`.
- Sur le master (`poc-opnsense-1`), je configure les `General Settings` et les `Synchronization Settings`.
- Sur le backup (`poc-opnsense-2`), seuls les `General Settings` suffisent (on ne veut pas quil écrase la config du master).
![OPNsense High Availability settings](img/opnsense-vm-high-availability-settings.png)
Une fois appliqué, je vérifie la synchro dans longlet `Status` :
![OPNsense High Availability status](img/opnsense-vm-high-availability-status.png)
#### Créer une IP Virtuelle
Pour fournir une passerelle partagée aux clients, je crée une IP virtuelle (VIP) en **CARP** (Common Address Redundancy Protocol) sur linterface LAN. LIP est portée par le nœud actif et bascule automatiquement en cas de failover.
Menu : `Interfaces` > `Virtual IPs` > `Settings` :
![Create CARP virtual IP in OPNsense](img/opnsense-vm-create-vip-carp.png)
Je réplique ensuite la config depuis `System > High Availability > Status` avec le bouton `Synchronize and reconfigure all`.
Sur `Interfaces > Virtual IPs > Status`, le master affiche la VIP en `MASTER` et le backup en `BACKUP`.
#### Reconfigurer le DHCP
Pour la HA, il faut adapter le DHCP. Comme **Dnsmasq** ne supporte pas la synchro des baux, chaque instance doit répondre indépendamment.
Sur le master :
- `Services` > `Dnsmasq DNS & DHCP` > `General` : cocher `Disable HA sync`
- `DHCP ranges` : cocher aussi `Disable HA sync`
- `DHCP options` : ajouter loption `router [3]` avec la valeur `10.102.0.1` (VIP LAN)
- `DHCP options` : cloner la règle pour `dns-server [6]` vers la même VIP.
![Edit DHCP options for Dnsmasq in OPNsense](img/opnsense-vm-dnsmasq-add-option.png)
Sur le backup :
- `Services` > `Dnsmasq DNS & DHCP` > `General` : cocher `Disable HA sync`
- Régler `DHCP reply delay` à `5` secondes (laisser la priorité au master)
- `DHCP ranges` : définir un autre pool, plus petit (`10.102.0.200 -> 220`).
Ainsi, seules les **options** DHCP sont synchronisées, les plages restant distinctes.
#### Interface WAN
Mon modem FAI nattribue quune seule IP en DHCP, je ne veux pas que mes 2 VMs entrent en compétition. Pour gérer ça :
1. Dans Proxmox, je copie ladresse MAC de `net0` (WAN) de `poc-opnsense-1` et je lapplique à `poc-opnsense-2`. Ainsi, le bail DHCP est partagé.
⚠️ Si les deux VMs activent la même MAC en même temps, cela provoque des conflits ARP et peut casser le réseau. Seul le MASTER doit activer son WAN.
2. Un hook d'événement CARP permet de lancer des scripts. J'ai déployé ce [script Gist](https://gist.github.com/spali/2da4f23e488219504b2ada12ac59a7dc#file-10-wancarp) dans `/usr/local/etc/rc.syshook.d/carp/10-wan` sur les deux nœuds. Ce script active le WAN uniquement sur le MASTER.
```php
#!/usr/local/bin/php
<?php
require_once("config.inc");
require_once("interfaces.inc");
require_once("util.inc");
require_once("system.inc");
$subsystem = !empty($argv[1]) ? $argv[1] : '';
$type = !empty($argv[2]) ? $argv[2] : '';
if ($type != 'MASTER' && $type != 'BACKUP') {
log_error("Carp '$type' event unknown from source '{$subsystem}'");
exit(1);
}
if (!strstr($subsystem, '@')) {
log_error("Carp '$type' event triggered from wrong source '{$subsystem}'");
exit(1);
}
$ifkey = 'wan';
if ($type === "MASTER") {
log_error("enable interface '$ifkey' due CARP event '$type'");
$config['interfaces'][$ifkey]['enable'] = '1';
write_config("enable interface '$ifkey' due CARP event '$type'", false);
interface_configure(false, $ifkey, false, false);
} else {
log_error("disable interface '$ifkey' due CARP event '$type'");
unset($config['interfaces'][$ifkey]['enable']);
write_config("disable interface '$ifkey' due CARP event '$type'", false);
interface_configure(false, $ifkey, false, false);
}
```
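Détail pratique, non précisé dans le Gist mais a priori nécessaire pour que `rc.syshook` exécute le hook : rendre le script exécutable sur les deux nœuds.
```bash
chmod +x /usr/local/etc/rc.syshook.d/carp/10-wan
```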
### Tester le Failover
Passons aux tests !
OPNsense propose un _CARP Maintenance Mode_. Avec le master actif, seul lui avait son WAN monté. En activant le mode maintenance, les rôles basculent : le master devient backup, son WAN est désactivé et celui du backup est activé :
![Mode maintenance CARP dans OPNsense](img/opnsense-vm-carp-status.png)
Pendant mes pings vers lextérieur, aucune perte de paquets au moment du basculement.
Ensuite, jai simulé un crash en éteignant le master. Le backup a pris le relais de façon transparente, seulement un paquet perdu, et grâce à la synchro des états, même ma session SSH est restée ouverte. 🎉
## Conclusion
Cette preuve de concept démontre quil est possible de faire tourner **OPNsense en haute dispo sous Proxmox VE**, même avec une seule IP WAN. Les briques nécessaires :
- Segmentation VLAN
- Réseau dédié pfSync
- IP virtuelle partagée (CARP)
- Script pour gérer linterface WAN
Le résultat est à la hauteur : failover transparent, synchro des états, et connexions actives qui survivent à un crash. Le point le plus délicat reste la gestion du bail WAN, mais le hook CARP règle ce problème.
🚀 Prochaine étape : préparer un nouveau cluster OPNsense HA sur Proxmox en vue de remplacer complètement ma box physique actuelle. Restez à l'écoute !


@@ -1,283 +0,0 @@
---
slug: opnsense-virtualization-highly-available
title: Build a Highly Available OPNsense Cluster on Proxmox VE
description: A proof of concept showing how to virtualize OPNsense on Proxmox VE, configure high availability with CARP and pfSync and handle a single WAN IP.
date: 2025-09-29
draft: false
tags:
- opnsense
- proxmox
- high-availability
categories:
- homelab
---
## Intro
I recently encountered my first real problem, my physical **OPNsense** box crashed because of a kernel panic, I've detailed what happened in that [post]({{< ref "post/10-opnsense-crash-disk-panic" >}}).
That failure made me rethink my setup. A unique firewall is a single point of failure, so to improve resilience I decided to take a new approach: **virtualize OPNsense**.
Of course, just running one VM wouldnt be enough. To get real redundancy, I need two OPNsense instances in **High Availability**, with one active and the other standing by.
Before rolling this out in my network, I wanted to demonstrate the idea in my homelab. In this post, Ill walk through the proof of concept: deploying two OPNsense VMs inside a **Proxmox VE** cluster and configuring them to provide a highly available firewall.
---
## Current Infrastructure
On top of my setup, my ISP modem, a *Freebox* in bridge mode, connects directly to the `igc0` interface of my OPNsense box, serving as the **WAN**. On `igc1`, the **LAN** is linked to my main switch using a trunk port, with VLAN 1 as the native VLAN for my management network.
The switch also connects my three Proxmox nodes, each on trunk ports with the same native VLAN. Every node has two NICs: one for general networking and the other dedicated to the Ceph storage network, which runs through a separate 2.5 Gbps switch.
Since the OPNsense crash, Ive simplified things by removing the LACP link, it wasnt adding real value:
![Current homelab network diagram](img/homelan-current-physical-layout.png)
Until recently, Proxmox networking on my cluster was very basic: each node was configured individually with no real overlay logic. That changed after I explored Proxmox SDN, where I centralized VLAN definitions across the cluster. I described that step in [this article]({{< ref "post/11-proxmox-cluster-networking-sdn" >}}).
---
## Proof of Concept
Time to move into the lab. Here are the main steps:
1. Add some VLANs in my Homelab
2. Create Fake ISP router
3. Build two OPNsense VMs
4. Configure high availability
5. Test failover
![Diagram of the POC for OPNsense high availability](img/poc-opnsense-diagram.png)
### Add VLANs in my Homelab
For this experiment, I create 3 new VLANs:
- **VLAN 101**: *POC WAN*
- **VLAN 102**: *POC LAN*
- **VLAN 103**: *POC pfSync*
In the Proxmox UI, I navigate to `Datacenter` > `SDN` > `VNets` and I click `Create`:
![Create POC VLANs in the Proxmox SDN](img/proxmox-sdn-create-poc-vlans.png)
Once the 3 new VLANs have been created, I apply the configuration.
Additionally, I add these 3 VLANs in my UniFi Controller. Here only the VLAN ID and name are needed, since the controller will propagate them through the trunks connected to my Proxmox VE nodes.
### Create “Fake ISP Box“ VM
To simulate my current ISP modem, I built a VM named `fake-freebox`. This VM routes traffic between the *POC WAN* and *Lab* networks and runs a DHCP server that serves only one lease, just like my real Freebox in bridge mode.
This VM has 2 NICs, I configure Netplan with:
- `eth0` (*POC WAN* VLAN 101): static IP address `10.101.0.254/24`
- `enp6s19` (Lab VLAN 66): DHCP address given by my current OPNsense router, upstream
```yaml
network:
version: 2
ethernets:
eth0:
addresses:
- 10.101.0.254/24
enp6s19:
dhcp4: true
```
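The Netplan configuration is then applied with the usual command:
```bash
sudo netplan apply
```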
I enable packet forwarding to allow this VM to route traffic:
```bash
echo "net.ipv4.ip_forward=1" | sudo tee -a /etc/sysctl.conf
sudo sysctl -p
```
Then I set up masquerading so packets leaving through the lab network wouldnt be dropped by my current OPNsense:
```bash
sudo iptables -t nat -A POSTROUTING -o enp6s19 -j MASQUERADE
sudo apt install iptables-persistent -y
sudo netfilter-persistent save
```
I install `dnsmasq` as a lightweight DHCP server:
```bash
sudo apt install dnsmasq -y
```
In `/etc/dnsmasq.conf`, I configure it to serve exactly one lease (`10.101.0.150`), with DNS pointing to my current OPNsense router in the *Lab* VLAN:
```
interface=eth0
bind-interfaces
dhcp-range=10.101.0.150,10.101.0.150,255.255.255.0,12h
dhcp-option=3,10.101.0.254 # default gateway = this VM
dhcp-option=6,192.168.66.1 # DNS server
```
I restart the `dnsmasq` service to apply the configuration:
```bash
sudo systemctl restart dnsmasq
```
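As an optional sanity check (not part of the original steps), dnsmasq can also validate the configuration file syntax:
```bash
sudo dnsmasq --test
```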
The `fake-freebox` VM is now ready to serve DHCP on VLAN 101, with only one lease available.
### Build OPNsense VMs
First I download the OPNsense ISO and upload it to one of my Proxmox nodes:
![Upload the OPNsense ISO to Proxmox](img/proxmox-upload-opnsense-iso.png)
#### VM Creation
I create the first VM `poc-opnsense-1`, with the following settings:
- OS type: Linux (even though OPNsense is FreeBSD-based)
- Machine type: `q35`
- BIOS: `OVMF (UEFI)`, EFI storage on my Ceph pool
- Disk: 20 GiB also on Ceph
- CPU/RAM: 2 vCPU, 2 GiB RAM
- NICs:
1. VLAN 101 (POC WAN)
2. VLAN 102 (POC LAN)
3. VLAN 103 (POC pfSync)
![OPNsense VM settings in Proxmox](img/proxmox-create-poc-vm-opnsense.png)
Before booting it, I clone this VM to prepare the second one: `poc-opnsense-2`
On first boot, I hit an “access denied” error. To fix this, I enter the BIOS, go to **Device Manager > Secure Boot Configuration**, uncheck _Attempt Secure Boot_, and restart the VM:
![Disable Secure Boot in Proxmox BIOS](img/proxmox-disable-secure-boot-option.png)
#### OPNsense Installation
The VM boots from the ISO; I touch nothing until I get to the login screen:
![OPNsense CLI login screen in LiveCD](img/opnsense-vm-installation-welcome.png)
I log in as `installer` / `opnsense` and launch the installer. I select the 20 GB QEMU hard disk as the destination and launch the installation:
![OPNsense installation progress bar](img/opnsense-vm-installation-progress-bar.png)
Once the installation is finished, I remove the ISO from the drive and restart the machine.
#### OPNsense Basic Configuration
After reboot, I log in as `root` / `opnsense` and get into the CLI menu:
![CLI menu after a fresh OPNsense installation](img/opnsense-vm-installation-cli-menu.png)
Using option 1, I reassigned interfaces:
![Interface assignment in OPNsense via the CLI](img/opnsense-vm-installation-assign-interfaces.png)
The WAN interface successfully pulled `10.101.0.150/24` from the `fake-freebox`. I set the LAN interface to `10.102.0.2/24` and configured a DHCP pool from `10.102.0.10` to `10.102.0.99`:
![OPNsense WAN interface receiving an IP from the `fake-freebox` VM](img/opnsense-vm-installation-interfaces-configured.png)
✅ The first VM is ready, so I start over for the second OPNsense VM, `poc-opnsense-2`, which will have the IP `10.102.0.3`.
### Configure OPNsense Highly Available
With both OPNsense VMs operational, its time to configure them from the WebGUI. To access the interface, I connected a Windows VM into the _POC LAN_ VLAN and browsed to the OPNsense IP on port 443:
![OPNsense WebGUI from a Windows VM](img/opnsense-vm-webgui-from-poc-lan.png)
#### Add pfSync Interface
The third NIC (`vtnet2`) is assigned to the _pfSync_ interface. This dedicated network allows the two firewalls to synchronize states on the VLAN *POC pfSync*:
![Add pfSync interface in OPNsense](img/opnsense-vm-assign-pfsync-interface.png)
I enable the interface on each instance and configure it with a static IP address:
- **poc-opnsense-1**: `10.103.0.2/24`
- **poc-opnsense-2**: `10.103.0.3/24`
Then, I add a firewall rule on each node to allow all traffic coming from this network on that *pfSync* interface:
![Create new firewall rule on pfSync interface to allow any traffic in that network](img/opnsense-vm-firewall-allow-pfsync.png)
#### Setup High Availability
Next, in `System` > `High Availability` > `Settings`.
- On the master (`poc-opnsense-1`), I configure both the `General Settings` and the `Synchronization Settings`.
- On the backup (`poc-opnsense-2`), only `General Settings` are needed, you don't want your backup to overwrite the master config.
![OPNsense High Availability settings](img/opnsense-vm-high-availability-settings.png)
Once applied, I verify synchronization on the `Status` page:
![OPNsense High Availability status](img/opnsense-vm-high-availability-status.png)
#### Create Virtual IP Address
To provide a shared gateway for clients, I create a CARP (Common Address Redundancy Protocol) Virtual IP (VIP) on the LAN interface. This IP is claimed by the active node and automatically fails over.
Navigate to `Interfaces` > `Virtual IPs` > `Settings`:
![Create CARP virtual IP in OPNsense](img/opnsense-vm-create-vip-carp.png)
To replicate the config, I go to `System > High Availability > Status` and click the button next to `Synchronize and reconfigure all`.
On the `Interfaces > Virtual IPs > Status` page, the master shows the VIP as `MASTER`, while the backup reports `BACKUP`.
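The same state can be checked from the shell: on FreeBSD, the CARP status shows up in `ifconfig` for the interface carrying the VIP, here the LAN interface `vtnet1` (the output line below is illustrative):
```bash
ifconfig vtnet1 | grep carp
# carp: MASTER vhid 1 advbase 1 advskew 0
```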
#### Reconfigure DHCP
For HA, I need to adjust the DHCP setup. Since **Dnsmasq** does not support lease synchronization, both instances must serve leases independently.
On the master:
- `Services` > `Dnsmasq DNS & DHCP` > `General`: tick the `Disable HA sync` box.
- `DHCP ranges`: also tick the `Disable HA sync` box
- `DHCP options`: add the option `router [3]` with the value `10.102.0.1` (LAN VIP)
- `DHCP options`: clone the rule for `dns-server [6]` pointing to the same VIP.
![Edit DHCP options for Dnsmasq in OPNsense](img/opnsense-vm-dnsmasq-add-option.png)
On the backup:
- `Services` > `Dnsmasq DNS & DHCP` > `General`: also tick the `Disable HA sync` box
- Set `DHCP reply delay` to `5` seconds, to give the master priority to answer.
- `DHCP ranges`: use a different, smaller pool (`10.102.0.200` -> `220`), and tick the `Disable HA sync` box here as well.
This way, only DHCP options sync between nodes, while lease ranges stay separate.
#### WAN Interface
My ISP modem only provides a single DHCP lease, and I don't want my 2 VMs to compete to claim it. To handle this:
1. In Proxmox, I copy the MAC of the `net0` (WAN) interface from `poc-opnsense-1` and apply it to `poc-opnsense-2`. This way, the DHCP lease can be shared between the nodes.
⚠️ If both VMs bring up the same MAC, it can cause ARP conflicts and break connectivity, only the MASTER should keep its WAN active.
2. The CARP event hook makes it possible to run scripts, so I deployed this [Gist script](https://gist.github.com/spali/2da4f23e488219504b2ada12ac59a7dc#file-10-wancarp) in `/usr/local/etc/rc.syshook.d/carp/10-wan` on both nodes. This ensures the WAN is active only on the MASTER, avoiding conflicts.
```php
#!/usr/local/bin/php
<?php
require_once("config.inc");
require_once("interfaces.inc");
require_once("util.inc");
require_once("system.inc");
$subsystem = !empty($argv[1]) ? $argv[1] : '';
$type = !empty($argv[2]) ? $argv[2] : '';
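// rc.syshook passes the CARP source (vhid@interface) as the first argument and the new state (MASTER/BACKUP) as the second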
if ($type != 'MASTER' && $type != 'BACKUP') {
log_error("Carp '$type' event unknown from source '{$subsystem}'");
exit(1);
}
if (!strstr($subsystem, '@')) {
log_error("Carp '$type' event triggered from wrong source '{$subsystem}'");
exit(1);
}
$ifkey = 'wan';
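// Toggle only the WAN interface below; LAN and pfSync stay up on both nodes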
if ($type === "MASTER") {
log_error("enable interface '$ifkey' due CARP event '$type'");
$config['interfaces'][$ifkey]['enable'] = '1';
write_config("enable interface '$ifkey' due CARP event '$type'", false);
interface_configure(false, $ifkey, false, false);
} else {
log_error("disable interface '$ifkey' due CARP event '$type'");
unset($config['interfaces'][$ifkey]['enable']);
write_config("disable interface '$ifkey' due CARP event '$type'", false);
interface_configure(false, $ifkey, false, false);
}
```
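There is no GUI for this hook, so it has to be copied to both nodes manually (or with your own tooling). A minimal sketch of the remaining steps, assuming the script content is already in place:
```bash
# On each OPNsense node: make the hook executable so rc.syshook will run it on CARP events
chmod +x /usr/local/etc/rc.syshook.d/carp/10-wan
# Quick sanity check that the file is present and executable
ls -l /usr/local/etc/rc.syshook.d/carp/
```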
### Test Failover
Time for the real test!
OPNsense provides a _CARP Maintenance Mode_. With the master active, the WAN was enabled only on that node. Entering maintenance mode flipped the roles: the master became backup and disabled its WAN, while the backup enabled its own:
![CARP maintenance mode in OPNsense](img/opnsense-vm-carp-status.png)
While pinging outside the network, I observed zero packet loss during the failover.
Finally, I simulated a crash by powering off the master. The backup took over seamlessly: I saw only one dropped packet, and thanks to state synchronization, even my SSH session stayed alive. 🎉
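To reproduce the measurement, a continuous ping from a LAN client is enough to spot lost packets during the role switch; a simple sketch (the target IP is arbitrary):
```bash
# From a LAN client, ping an external host while toggling CARP maintenance mode or powering off the master
ping -i 0.2 9.9.9.9
# Watch the icmp_seq numbers: a clean failover should show at most one missing reply
```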
## Conclusion
This proof of concept showed that running **OPNsense in high availability on Proxmox VE** is possible, even with a single WAN IP address. To achieve this, I needed these components:
- VLAN segmentation.
- Dedicated pfSync network.
- Shared virtual IP.
- Script to manage the WAN interface.
The setup behaved exactly as expected: seamless failover, synchronized firewall states, and even live sessions surviving a node crash. The most delicate part was handling the WAN lease, since my ISP modem only provides one IP, but the CARP hook script solved that challenge.
🚀 The next milestone will be to prepare a new OPNsense HA cluster with the aim to completely replace my current physical box. Stay tuned!

View File

@@ -1,55 +0,0 @@
---
slug:
title: Template
description:
date:
draft: true
tags:
- opnsense
- high-availability
- proxmox
categories:
---
## Intro
In my previous post, I set up a PoC to validate that a cluster of two **OPNsense** VMs in **Proxmox VE** can make the firewall highly available.
This time, I will cover the creation of my future OPNsense cluster from scratch, plan the cutover, and finally migrate from my current physical box.
## Build the Foundation
For the real thing, I'll have to connect the WAN, coming from my ISP box, to my main switch. For that, I have to add a VLAN to carry this traffic to my Proxmox nodes.
### UniFi
The first thing I do is configure my layer 2 network, which is managed by UniFi. There I need to create two VLANs:
- *WAN* (20): transports the WAN traffic between my ISP box and my Proxmox nodes.
- *pfSync* (44): communication between my OPNsense nodes.
In the UniFi controller, in `Settings` > `Networks`, I add a `New Virtual Network`. I name it `WAN` and give it the VLAN ID 20:
![unifi-add-vlan-for-wan.png](img/unifi-add-vlan-for-wan.png)
I do the same thing again for the `pfSync` VLAN with the VLAN ID 44.
I will plug my ISP box into port 15 of my switch, which is disabled for now. I set it as active, set the native VLAN to the newly created `WAN (20)` and disable trunking:
![unifi-enable-port-wan-vlan.png](img/unifi-enable-port-wan-vlan.png)
Once this setting is applied, I make sure that only the ports where my Proxmox nodes are connected propagate these VLANs on their trunks.
We are done with the UniFi configuration.
### Proxmox SDN
Now that these VLANs can reach my nodes, I want to handle them in the Proxmox SDN.
In `Datacenter` > `SDN` > `VNets`, I create a new VNet, name it `vlan20` to follow my own naming convention, give it the *WAN* alias and use the tag (ID) 20:
![proxmox-sdn-new-vnet-wan.png](img/proxmox-sdn-new-vnet-wan.png)
I also create the `vlan44` for the *pfSync* VLAN, then I apply this configuration and we are done with the SDN.
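For reference, the same VNets can be created from the CLI through the cluster API; a minimal sketch, assuming an existing VLAN zone named `homelan` (the zone name is an assumption) and the `pvesh` paths of current Proxmox releases:
```bash
# Create the two VNets in an existing VLAN zone (zone name 'homelan' is an assumption)
pvesh create /cluster/sdn/vnets --vnet vlan20 --zone homelan --tag 20 --alias WAN
pvesh create /cluster/sdn/vnets --vnet vlan44 --zone homelan --tag 44 --alias pfSync
# Apply the pending SDN configuration cluster-wide
pvesh set /cluster/sdn
```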
## Create the VMs
Now that the VLAN configuration is done, I can start building my VMs.
This time I already have the ISO uploaded

View File

@@ -1,243 +0,0 @@
---
slug: blog-deployment-obisidan-hugo-gitea-actions
title: Déploiement du Blog avec Obsidian, Hugo et Gitea Actions
description: Comment j'ai automatisé mon blog self-hosted en utilisant Obsidian, Gitea Actions et Hugo pour publier des articles directement à partir de mes notes personnelles.
date: 2025-05-02
draft: false
tags:
- obsidian
- hugo
- gitea
- gitea-actions
- docker
categories:
- blog
---
## 💡 Intro
J'ai toujours voulu partager mes expériences pour donner des idées aux autres ou les aider dans leurs projets.
Je suis constamment en train de bidouiller dans mon lab, testant de nouveaux outils et workflows. Plutôt que de conserver toutes ces expériences dans des notes privées, j'ai décidé de créer un blog où je peux les documenter et les publier facilement.
Je souhaitais que l'ensemble du processus soit automatisé, self-hosted et intégré aux outils que j'utilise déjà.
---
## 🔧 Outils
### Obsidian
J'utilisais auparavant [Notion](https://www.notion.com), mais il y a quelques mois, je suis passé à [Obsidian](https://obsidian.md/). C'est une application de prise de notes basée sur Markdown qui stocke tout localement, ce qui me donne plus de flexibilité et de contrôle.
Pour synchroniser mes notes entre mes appareils, j'utilise le [plugin Git Obsidian](https://github.com/denolehov/obsidian-git), qui enregistre les modifications dans un dépôt Git hébergé sur mon instance Gitea self-hosted.
Cette configuration permet non seulement de sauvegarder toutes mes notes avec leurs versions, mais aussi ouvre la porte à l'automatisation.
### Gitea
[Gitea](https://gitea.io/) est un service Git self-hosted similaire à GitHub, mais léger et facile à maintenir. J'y héberge mes dépôts personnels, notamment mon vault Obsidian et mon blog.
Gitea prend désormais en charge [Gitea Actions](https://docs.gitea.com/usage/actions/overview), un mécanisme de pipeline CI/CD compatible avec la syntaxe GitHub Actions.
Pour exécuter ces workflows, j'ai installé un [Gitea runner](https://gitea.com/gitea/act_runner) sur mon serveur, ce qui me permet de créer un workflow automatisé déclenché lorsque je mets à jour le contenu de mes notes, puis de reconstruire et déployer mon blog.
### Hugo
[Hugo](https://gohugo.io/) est un générateur de sites statiques rapide et flexible, écrit en Go. Il est idéal pour générer du contenu à partir de fichiers Markdown. Hugo est hautement personnalisable, prend en charge les thèmes et peut générer un site web complet en quelques secondes.
Il est idéal pour un blog basé sur des notes Obsidian et fonctionne parfaitement dans les pipelines CI/CD grâce à sa rapidité et sa simplicité.
---
## 🔁 Workflow
L'idée est simple :
1. J'écris le contenu de mon blog dans mon vault Obsidian, sous un dossier `Blog`.
2. Une fois le fichier modifié, le plugin Git Obsidian effectue automatiquement les commits et les pousse vers le dépôt Gitea.
3. Lorsque Gitea reçoit ce push, une première Gitea Action est déclenchée.
4. La première action synchronise le contenu du blog mis à jour avec un autre dépôt [Git distinct](https://git.vezpi.com/Vezpi/blog) qui héberge le contenu.
5. Dans ce dépôt, une autre Gitea Action est déclenchée.
6. La deuxième Gitea Action génère les pages web statiques tout en mettant à jour Hugo si nécessaire.
7. Le blog est maintenant mis à jour (celui que vous lisez).
De cette façon, je n'ai plus besoin de copier manuellement de fichiers ni de gérer les déploiements. Tout s'enchaîne automatiquement, de l'écriture en Markdown dans Obsidian au déploiement complet du site web.
![Workflow depuis l'écriture de notes sur Obsidian au Blog publié](img/obsidian-blog-gitea-actions-workflow.png)
---
## ⚙️ Implémentation
### Étape 1 : Configuration du vault Obsidian
Dans mon vault Obsidian, j'ai créé un dossier `Blog` contenant mes articles de blog en Markdown. Chaque article inclut le front matter Hugo (titre, date, brouillon, etc.). Le plugin Git est configuré pour valider et pousser automatiquement les modifications apportées au dépôt Gitea.
### Étape 2 : Lancer Gitea Runner
Le vault Obsidian est un dépôt Git privé self-hosted dans Gitea. J'utilise Docker Compose pour gérer cette instance. Pour activer les Gitea Actions, j'ai ajouté Gitea Runner à la stack.
```yaml
runner:
image: gitea/act_runner:latest
container_name: gitea_runner
restart: on-failure
environment:
- GITEA_INSTANCE_URL=https://git.vezpi.com
- GITEA_RUNNER_REGISTRATION_TOKEN=${GITEA_RUNNER_REGISTRATION_TOKEN}
- GITEA_RUNNER_NAME=self-hosted
- GITEA_RUNNER_LABELS=ubuntu:docker://node:lts,alpine:docker://node:lts-alpine
- CONFIG_FILE=/data/config.yml
volumes:
- /var/run/docker.sock:/var/run/docker.sock
- /appli/data/gitea/runner:/data
- /appli:/appli
networks:
- backend
depends_on:
- server
```
Le fichier `config.yml` contient uniquement les volumes autorisés à être montés dans les conteneurs :
```yaml
container:
valid_volumes:
- /appli*
```
Le runner apparaît dans l'`Administration Area`, sous `Actions` > `Runners`. Pour obtenir le token d'enrôlement, on clique sur le bouton `Create new Runner` :
![New runner visible in Gitea](img/gitea-runners-management.png)
### Étape 3 : Configurer les Gitea Actions pour le dépôt Obsidian
J'ai d'abord activé les Gitea Actions, qui sont désactivées par défaut, en cochant la case `Enable Repository Actions` dans les paramètres du dépôt.
J'ai créé un nouveau PAT (Personal Access Token) avec autorisation RW sur les dépôts.
![New personal access token creation in Gitea](img/gitea-new-pat.png)
J'ai ajouté le token comme secret `REPO_TOKEN` dans le dépôt.
![Add secret window for repository in Gitea](img/gitea-add-repo-secret.png)
J'ai dû créer le workflow qui, lorsque je crée ou mets à jour des fichiers du dossier `Blog`, lance un conteneur et effectue les opérations suivantes :
1. Checkout du dépôt actuel (le vault Obsidian)
2. Clone du dépôt du blog
3. Transfert du contenu du blog depuis Obsidian
4. Commit des modifications dans le dépôt du blog
**sync_blog.yml**
```yaml
name: Synchronize content with the blog repo
on:
push:
paths:
- 'Blog/**'
jobs:
Sync:
runs-on: ubuntu
steps:
- name: Install prerequisites
run: apt update && apt install -y rsync
- name: Check out repository
uses: actions/checkout@v4
- name: Clone the blog repository
run: git clone https://${{ secrets.REPO_TOKEN }}@git.vezpi.com/Vezpi/blog.git
- name: Transfer blog content from Obsidian
run: |
echo "Copy Markdown files"
rsync -av --delete Blog/ blog/content
# Gather all used images from markdown files
used_images=$(grep -rhoE '^!\[\[.*\]\]' blog/content | sed -E 's/!\[\[(.*)\]\]/\1/' | sort -u)
# Create the target image folder
mkdir -p blog/static/img
# Loop over each used image
while IFS= read -r image; do
# Loop through all .md files and replace image links
grep -rl "$image" blog/content/* | while IFS= read -r md_file; do
sed -i "s|\!\[\[$image\]\]|\!\[${image// /_}\](img/${image// /_})|g" "$md_file"
done
echo "Copy the image ${image// /_} to the static folder"
cp "Images/$image" "blog/static/img/${image// /_}"
done <<< "$used_images"
- name: Commit the change to the blog repository
run: |
cd blog
git config --global user.name "Gitea Actions"
git config --global user.email "actions@local"
git config --global --add safe.directory /appli/data/blog
git add .
git commit -m "Auto-update blog content from Obsidian: $(date '+%F %T')" || echo "Nothing to commit"
git push -u origin main
```
Obsidian utilise des liens de type wiki pour les images, comme `![[nom_image.png]]`, ce qui n'est pas compatible avec Hugo par défaut. Voici comment j'ai automatisé une solution de contournement dans un workflow Gitea Actions :
- Je trouve toutes les références d'images utilisées dans des fichiers `.md`.
- Pour chaque image référencée, je mets à jour le lien dans les fichiers `.md` correspondants, comme `![nom_image](img/nom_image.png)`.
- Je copie ensuite ces images utilisées dans le répertoire statique du blog en remplaçant les espaces par des underscores.
### Étape 4 : Actions Gitea pour le dépôt du blog
Le dépôt du blog contient l'intégralité du site Hugo, y compris le contenu synchronisé et le thème.
Son workflow :
1. Checkout du dépôt du blog
2. Vérification de la mise à jour d'Hugo. Si disponible, la dernière version est téléchargée.
3. Génération du site web statique avec Hugo.
**deploy_blog.yml**
```yaml
name: Deploy
on: [push]
jobs:
Deploy:
runs-on: ubuntu
env:
BLOG_FOLDER: /blog
container:
volumes:
- /appli/data/blog:/blog
steps:
- name: Check out repository
run: |
cd ${BLOG_FOLDER}
git config --global user.name "Gitea Actions"
git config --global user.email "actions@local"
git config --global --add safe.directory ${BLOG_FOLDER}
git submodule update --init --recursive
git fetch origin
git reset --hard origin/main
- name: Get current Hugo version
run: |
current_version=$(${BLOG_FOLDER}/hugo version | grep -oE 'v[0-9]+\.[0-9]+\.[0-9]+')
echo "current_version=$current_version" | tee -a $GITEA_ENV
- name: Verify latest Hugo version
run: |
latest_version=$(curl -s https://api.github.com/repos/gohugoio/hugo/releases/latest | grep -oP '"tag_name": "\K[^"]+')
echo "latest_version=$latest_version" | tee -a $GITEA_ENV
- name: Download latest Hugo version
if: env.current_version != env.latest_version
run: |
rm -f ${BLOG_FOLDER}/{LICENSE,README.md,hugo}
curl -L https://github.com/gohugoio/hugo/releases/download/$latest_version/hugo_extended_${latest_version#v}_Linux-64bit.tar.gz -o hugo.tar.gz
tar -xzvf hugo.tar.gz -C ${BLOG_FOLDER}/
- name: Generate the static files with Hugo
run: |
rm -f ${BLOG_FOLDER}/content/posts/template.md
rm -rf ${BLOG_FOLDER}/private/* ${BLOG_FOLDER}/public/*
${BLOG_FOLDER}/hugo -D -b https://blog-dev.vezpi.me -s ${BLOG_FOLDER} -d ${BLOG_FOLDER}/private
${BLOG_FOLDER}/hugo -s ${BLOG_FOLDER} -d ${BLOG_FOLDER}/public
chown 1000:1000 -R ${BLOG_FOLDER}
```
---
## 🚀 Résultats
Ce workflow me permet de me concentrer sur l'essentiel : rédiger et peaufiner mon contenu. En automatisant le processus de publication, de la synchronisation de mes notes Obsidian à la création du blog avec Hugo, je n'ai plus à me soucier de la gestion manuelle du contenu dans un CMS.
Chaque note que je rédige peut évoluer naturellement vers un article clair et structuré, et la partie technique passe au second plan. C'est un moyen simple et efficace de transformer mes connaissances personnelles en documentation partageable.

View File

@@ -1,973 +0,0 @@
---
slug: terraform-create-vm-proxmox
title: Déployer une VM sur Proxmox avec Terraform
description: Découvrez comment déployer une VM sur Proxmox à l'aide de Terraform et d'un template cloud-init, rendant votre infrastructure reproductible et facile à gérer.
date: 2025-05-25
draft: false
tags:
- terraform
- proxmox
- cloud-init
categories:
- homelab
---
## Intro
L'un des aspects les plus satisfaisants de la création de mon homelab, c'est de pouvoir y appliquer des outils production-grade. J'ai voulu définir toute mon infrastructure as code, et la première étape que j'ai abordée est le déploiement de Machines Virtuelles avec **Terraform** sur **Proxmox**.
Dans cet article, je vous guide pas à pas pour créer une simple VM sur Proxmox en utilisant Terraform, basée sur un template **cloud-init** que j'ai détaillé dans [cet article]({{< ref "post/1-proxmox-cloud-init-vm-template" >}}). L'exécution se fait depuis un conteneur LXC dédié qui centralise toute la gestion de mon infrastructure.
📝 Le code complet utilisé dans cet article est disponible dans mon [dépôt GitHub Homelab](https://github.com/Vezpi/Homelab)
---
## Qu'est-ce que Terraform ?
Terraform est un outil open-source d'IaC (Infrastructure as Code) développé par **HashiCorp**. Il permet de définir et de provisionner de l'infrastructure à l'aide d'un langage de configuration de haut niveau appelé **HCL** (HashiCorp Configuration Language). Grâce à Terraform, on peut gérer des services cloud, des VMs, des réseaux, des enregistrements DNS, etc.
Dans mon homelab, Terraform simplifie considérablement le déploiement de VMs et rend mon environnement reproductible, permettant de tout redéployer facilement si nécessaire.
Un petit mot sur **OpenTofu**, un fork communautaire de Terraform apparu suite à des changements de licence. Il est presque entièrement compatible avec Terraform et pourrait être une bonne alternative à l'avenir. Mais pour le moment, je reste sur Terraform.
---
## Les Providers Terraform pour Proxmox
Pour utiliser Terraform, il faut un provider, un plugin permettant à Terraform d'interagir avec l'infrastructure. Dans le cas de Proxmox, le provider va utiliser son API. Il en existe actuellement deux :
- [**Telmate/proxmox**](https://registry.terraform.io/providers/Telmate/proxmox/latest) : L'un des premiers providers disponibles. Il est très utilisé mais peu maintenu. Facile à utiliser, avec pas mal de documentation, mais limité en fonctionnalités, avec seulement 4 ressources disponibles et aucun data source. Par exemple, je n'ai pas pu récupérer les informations sur les nœuds.
- [**bpg/proxmox**](https://registry.terraform.io/providers/bpg/proxmox/latest) : Un provider plus récent, développé activement (apparemment par une seule personne), avec une syntaxe plus propre et un support plus étendu. Il a été un peu plus complexe à mettre en place, mais suffisamment mature à mon goût.
J'ai choisi `bpg/proxmox` car il est mieux maintenu à l'heure où j'écris ces lignes, et je voulais pouvoir récupérer certaines infos sur les nœuds comme leur hostname, etc.
---
## Préparer l'Environnement
### Créer un Template Cloud-init sur Proxmox
Consultez mon précédent article sur [Proxmox - Créer un Template de VM Cloud-Init]({{< ref "post/1-proxmox-cloud-init-vm-template" >}}).
### Installer Terraform
Pour l'installation de Terraform dans mon conteneur LXC, je me suis basé sur la [documentation officielle](https://developer.hashicorp.com/terraform/tutorials/aws-get-started/install-cli).
```bash
# Ensure that your system is up to date and that you have the `gnupg`, `software-properties-common`, and `curl` packages installed. You will use these packages to verify HashiCorp's GPG signature and install HashiCorp's Debian package repository.
apt-get update && apt-get install -y gnupg software-properties-common
# Install the HashiCorp [GPG key](https://apt.releases.hashicorp.com/gpg).
wget -O- https://apt.releases.hashicorp.com/gpg | gpg --dearmor | tee /usr/share/keyrings/hashicorp-archive-keyring.gpg > /dev/null
# Verify the key's fingerprint.
gpg --no-default-keyring --keyring /usr/share/keyrings/hashicorp-archive-keyring.gpg --fingerprint
# Add the official HashiCorp repository to your system. The `lsb_release -cs` command finds the distribution release codename for your current system, such as `buster`, `groovy`, or `sid`.
echo "deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] <https://apt.releases.hashicorp.com> $(lsb_release -cs) main" | tee /etc/apt/sources.list.d/hashicorp.list
# Download the package information from HashiCorp.
apt update
# Install Terraform from the new repository.
apt-get install terraform
```
### Créer un utilisateur Terraform dédié dans Proxmox
Avant que Terraform puisse interagir avec votre cluster Proxmox, il est préférable de créer un utilisateur dédié avec des permissions limitées. Vous pouvez utiliser `root@pam`, mais ce n'est pas recommandé pour des raisons de sécurité.
Connectez-vous en SSH sur un nœud Proxmox avec un compte ayant les droits nécessaires, `root` dans ce cas.
1. **Créer le Rôle `TerraformUser`**
```bash
pveum role add TerraformUser -privs "\
Datastore.Allocate \
Datastore.AllocateSpace \
Datastore.Audit \
Pool.Allocate \
Sys.Audit \
Sys.Console \
Sys.Modify \
VM.Allocate \
VM.Audit \
VM.Clone \
VM.Config.CDROM \
VM.Config.Cloudinit \
VM.Config.CPU \
VM.Config.Disk \
VM.Config.HWType \
VM.Config.Memory \
VM.Config.Network \
VM.Config.Options \
VM.Console \
VM.Migrate \
VM.Monitor \
VM.PowerMgmt \
SDN.Use"
```
2. **Créer l'Utilisateur `terraformer`**
```bash
pveum user add terraformer@pve --password <password>
```
3. **Assigner le Rôle `TerraformUser` à l'Utilisateur `terraformer`**
```bash
pveum aclmod / -user terraformer@pve -role TerraformUser
```
4. **Créer le Jeton API pour l'Utilisateur `terraformer`**
```bash
pveum user token add terraformer@pve terraform -expire 0 -privsep 0 -comment "Terraform token"
```
> ⚠️ **Copiez** et **conservez** bien le jeton généré !
### Installer des Clés SSH sur vos Nœuds Proxmox
Cette étape est nécessaire pour certaines ressources qui exécutent des commandes directement sur les nœuds, lorsque l'API Proxmox ne suffit pas, comme expliqué [ici](https://registry.terraform.io/providers/bpg/proxmox/latest/docs#ssh-connection). C'est le cas avec cloud-init.
On peut utiliser un agent SSH ou une clé SSH classique. J'ai choisi la clé SSH, donc on doit en générer une et l'installer sur les nœuds, depuis la machine qui exécute Terraform.
1. **Générer une paire de clés SSH**
```bash
ssh-keygen
```
2. **L'Installer sur le(s) nœud(s) Proxmox pour l'utilisateur root**
```bash
ssh-copy-id root@<your Proxmox node>
```
---
## Déployer votre Première VM
Passons à la partie fun ! Maintenant que tout est prêt, on peut déployer une VM avec Terraform sur Proxmox. C'est parti pour le code !
### Code Terraform
> 📌 Pour rappel, tout le code est disponible dans mon [dépôt Homelab](https://github.com/Vezpi/Homelab), le projet utilisé ici se trouve [ici](https://github.com/Vezpi/Homelab/tree/main/terraform/projects/simple-vm). N'oubliez pas d'adapter les variables à votre environnement.
#### Structure du projet
Voici l'arborescence du code. Vous pouvez tout mettre dans un seul fichier `.tf`, mais je préfère l'organiser proprement.
```plaintext
terraform
`-- projects
`-- simple-vm
|-- credentials.auto.tfvars
|-- main.tf
|-- provider.tf
|-- terraform.tfvars
`-- variables.tf
```
#### `provider.tf`
Définit la configuration du provider (par exemple, Proxmox) et la manière dont Terraform s'y connecte.
```hcl
# Define the required Terraform provider block
terraform {
required_providers {
proxmox = {
source = "bpg/proxmox" # Use the community Proxmox provider from the bpg namespace
}
}
}
# Configure the Proxmox provider with API and SSH access
provider "proxmox" {
endpoint = var.proxmox_endpoint # Proxmox API URL (e.g., https://proxmox.local:8006/api2/json)
api_token = var.proxmox_api_token # API token for authentication (should have appropriate permissions)
insecure = false # Reject self-signed or invalid TLS certificates (set to true only in trusted/test environments)
# Optional SSH settings used for VM customization via SSH
ssh {
agent = false # Do not use the local SSH agent; use key file instead
private_key = file("~/.ssh/id_ed25519") # Load SSH private key from the local file system
username = "root" # SSH username for connecting to the Proxmox host
}
}
```
#### `main.tf`
Contient la logique principale de l'infrastructure, telle que les ressources et les modules à déployer.
```hcl
# Retrieve VM templates available in Proxmox that match the specified name
data "proxmox_virtual_environment_vms" "template" {
filter {
name = "name"
values = ["${var.vm_template}"] # The name of the template to clone from
}
}
# Create a cloud-init configuration file as a Proxmox snippet
resource "proxmox_virtual_environment_file" "cloud_config" {
content_type = "snippets" # Cloud-init files are stored as snippets in Proxmox
datastore_id = "local" # Local datastore used to store the snippet
node_name = var.node_name # The Proxmox node where the file will be uploaded
source_raw {
file_name = "vm.cloud-config.yaml" # The name of the snippet file
data = <<-EOF
#cloud-config
hostname: ${var.vm_name}
package_update: true
package_upgrade: true
packages:
- qemu-guest-agent # Ensures the guest agent is installed
users:
- default
- name: ${var.vm_user}
groups: sudo
shell: /bin/bash
ssh-authorized-keys:
- "${var.vm_user_sshkey}" # Inject user's SSH key
sudo: ALL=(ALL) NOPASSWD:ALL
runcmd:
- systemctl enable qemu-guest-agent
- reboot # Reboot the VM after provisioning
EOF
}
}
# Define and provision a new VM by cloning the template and applying initialization
resource "proxmox_virtual_environment_vm" "vm" {
name = var.vm_name # VM name
node_name = var.node_name # Proxmox node to deploy the VM
tags = var.vm_tags # Optional VM tags for categorization
agent {
enabled = true # Enable the QEMU guest agent
}
stop_on_destroy = true # Ensure VM is stopped gracefully when destroyed
clone {
vm_id = data.proxmox_virtual_environment_vms.template.vms[0].vm_id # ID of the source template
node_name = data.proxmox_virtual_environment_vms.template.vms[0].node_name # Node of the source template
}
bios = var.vm_bios # BIOS type (e.g., seabios or ovmf)
machine = var.vm_machine # Machine type (e.g., q35)
cpu {
cores = var.vm_cpu # Number of CPU cores
type = "host" # Use host CPU type for best compatibility/performance
}
memory {
dedicated = var.vm_ram # RAM in MB
}
disk {
datastore_id = var.node_datastore # Datastore to hold the disk
interface = "scsi0" # Primary disk interface
size = 4 # Disk size in GB
}
initialization {
user_data_file_id = proxmox_virtual_environment_file.cloud_config.id # Link the cloud-init file
datastore_id = var.node_datastore
interface = "scsi1" # Separate interface for cloud-init
ip_config {
ipv4 {
address = "dhcp" # Get IP via DHCP
}
}
}
network_device {
bridge = "vmbr0" # Use the default bridge
vlan_id = var.vm_vlan # VLAN tagging if used
}
operating_system {
type = "l26" # Linux 2.6+ kernel
}
vga {
type = "std" # Standard VGA type
}
lifecycle {
ignore_changes = [ # Ignore initialization section after first deployment for idempotency
initialization
]
}
}
# Output the assigned IP address of the VM after provisioning
output "vm_ip" {
value = proxmox_virtual_environment_vm.vm.ipv4_addresses[1][0] # Second network interface's first IP
description = "VM IP"
}
```
#### `variables.tf`
Déclare toutes les variables d'entrée, leurs types, leurs descriptions et leurs valeurs par défaut facultatives.
```hcl
variable "proxmox_endpoint" {
description = "Proxmox URL endpoint"
type = string
}
variable "proxmox_api_token" {
description = "Proxmox API token"
type = string
sensitive = true
}
variable "node_name" {
description = "Proxmox host for the VM"
type = string
}
variable "node_datastore" {
description = "Datastore used for VM storage"
type = string
default = "ceph-workload"
}
variable "vm_template" {
description = "Template of the VM"
type = string
default = "ubuntu-cloud"
}
variable "vm_name" {
description = "Hostname of the VM"
type = string
}
variable "vm_user" {
description = "Admin user of the VM"
type = string
default = "vez"
}
variable "vm_user_sshkey" {
description = "Admin user SSH key of the VM"
type = string
default = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAID62LmYRu1rDUha3timAIcA39LtcIOny1iAgFLnxoBxm vez@bastion"
}
variable "vm_cpu" {
description = "Number of CPU cores of the VM"
type = number
default = 1
}
variable "vm_ram" {
description = "Number of RAM (MB) of the VM"
type = number
default = 2048
}
variable "vm_bios" {
description = "Type of BIOS used for the VM"
type = string
default = "ovmf"
}
variable "vm_machine" {
description = "Type of machine used for the VM"
type = string
default = "q35"
}
variable "vm_vlan" {
description = "VLAN of the VM"
type = number
default = 66
}
variable "vm_tags" {
description = "Tags for the VM"
type = list(any)
default = ["test"]
}
```
#### `terraform.tfvars`
Valeurs de variables chargées automatiquement qui remplacent les valeurs par défaut, utilisées pour personnaliser les déploiements.
```hcl
node_name = "zenith" # Name of the Proxmox node where the VM will be deployed
vm_name = "zenith-vm" # Desired name for the new virtual machine
vm_cpu = 2 # Number of CPU cores to allocate to the VM
vm_ram = 2048 # Amount of RAM in MB (2 GB)
vm_vlan = 66 # VLAN ID for network segmentation
```
#### `credentials.auto.tfvars`
Charge automatiquement les variables sensibles telles que les jetons API ou les informations d'identification au moment de l'exécution. Ce fichier n'est pas dans le dépôt, vous devrez donc le créer manuellement.
```hcl
proxmox_endpoint = <your Proxmox endpoint>
proxmox_api_token = <your Proxmox API token for the user terraformer>
```
> 💡 Pour améliorer la lisibilité, vous pouvez formater automatiquement votre code Terraform avec `terraform fmt`, pour appliquer les conventions de style standard et le rendre propre et cohérent.
### Initialiser l'espace de travail
Avant de faire quoi que ce soit, commencez par initialiser votre environnement avec `terraform init`. Cette commande va :
- Initialiser le répertoire
- Télécharger les providers
- Installer les modules
- Préparer le backend
```bash
$ terraform init
Initializing the backend...
Initializing provider plugins...
- Finding latest version of bpg/proxmox...
- Installing bpg/proxmox v0.78.0...
- Installed bpg/proxmox v0.78.0 (self-signed, key ID F0582AD6AE97C188)
Partner and community providers are signed by their developers.
If you'd like to know more about provider signing, you can read about it here:
https://www.terraform.io/docs/cli/plugins/signing.html
Terraform has created a lock file .terraform.lock.hcl to record the provider
selections it made above. Include this file in your version control repository
so that Terraform can guarantee to make the same selections by default when
you run "terraform init" in the future.
Terraform has been successfully initialized!
You may now begin working with Terraform. Try running "terraform plan" to see
any changes that are required for your infrastructure. All Terraform commands
should now work.
If you ever set or change modules or backend configuration for Terraform,
rerun this command to reinitialize your working directory. If you forget, other
commands will detect it and remind you to do so if necessary.
```
### Déployer votre VM avec Terraform
Super, notre environnement est maintenant prêt pour le déploiement ! Avant de créer votre VM, vous pouvez exécuter `terraform plan` sur votre code et Terraform vous indiquera ce qu'il en fera !
Pour le lancer réellement, vous devrez exécuter `terraform apply`.
```bash
$ terraform apply
data.proxmox_virtual_environment_vms.template: Reading...
data.proxmox_virtual_environment_vms.template: Read complete after 0s [id=d3292ffb-f11d-4588-9e97-fabc0f93cc39]
Terraform used the selected providers to generate the following execution plan. Resource actions are indicated with the following symbols:
+ create
Terraform will perform the following actions:
# proxmox_virtual_environment_file.cloud_config will be created
+ resource "proxmox_virtual_environment_file" "cloud_config" {
+ content_type = "snippets"
+ datastore_id = "local"
+ file_modification_date = (known after apply)
+ file_name = (known after apply)
+ file_size = (known after apply)
+ file_tag = (known after apply)
+ id = (known after apply)
+ node_name = "zenith"
+ overwrite = true
+ timeout_upload = 1800
+ source_raw {
+ data = <<-EOT
#cloud-config
hostname: zenith-vm
package_update: true
package_upgrade: true
packages:
- qemu-guest-agent # Ensures the guest agent is installed
users:
- default
- name: vez
groups: sudo
shell: /bin/bash
ssh-authorized-keys:
- "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAID62LmYRu1rDUha3timAIcA39LtcIOny1iAgFLnxoBxm vez@bastion" # Inject user's SSH key
sudo: ALL=(ALL) NOPASSWD:ALL
runcmd:
- systemctl enable qemu-guest-agent
- reboot # Reboot the VM after provisioning
EOT
+ file_name = "vm.cloud-config.yaml"
+ resize = 0
}
}
# proxmox_virtual_environment_vm.vm will be created
+ resource "proxmox_virtual_environment_vm" "vm" {
+ acpi = true
+ bios = "ovmf"
+ id = (known after apply)
+ ipv4_addresses = (known after apply)
+ ipv6_addresses = (known after apply)
+ keyboard_layout = "en-us"
+ mac_addresses = (known after apply)
+ machine = "q35"
+ migrate = false
+ name = "zenith-vm"
+ network_interface_names = (known after apply)
+ node_name = "zenith"
+ on_boot = true
+ protection = false
+ reboot = false
+ reboot_after_update = true
+ scsi_hardware = "virtio-scsi-pci"
+ started = true
+ stop_on_destroy = true
+ tablet_device = true
+ tags = [
+ "test",
]
+ template = false
+ timeout_clone = 1800
+ timeout_create = 1800
+ timeout_migrate = 1800
+ timeout_move_disk = 1800
+ timeout_reboot = 1800
+ timeout_shutdown_vm = 1800
+ timeout_start_vm = 1800
+ timeout_stop_vm = 300
+ vm_id = (known after apply)
+ agent {
+ enabled = true
+ timeout = "15m"
+ trim = false
+ type = "virtio"
}
+ clone {
+ full = true
+ node_name = "apex"
+ retries = 1
+ vm_id = 900
}
+ cpu {
+ cores = 2
+ hotplugged = 0
+ limit = 0
+ numa = false
+ sockets = 1
+ type = "host"
+ units = 1024
}
+ disk {
+ aio = "io_uring"
+ backup = true
+ cache = "none"
+ datastore_id = "ceph-workload"
+ discard = "ignore"
+ file_format = (known after apply)
+ interface = "scsi0"
+ iothread = false
+ path_in_datastore = (known after apply)
+ replicate = true
+ size = 4
+ ssd = false
}
+ initialization {
+ datastore_id = "ceph-workload"
+ interface = "scsi1"
+ meta_data_file_id = (known after apply)
+ network_data_file_id = (known after apply)
+ type = (known after apply)
+ user_data_file_id = (known after apply)
+ vendor_data_file_id = (known after apply)
+ ip_config {
+ ipv4 {
+ address = "dhcp"
}
}
}
+ memory {
+ dedicated = 2048
+ floating = 0
+ keep_hugepages = false
+ shared = 0
}
+ network_device {
+ bridge = "vmbr0"
+ enabled = true
+ firewall = false
+ mac_address = (known after apply)
+ model = "virtio"
+ mtu = 0
+ queues = 0
+ rate_limit = 0
+ vlan_id = 66
}
+ operating_system {
+ type = "l26"
}
+ vga {
+ memory = 16
+ type = "std"
}
}
Plan: 2 to add, 0 to change, 0 to destroy.
Changes to Outputs:
+ vm_ip = (known after apply)
Do you want to perform these actions?
Terraform will perform the actions described above.
Only 'yes' will be accepted to approve.
Enter a value: yes
proxmox_virtual_environment_file.cloud_config: Creating...
proxmox_virtual_environment_file.cloud_config: Creation complete after 1s [id=local:snippets/vm.cloud-config.yaml]
proxmox_virtual_environment_vm.vm: Creating...
proxmox_virtual_environment_vm.vm: Still creating... [10s elapsed]
proxmox_virtual_environment_vm.vm: Still creating... [20s elapsed]
proxmox_virtual_environment_vm.vm: Still creating... [30s elapsed]
proxmox_virtual_environment_vm.vm: Still creating... [40s elapsed]
proxmox_virtual_environment_vm.vm: Still creating... [50s elapsed]
proxmox_virtual_environment_vm.vm: Still creating... [1m0s elapsed]
proxmox_virtual_environment_vm.vm: Still creating... [1m10s elapsed]
proxmox_virtual_environment_vm.vm: Still creating... [1m20s elapsed]
proxmox_virtual_environment_vm.vm: Still creating... [1m30s elapsed]
proxmox_virtual_environment_vm.vm: Still creating... [1m40s elapsed]
proxmox_virtual_environment_vm.vm: Still creating... [1m50s elapsed]
proxmox_virtual_environment_vm.vm: Still creating... [2m0s elapsed]
proxmox_virtual_environment_vm.vm: Still creating... [2m10s elapsed]
proxmox_virtual_environment_vm.vm: Still creating... [2m20s elapsed]
proxmox_virtual_environment_vm.vm: Still creating... [2m30s elapsed]
proxmox_virtual_environment_vm.vm: Still creating... [2m40s elapsed]
proxmox_virtual_environment_vm.vm: Still creating... [2m50s elapsed]
proxmox_virtual_environment_vm.vm: Creation complete after 2m53s [id=103]
Apply complete! Resources: 2 added, 0 changed, 0 destroyed.
Outputs:
vm_ip = "192.168.66.156"
```
✅ Voilà, on vient de créer une VM sur Proxmox en quelques minutes.
![Résumé de la nouvelle VM créée sur Proxmox](img/proxmox-terraform-new-vm.png)
### Connexion SSH
🍒 Cerise sur le gâteau : Terraform nous donne l'adresse IP, et grâce à cloud-init, la connexion SSH fonctionne immédiatement.
```bash
$ ssh 192.168.66.156
The authenticity of host '192.168.66.156 (192.168.66.156)' can't be established.
ED25519 key fingerprint is SHA256:kSaXpIJYpJOBYfpVqiiH8OxhpgBY9WH/ggqFHo/20rg.
This key is not known by any other names.
Are you sure you want to continue connecting (yes/no/[fingerprint])? yes
Warning: Permanently added '192.168.66.156' (ED25519) to the list of known hosts.
Welcome to Ubuntu 24.04.2 LTS (GNU/Linux 6.8.0-60-generic x86_64)
* Documentation: https://help.ubuntu.com
* Management: https://landscape.canonical.com
* Support: https://ubuntu.com/pro
System information as of Tue May 27 21:16:51 UTC 2025
System load: 0.0 Processes: 120
Usage of /: 78.2% of 2.84GB Users logged in: 0
Memory usage: 10% IPv4 address for eth0: 192.168.66.156
Swap usage: 0%
Expanded Security Maintenance for Applications is not enabled.
0 updates can be applied immediately.
Enable ESM Apps to receive additional future security updates.
See https://ubuntu.com/esm or run: sudo pro status
The programs included with the Ubuntu system are free software;
the exact distribution terms for each program are described in the
individual files in /usr/share/doc/*/copyright.
Ubuntu comes with ABSOLUTELY NO WARRANTY, to the extent permitted by
applicable law.
To run a command as administrator (user "root"), use "sudo <command>".
See "man sudo_root" for details.
vez@zenith-vm:~$
```
✅ Tout fonctionne à merveille. Mon utilisateur est bien présent, avec les droits sudo, et le système est à jour.
### Idempotence
L'idempotence est un principe fondamental de Terraform : elle garantit que l'exécution répétée de votre code ne crée pas de doublons ni de modifications inattendues. Terraform vérifie ce qui est déjà en cours d'exécution et n'effectue de mises à jour qu'en cas de modification. Si rien n'a changé, Terraform se termine simplement sans modifier votre infrastructure. Vos déploiements sont ainsi sécurisés, reproductibles et fiables.
Testons ça avec un second `terraform apply`.
```bash
$ terraform apply
data.proxmox_virtual_environment_vms.template: Reading...
proxmox_virtual_environment_file.cloud_config: Refreshing state... [id=local:snippets/vm.cloud-config.yaml]
data.proxmox_virtual_environment_vms.template: Read complete after 1s [id=bc1b25f0-77d5-4b6a-b1a6-21cf39fbda17]
proxmox_virtual_environment_vm.vm: Refreshing state... [id=103]
No changes. Your infrastructure matches the configuration.
Terraform has compared your real infrastructure against your configuration and found no differences, so no changes are needed.
Apply complete! Resources: 0 added, 0 changed, 0 destroyed.
Outputs:
vm_ip = "192.168.66.156"
```
✅ Aucun changement, comme prévu !
### Supprimer l'Infrastructure
Pour supprimer une infrastructure gérée par Terraform, exécutez simplement la commande `terraform destroy`.
Terraform vous présentera un plan détaillé de tout ce qu'il va supprimer et vous demandera confirmation avant de poursuivre. Une fois la confirmation effectuée, toutes les ressources précédemment créées seront supprimées.
```bash
$ terraform destroy
data.proxmox_virtual_environment_vms.template: Reading...
proxmox_virtual_environment_file.cloud_config: Refreshing state... [id=local:snippets/vm.cloud-config.yaml]
data.proxmox_virtual_environment_vms.template: Read complete after 1s [id=d5b47a57-8074-4ddf-83cd-a99dceab0232]
proxmox_virtual_environment_vm.vm: Refreshing state... [id=103]
Terraform used the selected providers to generate the following execution plan. Resource actions are indicated with the following symbols:
- destroy
Terraform will perform the following actions:
# proxmox_virtual_environment_file.cloud_config will be destroyed
- resource "proxmox_virtual_environment_file" "cloud_config" {
- content_type = "snippets" -> null
- datastore_id = "local" -> null
- file_name = "vm.cloud-config.yaml" -> null
- id = "local:snippets/vm.cloud-config.yaml" -> null
- node_name = "zenith" -> null
- overwrite = true -> null
- timeout_upload = 1800 -> null
- source_raw {
- data = <<-EOT
#cloud-config
hostname: zenith-vm
package_update: true
package_upgrade: true
packages:
- qemu-guest-agent # Ensures the guest agent is installed
users:
- default
- name: vez
groups: sudo
shell: /bin/bash
ssh-authorized-keys:
- "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAID62LmYRu1rDUha3timAIcA39LtcIOny1iAgFLnxoBxm vez@bastion" # Inject user's SSH key
sudo: ALL=(ALL) NOPASSWD:ALL
runcmd:
- systemctl enable qemu-guest-agent
- reboot # Reboot the VM after provisioning
EOT -> null
- file_name = "vm.cloud-config.yaml" -> null
- resize = 0 -> null
}
}
# proxmox_virtual_environment_vm.vm will be destroyed
- resource "proxmox_virtual_environment_vm" "vm" {
- acpi = true -> null
- bios = "ovmf" -> null
- id = "103" -> null
- ipv4_addresses = [
- [
- "127.0.0.1",
],
- [
- "192.168.66.156",
],
] -> null
- ipv6_addresses = [
- [
- "::1",
],
- [
- "fe80::be24:11ff:feca:dc3f",
],
] -> null
- keyboard_layout = "en-us" -> null
- mac_addresses = [
- "00:00:00:00:00:00",
- "BC:24:11:CA:DC:3F",
] -> null
- machine = "q35" -> null
- migrate = false -> null
- name = "zenith-vm" -> null
- network_interface_names = [
- "lo",
- "eth0",
] -> null
- node_name = "zenith" -> null
- on_boot = true -> null
- protection = false -> null
- reboot = false -> null
- reboot_after_update = true -> null
- scsi_hardware = "virtio-scsi-pci" -> null
- started = true -> null
- stop_on_destroy = true -> null
- tablet_device = true -> null
- tags = [
- "test",
] -> null
- template = false -> null
- timeout_clone = 1800 -> null
- timeout_create = 1800 -> null
- timeout_migrate = 1800 -> null
- timeout_move_disk = 1800 -> null
- timeout_reboot = 1800 -> null
- timeout_shutdown_vm = 1800 -> null
- timeout_start_vm = 1800 -> null
- timeout_stop_vm = 300 -> null
- vm_id = 103 -> null
- agent {
- enabled = true -> null
- timeout = "15m" -> null
- trim = false -> null
- type = "virtio" -> null
}
- clone {
- full = true -> null
- node_name = "apex" -> null
- retries = 1 -> null
- vm_id = 900 -> null
# (1 unchanged attribute hidden)
}
- cpu {
- cores = 2 -> null
- flags = [] -> null
- hotplugged = 0 -> null
- limit = 0 -> null
- numa = false -> null
- sockets = 1 -> null
- type = "host" -> null
- units = 1024 -> null
# (2 unchanged attributes hidden)
}
- disk {
- aio = "io_uring" -> null
- backup = true -> null
- cache = "none" -> null
- datastore_id = "ceph-workload" -> null
- discard = "ignore" -> null
- file_format = "raw" -> null
- interface = "scsi0" -> null
- iothread = false -> null
- path_in_datastore = "vm-103-disk-1" -> null
- replicate = true -> null
- size = 4 -> null
- ssd = false -> null
# (2 unchanged attributes hidden)
}
- initialization {
- datastore_id = "ceph-workload" -> null
- interface = "scsi1" -> null
- user_data_file_id = "local:snippets/vm.cloud-config.yaml" -> null
# (4 unchanged attributes hidden)
- ip_config {
- ipv4 {
- address = "dhcp" -> null
# (1 unchanged attribute hidden)
}
}
- user_account {
- keys = [
- "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQCepytMtegvj8pf89dC8mWoGVAlvdpCkIThLcOiGW39ZCyRY9yXloAniaMXTAC8PHKbe4yPX4N0OovM5jNS5ofa1HQ1xEimgn9y185aSEf/J9msEW8LLy/+yb38vsDj5vYpRaurjUXfDVFti8rO1UWZ8zKuFvXJ18nBFJpViA8mHqwxUFnoNRyAMs4O8Fh3V8EnELOxb+T8p+nTTzBnYhUgYNPt61S3iAqD6QYHjelUzs8VxcxOdP/qO664jxQ7N96/zNsaTuV13FR286BuAelg3LUCpjZ2fy2mrSeKM6xOIY1mwPPCoglPiiHLTxZzo33pR0MAnDV9A3rJb3jBBifB vez-key",
] -> null
- password = (sensitive value) -> null
- username = "vez" -> null
}
}
- memory {
- dedicated = 2048 -> null
- floating = 0 -> null
- keep_hugepages = false -> null
- shared = 0 -> null
# (1 unchanged attribute hidden)
}
- network_device {
- bridge = "vmbr0" -> null
- disconnected = false -> null
- enabled = true -> null
- firewall = false -> null
- mac_address = "BC:24:11:CA:DC:3F" -> null
- model = "virtio" -> null
- mtu = 0 -> null
- queues = 0 -> null
- rate_limit = 0 -> null
- vlan_id = 66 -> null
# (1 unchanged attribute hidden)
}
- operating_system {
- type = "l26" -> null
}
- vga {
- memory = 16 -> null
- type = "std" -> null
# (1 unchanged attribute hidden)
}
}
Plan: 0 to add, 0 to change, 2 to destroy.
Changes to Outputs:
- vm_ip = "192.168.66.156" -> null
Do you really want to destroy all resources?
Terraform will destroy all your managed infrastructure, as shown above.
There is no undo. Only 'yes' will be accepted to confirm.
Enter a value: yes
proxmox_virtual_environment_vm.vm: Destroying... [id=103]
proxmox_virtual_environment_vm.vm: Destruction complete after 5s
proxmox_virtual_environment_file.cloud_config: Destroying... [id=local:snippets/vm.cloud-config.yaml]
proxmox_virtual_environment_file.cloud_config: Destruction complete after 0s
Destroy complete! Resources: 2 destroyed.
```
💣 **Boom** ! La VM est supprimée, prête à être redéployée si besoin.
---
## Conclusion
Dans cet article, on a vu comment déployer une VM sur Proxmox avec Terraform : depuis un template cloud-init jusqu'à une machine fonctionnelle accessible en SSH. Cette base me donne un environnement fiable, rapide à reconstruire.
Prochaine étape : transformer ce projet en module réutilisable, l'intégrer avec Ansible pour aller plus loin, et préparer le terrain pour déployer mon cluster Kubernetes. À suivre !

View File

@@ -1,972 +0,0 @@
---
slug: terraform-create-vm-proxmox
title: Deploy VM on Proxmox with Terraform
description: Learn how to deploy a VM on Proxmox using Terraform and a cloud-init template, making your infrastructure reproducible and easy to manage.
date: 2025-05-25
draft: false
tags:
- terraform
- proxmox
- cloud-init
categories:
- homelab
---
## Intro
One of the most satisfying parts of building a homelab is getting to apply production-grade tooling to a personal setup. I've been working on defining my entire infrastructure as code, and the first piece I tackled was VM deployment with **Terraform** on **Proxmox**.
In this article, I'll walk you through creating a simple VM on Proxmox using Terraform, based on a **cloud-init** template I covered in [this article]({{< ref "post/1-proxmox-cloud-init-vm-template" >}}). Everything runs from a dedicated LXC container where I manage my whole infrastructure.
📝 The full code used in this article is available in my [Homelab GitHub repository](https://github.com/Vezpi/Homelab)
---
## What is Terraform?
Terraform is an open-source IaC tool developed by **HashiCorp**. It lets you define and provision infrastructure using a high-level configuration language called **HCL** (HashiCorp Configuration Language). With Terraform, you can manage cloud services, VMs, networks, DNS records, and more.
In my homelab, Terraform simplifies VM deployment and makes my environment reproducible, so I can easily re-deploy everything from scratch as needed.
A quick mention of **OpenTofu**: it is a community-driven fork of Terraform that emerged after some licensing changes. It's almost fully compatible with Terraform and could be a great alternative down the line. But for now, I'm sticking with Terraform.
---
## Proxmox Terraform Providers
To use Terraform, you'll need a provider, a plugin that lets Terraform interact with your infrastructure; in the case of Proxmox, it talks to the Proxmox API. There are currently two providers:
- [**Telmate/proxmox**](https://registry.terraform.io/providers/Telmate/proxmox/latest): One of the original providers. It's widely used but not very actively maintained. It's simple to use, with plenty of documentation available online, but limited in features, with only 4 resources available and no data sources: for example, I wasn't able to retrieve node resource details.
- [**bpg/proxmox**](https://registry.terraform.io/providers/bpg/proxmox/latest): A newer and more actively developed provider, apparently maintained by a single developer, with cleaner syntax and much wider resource support. It was harder to set up, but I found it mature enough to work with.
I chose the `bpg/proxmox` provider because it's better maintained at the time of writing and I needed to retrieve node values, such as their hostname.
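To illustrate the kind of lookup I was after, here is a minimal sketch of a node data source with the bpg provider; the data source name and its `names` attribute reflect my reading of the provider docs, so treat them as assumptions and verify against the version you install:
```hcl
# Hypothetical example: list the cluster node names with the bpg provider (verify against the provider docs)
data "proxmox_virtual_environment_nodes" "all" {}
output "node_names" {
  value = data.proxmox_virtual_environment_nodes.all.names # Hostnames of the Proxmox cluster nodes
}
```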
---
## Prepare the Environment
### Create a Cloud-init VM Template in Proxmox
Check out my previous article on [Proxmox - Create a Cloud-Init VM Template]({{< ref "post/1-proxmox-cloud-init-vm-template" >}}).
### Install Terraform
For the Terraform installation into my LXC container, I followed the [documentation](https://developer.hashicorp.com/terraform/tutorials/aws-get-started/install-cli).
```bash
# Ensure that your system is up to date and that you have the `gnupg`, `software-properties-common`, and `curl` packages installed. You will use these packages to verify HashiCorp's GPG signature and install HashiCorp's Debian package repository.
apt-get update && apt-get install -y gnupg software-properties-common
# Install the HashiCorp [GPG key](https://apt.releases.hashicorp.com/gpg).
wget -O- https://apt.releases.hashicorp.com/gpg | gpg --dearmor | tee /usr/share/keyrings/hashicorp-archive-keyring.gpg > /dev/null
# Verify the key's fingerprint.
gpg --no-default-keyring --keyring /usr/share/keyrings/hashicorp-archive-keyring.gpg --fingerprint
# Add the official HashiCorp repository to your system. The `lsb_release -cs` command finds the distribution release codename for your current system, such as `buster`, `groovy`, or `sid`.
echo "deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] <https://apt.releases.hashicorp.com> $(lsb_release -cs) main" | tee /etc/apt/sources.list.d/hashicorp.list
# Download the package information from HashiCorp.
apt update
# Install Terraform from the new repository.
apt-get install terraform
```
### Create a Dedicated Terraform User on Proxmox
Before Terraform can interact with your Proxmox cluster, you want to create a dedicated user with limited privileges. You could use `root@pam`, but I wouldn't recommend it from a security perspective.
SSH into any Proxmox node using a privileged account, `root` in this case.
1. **Create the Role `TerraformUser`**
```bash
pveum role add TerraformUser -privs "\
Datastore.Allocate \
Datastore.AllocateSpace \
Datastore.Audit \
Pool.Allocate \
Sys.Audit \
Sys.Console \
Sys.Modify \
VM.Allocate \
VM.Audit \
VM.Clone \
VM.Config.CDROM \
VM.Config.Cloudinit \
VM.Config.CPU \
VM.Config.Disk \
VM.Config.HWType \
VM.Config.Memory \
VM.Config.Network \
VM.Config.Options \
VM.Console \
VM.Migrate \
VM.Monitor \
VM.PowerMgmt \
SDN.Use"
```
2. **Create the User `terraformer`**
```bash
pveum user add terraformer@pve --password <password>
```
3. **Assign the Role `TerraformUser` to the User `terraformer`**
```bash
pveum aclmod / -user terraformer@pve -role TerraformUser
```
4. **Create API Token for the user `terraformer`**
```bash
pveum user token add terraformer@pve terraform -expire 0 -privsep 0 -comment "Terraform token"
```
> ⚠️ **Copy** and **save** the token given!
### Install SSH Keys on your Proxmox Nodes
This step is required if you're using certain resources that need to run commands directly on the node to perform actions not supported by the Proxmox API, as detailed [here](https://registry.terraform.io/providers/bpg/proxmox/latest/docs#ssh-connection). This is the case for our setup with cloud-init.
We could use either an SSH agent or an SSH key; I preferred the latter, so we have to generate an SSH key and install it on the Proxmox nodes. Generate the key pair on the machine where Terraform runs.
1. **Generate the SSH key pair**
```bash
ssh-keygen
```
2. **Install it on your Proxmox node(s) for the root user**
```bash
ssh-copy-id root@<your Proxmox node>
```
---
## Deploy your First VM
Let's dive into the fun part! Now that our environment is ready to deploy VMs using Terraform on Proxmox, let's code!
### Terraform Code
> 📌 Reminder, you can find all the code I have written in my [Homelab repo](https://github.com/Vezpi/Homelab), the following code is located [here](https://github.com/Vezpi/Homelab/tree/main/terraform/projects/simple-vm). Don't forget to match your variables with your environment!
#### Code Structure
Here is the code structure. You can keep everything in a single `.tf` file, but I prefer to keep it organized.
```plaintext
terraform
`-- projects
`-- simple-vm
|-- credentials.auto.tfvars
|-- main.tf
|-- provider.tf
|-- terraform.tfvars
`-- variables.tf
```
#### `provider.tf`
Defines the provider configuration (e.g., Proxmox) and how Terraform connects to it.
```hcl
# Define the required Terraform provider block
terraform {
required_providers {
proxmox = {
source = "bpg/proxmox" # Use the community Proxmox provider from the bpg namespace
}
}
}
# Configure the Proxmox provider with API and SSH access
provider "proxmox" {
endpoint = var.proxmox_endpoint # Proxmox API URL (e.g., https://proxmox.local:8006/api2/json)
api_token = var.proxmox_api_token # API token for authentication (should have appropriate permissions)
insecure = false # Reject self-signed or invalid TLS certificates (set to true only in trusted/test environments)
# Optional SSH settings used for VM customization via SSH
ssh {
agent = false # Do not use the local SSH agent; use key file instead
private_key = file("~/.ssh/id_ed25519") # Load SSH private key from the local file system
username = "root" # SSH username for connecting to the Proxmox host
}
}
```
#### `main.tf`
Contains the core infrastructure logic, such as resources and modules to be deployed.
```hcl
# Retrieve VM templates available in Proxmox that match the specified name
data "proxmox_virtual_environment_vms" "template" {
filter {
name = "name"
values = ["${var.vm_template}"] # The name of the template to clone from
}
}
# Create a cloud-init configuration file as a Proxmox snippet
resource "proxmox_virtual_environment_file" "cloud_config" {
content_type = "snippets" # Cloud-init files are stored as snippets in Proxmox
datastore_id = "local" # Local datastore used to store the snippet
node_name = var.node_name # The Proxmox node where the file will be uploaded
source_raw {
file_name = "vm.cloud-config.yaml" # The name of the snippet file
data = <<-EOF
#cloud-config
hostname: ${var.vm_name}
package_update: true
package_upgrade: true
packages:
- qemu-guest-agent # Ensures the guest agent is installed
users:
- default
- name: ${var.vm_user}
groups: sudo
shell: /bin/bash
ssh-authorized-keys:
- "${var.vm_user_sshkey}" # Inject user's SSH key
sudo: ALL=(ALL) NOPASSWD:ALL
runcmd:
- systemctl enable qemu-guest-agent
- reboot # Reboot the VM after provisioning
EOF
}
}
# Define and provision a new VM by cloning the template and applying initialization
resource "proxmox_virtual_environment_vm" "vm" {
name = var.vm_name # VM name
node_name = var.node_name # Proxmox node to deploy the VM
tags = var.vm_tags # Optional VM tags for categorization
agent {
enabled = true # Enable the QEMU guest agent
}
stop_on_destroy = true # Ensure VM is stopped gracefully when destroyed
clone {
vm_id = data.proxmox_virtual_environment_vms.template.vms[0].vm_id # ID of the source template
node_name = data.proxmox_virtual_environment_vms.template.vms[0].node_name # Node of the source template
}
bios = var.vm_bios # BIOS type (e.g., seabios or ovmf)
machine = var.vm_machine # Machine type (e.g., q35)
cpu {
cores = var.vm_cpu # Number of CPU cores
type = "host" # Use host CPU type for best compatibility/performance
}
memory {
dedicated = var.vm_ram # RAM in MB
}
disk {
datastore_id = var.node_datastore # Datastore to hold the disk
interface = "scsi0" # Primary disk interface
size = 4 # Disk size in GB
}
initialization {
user_data_file_id = proxmox_virtual_environment_file.cloud_config.id # Link the cloud-init file
datastore_id = var.node_datastore
interface = "scsi1" # Separate interface for cloud-init
ip_config {
ipv4 {
address = "dhcp" # Get IP via DHCP
}
}
}
network_device {
bridge = "vmbr0" # Use the default bridge
vlan_id = var.vm_vlan # VLAN tagging if used
}
operating_system {
type = "l26" # Linux 2.6+ kernel
}
vga {
type = "std" # Standard VGA type
}
lifecycle {
ignore_changes = [ # Ignore initialization section after first deployment for idempotency
initialization
]
}
}
# Output the assigned IP address of the VM after provisioning
output "vm_ip" {
value = proxmox_virtual_environment_vm.vm.ipv4_addresses[1][0] # Second network interface's first IP
description = "VM IP"
}
```
#### `variables.tf`
Declares all input variables, their types, descriptions, and optional default values.
```hcl
variable "proxmox_endpoint" {
description = "Proxmox URL endpoint"
type = string
}
variable "proxmox_api_token" {
description = "Proxmox API token"
type = string
sensitive = true
}
variable "node_name" {
description = "Proxmox host for the VM"
type = string
}
variable "node_datastore" {
description = "Datastore used for VM storage"
type = string
default = "ceph-workload"
}
variable "vm_template" {
description = "Template of the VM"
type = string
default = "ubuntu-cloud"
}
variable "vm_name" {
description = "Hostname of the VM"
type = string
}
variable "vm_user" {
description = "Admin user of the VM"
type = string
default = "vez"
}
variable "vm_user_sshkey" {
description = "Admin user SSH key of the VM"
type = string
default = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAID62LmYRu1rDUha3timAIcA39LtcIOny1iAgFLnxoBxm vez@bastion"
}
variable "vm_cpu" {
description = "Number of CPU cores of the VM"
type = number
default = 1
}
variable "vm_ram" {
description = "Number of RAM (MB) of the VM"
type = number
default = 2048
}
variable "vm_bios" {
description = "Type of BIOS used for the VM"
type = string
default = "ovmf"
}
variable "vm_machine" {
description = "Type of machine used for the VM"
type = string
default = "q35"
}
variable "vm_vlan" {
description = "VLAN of the VM"
type = number
default = 66
}
variable "vm_tags" {
description = "Tags for the VM"
type = list(any)
default = ["test"]
}
```
#### `terraform.tfvars`
Automatically loaded variable values that override defaults, used to customize deployments.
```hcl
node_name = "zenith" # Name of the Proxmox node where the VM will be deployed
vm_name = "zenith-vm" # Desired name for the new virtual machine
vm_cpu = 2 # Number of CPU cores to allocate to the VM
vm_ram = 2048 # Amount of RAM in MB (2 GB)
vm_vlan = 66 # VLAN ID for network segmentation
```
#### `credentials.auto.tfvars`
Automatically loads sensitive variables such as API tokens or credentials at runtime. This file is not committed to the repository, so you will have to create it manually.
```hcl
proxmox_endpoint = <your Proxmox endpoint>
proxmox_api_token = <your Proxmox API token for the user terraformer>
```
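If you prefer not to keep secrets in a file at all, Terraform also reads any environment variable named `TF_VAR_<variable_name>` and maps it to the matching input variable. A minimal sketch, with placeholder values you would replace with your own:
```bash
# Equivalent to credentials.auto.tfvars, but nothing sensitive is written to disk
export TF_VAR_proxmox_endpoint="https://<your-proxmox-host>:8006/"
export TF_VAR_proxmox_api_token="<user>@<realm>!<token-id>=<token-secret>"
```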
> 💡 To improve readability, you can automatically format your Terraform code with `terraform fmt`; it applies the standard style conventions and keeps the configuration clean and consistent.
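For instance (the `-check` and `-diff` flags only report what would change, which is handy in CI):
```bash
# Rewrite all .tf files in the current directory and print the names of the files changed
terraform fmt
# Verify formatting without modifying anything; exits non-zero if changes would be made
terraform fmt -check -diff
```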
### Initialize your Workspace
The first step when working with Terraform is to initialize your workspace with the `terraform init` command, which will:
- Initialize the working directory
- Download the required providers
- Install modules
- Set up the backend
```bash
$ terraform init
Initializing the backend...
Initializing provider plugins...
- Finding latest version of bpg/proxmox...
- Installing bpg/proxmox v0.78.0...
- Installed bpg/proxmox v0.78.0 (self-signed, key ID F0582AD6AE97C188)
Partner and community providers are signed by their developers.
If you'd like to know more about provider signing, you can read about it here:
https://www.terraform.io/docs/cli/plugins/signing.html
Terraform has created a lock file .terraform.lock.hcl to record the provider
selections it made above. Include this file in your version control repository
so that Terraform can guarantee to make the same selections by default when
you run "terraform init" in the future.
Terraform has been successfully initialized!
You may now begin working with Terraform. Try running "terraform plan" to see
any changes that are required for your infrastructure. All Terraform commands
should now work.
If you ever set or change modules or backend configuration for Terraform,
rerun this command to reinitialize your working directory. If you forget, other
commands will detect it and remind you to do so if necessary.
```
### Deploy your Terraform Infrastructure
Great, our environment is now ready for deployment! Before creating the VM, you can run `terraform plan` against your code and Terraform will tell you exactly what it intends to do.
To actually apply those changes, run `terraform apply`:
```bash
$ terraform apply
data.proxmox_virtual_environment_vms.template: Reading...
data.proxmox_virtual_environment_vms.template: Read complete after 0s [id=d3292ffb-f11d-4588-9e97-fabc0f93cc39]
Terraform used the selected providers to generate the following execution plan. Resource actions are indicated with the following symbols:
+ create
Terraform will perform the following actions:
# proxmox_virtual_environment_file.cloud_config will be created
+ resource "proxmox_virtual_environment_file" "cloud_config" {
+ content_type = "snippets"
+ datastore_id = "local"
+ file_modification_date = (known after apply)
+ file_name = (known after apply)
+ file_size = (known after apply)
+ file_tag = (known after apply)
+ id = (known after apply)
+ node_name = "zenith"
+ overwrite = true
+ timeout_upload = 1800
+ source_raw {
+ data = <<-EOT
#cloud-config
hostname: zenith-vm
package_update: true
package_upgrade: true
packages:
- qemu-guest-agent # Ensures the guest agent is installed
users:
- default
- name: vez
groups: sudo
shell: /bin/bash
ssh-authorized-keys:
- "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAID62LmYRu1rDUha3timAIcA39LtcIOny1iAgFLnxoBxm vez@bastion" # Inject user's SSH key
sudo: ALL=(ALL) NOPASSWD:ALL
runcmd:
- systemctl enable qemu-guest-agent
- reboot # Reboot the VM after provisioning
EOT
+ file_name = "vm.cloud-config.yaml"
+ resize = 0
}
}
# proxmox_virtual_environment_vm.vm will be created
+ resource "proxmox_virtual_environment_vm" "vm" {
+ acpi = true
+ bios = "ovmf"
+ id = (known after apply)
+ ipv4_addresses = (known after apply)
+ ipv6_addresses = (known after apply)
+ keyboard_layout = "en-us"
+ mac_addresses = (known after apply)
+ machine = "q35"
+ migrate = false
+ name = "zenith-vm"
+ network_interface_names = (known after apply)
+ node_name = "zenith"
+ on_boot = true
+ protection = false
+ reboot = false
+ reboot_after_update = true
+ scsi_hardware = "virtio-scsi-pci"
+ started = true
+ stop_on_destroy = true
+ tablet_device = true
+ tags = [
+ "test",
]
+ template = false
+ timeout_clone = 1800
+ timeout_create = 1800
+ timeout_migrate = 1800
+ timeout_move_disk = 1800
+ timeout_reboot = 1800
+ timeout_shutdown_vm = 1800
+ timeout_start_vm = 1800
+ timeout_stop_vm = 300
+ vm_id = (known after apply)
+ agent {
+ enabled = true
+ timeout = "15m"
+ trim = false
+ type = "virtio"
}
+ clone {
+ full = true
+ node_name = "apex"
+ retries = 1
+ vm_id = 900
}
+ cpu {
+ cores = 2
+ hotplugged = 0
+ limit = 0
+ numa = false
+ sockets = 1
+ type = "host"
+ units = 1024
}
+ disk {
+ aio = "io_uring"
+ backup = true
+ cache = "none"
+ datastore_id = "ceph-workload"
+ discard = "ignore"
+ file_format = (known after apply)
+ interface = "scsi0"
+ iothread = false
+ path_in_datastore = (known after apply)
+ replicate = true
+ size = 4
+ ssd = false
}
+ initialization {
+ datastore_id = "ceph-workload"
+ interface = "scsi1"
+ meta_data_file_id = (known after apply)
+ network_data_file_id = (known after apply)
+ type = (known after apply)
+ user_data_file_id = (known after apply)
+ vendor_data_file_id = (known after apply)
+ ip_config {
+ ipv4 {
+ address = "dhcp"
}
}
}
+ memory {
+ dedicated = 2048
+ floating = 0
+ keep_hugepages = false
+ shared = 0
}
+ network_device {
+ bridge = "vmbr0"
+ enabled = true
+ firewall = false
+ mac_address = (known after apply)
+ model = "virtio"
+ mtu = 0
+ queues = 0
+ rate_limit = 0
+ vlan_id = 66
}
+ operating_system {
+ type = "l26"
}
+ vga {
+ memory = 16
+ type = "std"
}
}
Plan: 2 to add, 0 to change, 0 to destroy.
Changes to Outputs:
+ vm_ip = (known after apply)
Do you want to perform these actions?
Terraform will perform the actions described above.
Only 'yes' will be accepted to approve.
Enter a value: yes
proxmox_virtual_environment_file.cloud_config: Creating...
proxmox_virtual_environment_file.cloud_config: Creation complete after 1s [id=local:snippets/vm.cloud-config.yaml]
proxmox_virtual_environment_vm.vm: Creating...
proxmox_virtual_environment_vm.vm: Still creating... [10s elapsed]
proxmox_virtual_environment_vm.vm: Still creating... [20s elapsed]
proxmox_virtual_environment_vm.vm: Still creating... [30s elapsed]
proxmox_virtual_environment_vm.vm: Still creating... [40s elapsed]
proxmox_virtual_environment_vm.vm: Still creating... [50s elapsed]
proxmox_virtual_environment_vm.vm: Still creating... [1m0s elapsed]
proxmox_virtual_environment_vm.vm: Still creating... [1m10s elapsed]
proxmox_virtual_environment_vm.vm: Still creating... [1m20s elapsed]
proxmox_virtual_environment_vm.vm: Still creating... [1m30s elapsed]
proxmox_virtual_environment_vm.vm: Still creating... [1m40s elapsed]
proxmox_virtual_environment_vm.vm: Still creating... [1m50s elapsed]
proxmox_virtual_environment_vm.vm: Still creating... [2m0s elapsed]
proxmox_virtual_environment_vm.vm: Still creating... [2m10s elapsed]
proxmox_virtual_environment_vm.vm: Still creating... [2m20s elapsed]
proxmox_virtual_environment_vm.vm: Still creating... [2m30s elapsed]
proxmox_virtual_environment_vm.vm: Still creating... [2m40s elapsed]
proxmox_virtual_environment_vm.vm: Still creating... [2m50s elapsed]
proxmox_virtual_environment_vm.vm: Creation complete after 2m53s [id=103]
Apply complete! Resources: 2 added, 0 changed, 0 destroyed.
Outputs:
vm_ip = "192.168.66.156"
```
✅ Done! We've successfully created our first VM on Proxmox using Terraform in just a few minutes.
![Summary of the new VM created on Proxmox](img/proxmox-terraform-new-vm.png)
### SSH Connection
🍒 Cherry on the cake: Terraform gives us the IP address, and thanks to cloud-init, SSH is ready to go.
```bash
$ ssh 192.168.66.156
The authenticity of host '192.168.66.156 (192.168.66.156)' can't be established.
ED25519 key fingerprint is SHA256:kSaXpIJYpJOBYfpVqiiH8OxhpgBY9WH/ggqFHo/20rg.
This key is not known by any other names.
Are you sure you want to continue connecting (yes/no/[fingerprint])? yes
Warning: Permanently added '192.168.66.156' (ED25519) to the list of known hosts.
Welcome to Ubuntu 24.04.2 LTS (GNU/Linux 6.8.0-60-generic x86_64)
* Documentation: https://help.ubuntu.com
* Management: https://landscape.canonical.com
* Support: https://ubuntu.com/pro
System information as of Tue May 27 21:16:51 UTC 2025
System load: 0.0 Processes: 120
Usage of /: 78.2% of 2.84GB Users logged in: 0
Memory usage: 10% IPv4 address for eth0: 192.168.66.156
Swap usage: 0%
Expanded Security Maintenance for Applications is not enabled.
0 updates can be applied immediately.
Enable ESM Apps to receive additional future security updates.
See https://ubuntu.com/esm or run: sudo pro status
The programs included with the Ubuntu system are free software;
the exact distribution terms for each program are described in the
individual files in /usr/share/doc/*/copyright.
Ubuntu comes with ABSOLUTELY NO WARRANTY, to the extent permitted by
applicable law.
To run a command as administrator (user "root"), use "sudo <command>".
See "man sudo_root" for details.
vez@zenith-vm:~$
```
✅ This works like a charm, wonderful! We can see that my user is already created, it has full sudo permissions, and the system is up to date.
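Since the address is exposed as the `vm_ip` output, you don't even have to copy it by hand; something like this works from the Terraform directory (using the default `vm_user` defined in `variables.tf`):
```bash
# Read the output straight from the Terraform state and connect to the VM
ssh vez@$(terraform output -raw vm_ip)
```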
### Idempotency
Idempotency is a core principle in Terraform that ensures running your code multiple times won't create duplicates or unexpected changes. Terraform checks what's already running and only makes updates if something has actually changed. If nothing has changed, Terraform simply exits without modifying your infrastructure. This makes your deployments safe, repeatable, and easy to trust.
So let's run `terraform apply` a second time and see what happens:
```bash
$ terraform apply
data.proxmox_virtual_environment_vms.template: Reading...
proxmox_virtual_environment_file.cloud_config: Refreshing state... [id=local:snippets/vm.cloud-config.yaml]
data.proxmox_virtual_environment_vms.template: Read complete after 1s [id=bc1b25f0-77d5-4b6a-b1a6-21cf39fbda17]
proxmox_virtual_environment_vm.vm: Refreshing state... [id=103]
No changes. Your infrastructure matches the configuration.
Terraform has compared your real infrastructure against your configuration and found no differences, so no changes are needed.
Apply complete! Resources: 0 added, 0 changed, 0 destroyed.
Outputs:
vm_ip = "192.168.66.156"
```
✅ No change as expected!
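If you want to script this kind of drift check, for example in a CI job, `terraform plan -detailed-exitcode` is convenient: it exits with 0 when nothing would change, 1 on error, and 2 when a change is pending. A small sketch:
```bash
# 0 = in sync, 1 = error, 2 = changes pending
terraform plan -detailed-exitcode -input=false > /dev/null
case $? in
  0) echo "Infrastructure matches the configuration" ;;
  2) echo "Drift detected: an apply would make changes" ;;
  *) echo "Plan failed"; exit 1 ;;
esac
```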
### Remove your Infrastructure
To remove a Terraform-managed infrastructure, simply run the command `terraform destroy`.
Terraform will show you a detailed plan of everything it's about to delete and ask for confirmation before proceeding. Once confirmed, it removes all resources it previously created.
```bash
$ terraform destroy
data.proxmox_virtual_environment_vms.template: Reading...
proxmox_virtual_environment_file.cloud_config: Refreshing state... [id=local:snippets/vm.cloud-config.yaml]
data.proxmox_virtual_environment_vms.template: Read complete after 1s [id=d5b47a57-8074-4ddf-83cd-a99dceab0232]
proxmox_virtual_environment_vm.vm: Refreshing state... [id=103]
Terraform used the selected providers to generate the following execution plan. Resource actions are indicated with the following symbols:
- destroy
Terraform will perform the following actions:
# proxmox_virtual_environment_file.cloud_config will be destroyed
- resource "proxmox_virtual_environment_file" "cloud_config" {
- content_type = "snippets" -> null
- datastore_id = "local" -> null
- file_name = "vm.cloud-config.yaml" -> null
- id = "local:snippets/vm.cloud-config.yaml" -> null
- node_name = "zenith" -> null
- overwrite = true -> null
- timeout_upload = 1800 -> null
- source_raw {
- data = <<-EOT
#cloud-config
hostname: zenith-vm
package_update: true
package_upgrade: true
packages:
- qemu-guest-agent # Ensures the guest agent is installed
users:
- default
- name: vez
groups: sudo
shell: /bin/bash
ssh-authorized-keys:
- "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAID62LmYRu1rDUha3timAIcA39LtcIOny1iAgFLnxoBxm vez@bastion" # Inject user's SSH key
sudo: ALL=(ALL) NOPASSWD:ALL
runcmd:
- systemctl enable qemu-guest-agent
- reboot # Reboot the VM after provisioning
EOT -> null
- file_name = "vm.cloud-config.yaml" -> null
- resize = 0 -> null
}
}
# proxmox_virtual_environment_vm.vm will be destroyed
- resource "proxmox_virtual_environment_vm" "vm" {
- acpi = true -> null
- bios = "ovmf" -> null
- id = "103" -> null
- ipv4_addresses = [
- [
- "127.0.0.1",
],
- [
- "192.168.66.156",
],
] -> null
- ipv6_addresses = [
- [
- "::1",
],
- [
- "fe80::be24:11ff:feca:dc3f",
],
] -> null
- keyboard_layout = "en-us" -> null
- mac_addresses = [
- "00:00:00:00:00:00",
- "BC:24:11:CA:DC:3F",
] -> null
- machine = "q35" -> null
- migrate = false -> null
- name = "zenith-vm" -> null
- network_interface_names = [
- "lo",
- "eth0",
] -> null
- node_name = "zenith" -> null
- on_boot = true -> null
- protection = false -> null
- reboot = false -> null
- reboot_after_update = true -> null
- scsi_hardware = "virtio-scsi-pci" -> null
- started = true -> null
- stop_on_destroy = true -> null
- tablet_device = true -> null
- tags = [
- "test",
] -> null
- template = false -> null
- timeout_clone = 1800 -> null
- timeout_create = 1800 -> null
- timeout_migrate = 1800 -> null
- timeout_move_disk = 1800 -> null
- timeout_reboot = 1800 -> null
- timeout_shutdown_vm = 1800 -> null
- timeout_start_vm = 1800 -> null
- timeout_stop_vm = 300 -> null
- vm_id = 103 -> null
- agent {
- enabled = true -> null
- timeout = "15m" -> null
- trim = false -> null
- type = "virtio" -> null
}
- clone {
- full = true -> null
- node_name = "apex" -> null
- retries = 1 -> null
- vm_id = 900 -> null
# (1 unchanged attribute hidden)
}
- cpu {
- cores = 2 -> null
- flags = [] -> null
- hotplugged = 0 -> null
- limit = 0 -> null
- numa = false -> null
- sockets = 1 -> null
- type = "host" -> null
- units = 1024 -> null
# (2 unchanged attributes hidden)
}
- disk {
- aio = "io_uring" -> null
- backup = true -> null
- cache = "none" -> null
- datastore_id = "ceph-workload" -> null
- discard = "ignore" -> null
- file_format = "raw" -> null
- interface = "scsi0" -> null
- iothread = false -> null
- path_in_datastore = "vm-103-disk-1" -> null
- replicate = true -> null
- size = 4 -> null
- ssd = false -> null
# (2 unchanged attributes hidden)
}
- initialization {
- datastore_id = "ceph-workload" -> null
- interface = "scsi1" -> null
- user_data_file_id = "local:snippets/vm.cloud-config.yaml" -> null
# (4 unchanged attributes hidden)
- ip_config {
- ipv4 {
- address = "dhcp" -> null
# (1 unchanged attribute hidden)
}
}
- user_account {
- keys = [
- "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQCepytMtegvj8pf89dC8mWoGVAlvdpCkIThLcOiGW39ZCyRY9yXloAniaMXTAC8PHKbe4yPX4N0OovM5jNS5ofa1HQ1xEimgn9y185aSEf/J9msEW8LLy/+yb38vsDj5vYpRaurjUXfDVFti8rO1UWZ8zKuFvXJ18nBFJpViA8mHqwxUFnoNRyAMs4O8Fh3V8EnELOxb+T8p+nTTzBnYhUgYNPt61S3iAqD6QYHjelUzs8VxcxOdP/qO664jxQ7N96/zNsaTuV13FR286BuAelg3LUCpjZ2fy2mrSeKM6xOIY1mwPPCoglPiiHLTxZzo33pR0MAnDV9A3rJb3jBBifB vez-key",
] -> null
- password = (sensitive value) -> null
- username = "vez" -> null
}
}
- memory {
- dedicated = 2048 -> null
- floating = 0 -> null
- keep_hugepages = false -> null
- shared = 0 -> null
# (1 unchanged attribute hidden)
}
- network_device {
- bridge = "vmbr0" -> null
- disconnected = false -> null
- enabled = true -> null
- firewall = false -> null
- mac_address = "BC:24:11:CA:DC:3F" -> null
- model = "virtio" -> null
- mtu = 0 -> null
- queues = 0 -> null
- rate_limit = 0 -> null
- vlan_id = 66 -> null
# (1 unchanged attribute hidden)
}
- operating_system {
- type = "l26" -> null
}
- vga {
- memory = 16 -> null
- type = "std" -> null
# (1 unchanged attribute hidden)
}
}
Plan: 0 to add, 0 to change, 2 to destroy.
Changes to Outputs:
- vm_ip = "192.168.66.156" -> null
Do you really want to destroy all resources?
Terraform will destroy all your managed infrastructure, as shown above.
There is no undo. Only 'yes' will be accepted to confirm.
Enter a value: yes
proxmox_virtual_environment_vm.vm: Destroying... [id=103]
proxmox_virtual_environment_vm.vm: Destruction complete after 5s
proxmox_virtual_environment_file.cloud_config: Destroying... [id=local:snippets/vm.cloud-config.yaml]
proxmox_virtual_environment_file.cloud_config: Destruction complete after 0s
Destroy complete! Resources: 2 destroyed.
```
💣 **Boom**! The VM has been destroyed and we can now redeploy another instance at will!
---
## Conclusion
In this post, we explored how to deploy a VM on Proxmox using Terraform, starting from a cloud-init template and ending with a working virtual machine you can SSH into. With this setup in place, I now have a reliable way to deploy and destroy VMs quickly and consistently.
My next step is to turn this foundation into a reusable module and use it as a base for future projects, like integrating with Ansible for further automation and even deploying my Kubernetes cluster. Stay tuned!
View File
@@ -1,425 +0,0 @@
---
slug: blog-deployment-ci-cd-pipeline-gitea-actions
title: Pipeline CI/CD du Déploiement du Blog avec Gitea Actions
description: Comment j'ai sécurisé le déploiement automatisé de mon blog self-hosted construit avec Hugo en mettant en place un pipeline CI/CD à l'aide de Gitea Actions.
date: 2025-06-05
draft: false
tags:
- hugo
- docker
- ci-cd
- gitea-actions
categories:
- blog
---
## Intro
Maintenant que mon blog est en ligne, je ne peux plus vraiment me permettre de le faire tomber à la moindre modification. J'avais bien une version "preview" de mon blog qui était générée en même temps que la version publique, mais celle-ci reposait sur le même contenu et me permettait uniquement de voir les pages en mode brouillon.
Le blog étant redéployé de façon automatique à chaque modification du contenu dans Obsidian, détaillé dans [cet article]({{< ref "post/2-blog-deployment-obisidan-hugo-gitea-actions" >}}), je ne vérifie pas systématiquement si le déploiement s'est planté ou non. Je devais donc trouver une solution pour le protéger de mes bêtises.
## Sécuriser le Déploiement du Blog
Aujourd'hui mon blog se redéploie automatiquement à chaque modification de la branche `main` du [dépôt Git](https://git.vezpi.com/Vezpi/Blog) de mon instance **Gitea** via une **Gitea Actions**. Chaque modification apportée à mon vault **Obsidian** est poussée automatiquement dans cette branche.
![Workflow depuis l'écriture de notes sur Obsidian au Blog publié](img/obsidian-blog-gitea-actions-workflow.png)
### Créer une Nouvelle Branche
La première partie, la plus simple, a été de créer une nouvelle branche qui allait recevoir ces modifications. J'ai donc créé la branche `preview` dans ce dépôt, puis j'ai modifié la branche cible recevant les modifications dans le workflow de mon dépôt Git Obsidian.
![Create the preview branch from the main branch in Gitea](img/gitea-create-new-branch.png)
### Containeriser le Blog
Le blog généré avec **Hugo** est sous forme de fichiers statiques, qui sont localisés sur un filesystem de ma Machine Virtuelle `dockerVM`, et montés sous forme de volume dans un conteneur `nginx`.
Je ne voulais plus avoir ces fichiers montés dans un volume, mais qu'ils soient générés au lancement du conteneur, ainsi je pourrai faire vivre plusieurs instances indépendantes de mon blog.
Pour la 2ème partie, il me faut donc construire une image **Docker** qui doit réaliser ces opérations:
1. Télécharger le binaire `hugo`.
2. Cloner le dépôt Git de mon blog.
3. Générer les pages statiques avec `hugo`.
4. Servir les pages web.
#### Construire l'Image Docker
Un conteneur Docker est basé sur une image, un modèle contenant déjà des instructions exécutées à l'avance. Une fois le conteneur démarré, il peut alors exécuter une autre série d'actions, comme lancer un serveur ou un script.
Pour construire une image Docker, il faut un fichier appelé `Dockerfile` qui regroupe les actions à effectuer pour sa construction, on peut également y ajouter d'autres fichiers, comme ici un script nommé `entrypoint.sh` qui sera alors le processus lancé au démarrage du conteneur.
```plaintext
docker/
├── Dockerfile
├── entrypoint.sh
└── nginx.conf
```
##### Dockerfile
Dans mon cas je voulais que l'image, basé sur `nginx`, contienne la configuration du serveur web, le binaire `hugo`, qu'elle soit capable de cloner mon dépôt Git et qu'elle lance un script à son exécution.
```Dockerfile
FROM nginx:stable
ARG HUGO_VERSION
ENV HUGO_VERSION=${HUGO_VERSION}
ENV HUGO_DEST=/usr/share/nginx/html
# Install dependencies
RUN apt-get update && apt-get install -y \
curl \
git \
ca-certificates \
&& rm -rf /var/lib/apt/lists/*
# Download Hugo
RUN curl -sSL https://github.com/gohugoio/hugo/releases/download/v${HUGO_VERSION}/hugo_extended_${HUGO_VERSION}_Linux-64bit.tar.gz \
| tar -xz -C /usr/local/bin hugo
# Add entrypoint script
COPY entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh
# Copy custom nginx config
COPY nginx.conf /etc/nginx/conf.d/default.conf
# Nginx serves on port 80
EXPOSE 80
# Set default entrypoint
ENTRYPOINT ["/entrypoint.sh"]
```
##### entrypoint.sh
Par défaut, au lancement d'un conteneur `nginx`, il se contente de lancer le serveur web. Ici, je voulais qu'avant cela, il clone une branche du dépôt Git de mon blog et qu'à partir de cette branche, il génère les fichiers statiques avec `hugo`.
```sh
#!/bin/sh
set -e
# Configuration
REPO_URL="${REPO_URL:-https://git.vezpi.com/Vezpi/blog.git}"
URL="${URL:-blog.vezpi.com}"
BRANCH="${BRANCH:-preview}"
CLONE_DIR="${CLONE_DIR:-/blog}"
DRAFTS=""
# Add drafts for preview
if [ "$BRANCH" = "preview" ]; then
echo "- Adding draft pages to be generated"
DRAFTS="--buildDrafts"
fi
# Clone repo
echo "- Cloning $REPO_URL (branch: $BRANCH)..."
git clone --depth 1 --recurse-submodules --branch "$BRANCH" "$REPO_URL" "$CLONE_DIR"
# Generate static files with hugo
echo "- Building site with Hugo v$HUGO_VERSION in $HUGO_DEST..."
hugo --source "$CLONE_DIR" --destination "$HUGO_DEST" --baseURL="https://${URL}" "$DRAFTS" --logLevel info --cleanDestinationDir --gc --panicOnWarning --printI18nWarnings
# Start nginx
echo "- Starting Nginx..."
exec nginx -g 'daemon off;'
```
Je spécifie ici à `hugo` de sortir en erreur dès qu'un warning est généré : cela empêchera le conteneur de démarrer correctement et permettra d'identifier un éventuel problème.
Je peux maintenant construire mon image Docker, avec comme argument la version d'Hugo désirée :
```bash
$ docker build --build-arg HUGO_VERSION=0.147.6 .
[+] Building 4.3s (11/11) FINISHED
=> [internal] load build definition from Dockerfile
=> => transferring dockerfile: 786B
=> [internal] load metadata for docker.io/library/nginx:stable
=> [internal] load .dockerignore
=> => transferring context: 2B
=> [1/6] FROM docker.io/library/nginx:stable@sha256:eaa7e36decc3421fc04478c586dfea0d931cebe47d5bc0b15d758a32ba51126f
=> [internal] load build context
=> => transferring context: 1.16kB
=> CACHED [2/6] RUN apt-get update && apt-get install -y curl git ca-certificates && rm -rf /var/lib/apt/lists/*
=> CACHED [3/6] RUN curl -sSL https://github.com/gohugoio/hugo/releases/download/v0.147.6/hugo_extended_0.147.6_Linux-64bit.tar.gz | tar -xz -C /usr/local/bin hugo
=> [4/6] COPY entrypoint.sh /entrypoint.sh
=> [5/6] RUN chmod +x /entrypoint.sh
=> [6/6] COPY nginx.conf /etc/nginx/conf.d/default.conf
=> exporting to image
=> => exporting layers
=> => writing image sha256:07cbeea704f3af16dc71a0890539776c87a95972a6c8f7d4fb24ea0eeab17032
```
✅ Maintenant que j'ai mon image, je peux lancer une nouvelle instance de mon blog, sans me préoccuper de ce que j'ai actuellement sur le FS de ma VM. Je peux également choisir à partir de quelle branche de mon dépôt Git, le contenu sera généré.
Mais je ne peux toujours pas prédire si ces instances sont fonctionnelles, il me faut pouvoir les **tester** et enfin les **déployer**.
Afin d'automatiser ce déploiement, je vais construire un **Pipeline CI/CD**.
### Pipeline CI/CD
Un pipeline CI/CD est une suite d'étapes automatisées qui permettent de tester, construire et déployer une application. La partie **CI (Intégration Continue)** vérifie que le code fonctionne bien à chaque modification (par exemple en lançant des tests), tandis que la **CD (Déploiement Continu)** s'occupe de livrer automatiquement ce code vers un environnement de test ou de production. Cela rend les mises à jour plus rapides, fiables et régulières.
Il existe plusieurs outils :
- **CI** : Jenkins, Travis CI, etc.
- **CD** Argo CD, Flux CD, etc.
- **CI/CD** : GitLab CI/CD, GitHub Actions, etc.
Dans mon cas je vais réutiliser les **Gitea Actions** très similaire à GitHub Actions, une plateforme CI/CD intégré à **Gitea**, qui fonctionne avec des workflows définis dans des fichiers `YAML` placés dans le dépôt Git.
À chaque événement, comme un push ou une création de tag, Gitea Actions va lancer automatiquement une série d'étapes (tests, build, déploiement…) dans un environnement isolé, basé sur des conteneurs Docker.
#### Gitea Runners
Les workflows Gitea Actions utilisent des **Gitea Runners**, ils récupèrent les jobs et les lancent dans des conteneurs Docker, assurant un environnement propre et isolé pour chaque étape.
Comme les instances de mon blog sont gérées par `docker` (précisément par `docker compose`), je voulais que le `runner` puisse interagir avec le démon Docker de `dockerVM`. Pour ce faire, j'ai dû ajouter au catalogue de mon `runner` l'image `docker:cli` et lui donner accès au `docker.socket` de la VM.
Voici la nouvelle configuration de mon `runner` dans ma stack Gitea, gérée par `docker compose` également :
```yaml
runner:
image: gitea/act_runner:latest
container_name: gitea_runner
restart: always
environment:
- GITEA_INSTANCE_URL=https://git.vezpi.com
- GITEA_RUNNER_REGISTRATION_TOKEN=<token>
- GITEA_RUNNER_NAME=self-hosted
- GITEA_RUNNER_LABELS=ubuntu:docker://node:lts,alpine:docker://node:lts-alpine,docker:docker://docker:cli
- CONFIG_FILE=/data/config.yml
volumes:
- /var/run/docker.sock:/var/run/docker.sock
- /appli/data/gitea/runner:/data
- /appli:/appli
networks:
- backend
depends_on:
- server
```
#### Workflow
Avant j'utilisais un workflow simple qui était déclenché à chaque push sur la branche `main` du dépôt Git de mon blog, voici ce qu'il faisait :
1. Checkout de mon dépôt Git dans le FS de ma VM `dockerVM`.
2. Télécharge le binaire `hugo` si une nouvelle version était disponible.
3. Génère les fichiers statiques du blog avec `hugo`.
Maintenant voici ce que le nouveau workflow fait :
1. **Check-Rebuild** : Vérifie si une nouvelle version d'Hugo est disponible et vérifie si le dossier `docker` du dépôt a été modifié.
2. **Build** : Si le job précédent le suggère, reconstruit l'image Docker `vezpi-blog` et la tag avec la version d'Hugo.
3. **Deploy-Staging** : Déploie le blog avec la branche `preview` sur une URL de test avec `docker compose`.
4. **Test-Staging** : Vérifie que le blog en version `preview` répond et fonctionne.
5. **Merge** : Merge la branche `preview` avec la branche `main`.
6. **Deploy-Production** : Déploie le blog avec la branche `main`, la version publique avec `docker compose`.
7. **Test-Production** : Vérifie que le blog en version `main` répond et fonctionne.
8. **Clean** : Supprime l'ancienne image Docker.
Voici un exemple de déploiement après un commit automatique généré par **Obsidian**. On peut voir ici que l'image Docker n'a pas été reconstruite car il n'y avait pas de nouvelle version d'Hugo disponible et que le dossier `docker` n'avait pas été modifié ; de ce fait, le dernier job `Clean` n'était pas non plus nécessaire.
![Gitea Actions workflow for blog deployment](img/gitea-actions-deploy-blog-workflow.png)
#### Code
Le workflow est écrit en `YAML` et doit être localisé dans le répertoire `.gitea/workflows/` du dépôt Git.
```yaml
name: Blog Deployment
on:
push:
branches:
- preview
env:
DOCKER_IMAGE: vezpi-blog
jobs:
Check-Rebuild:
runs-on: docker
defaults:
run:
shell: sh
outputs:
latest_hugo_version: ${{ steps.get_latest.outputs.version }}
current_hugo_version: ${{ steps.get_current.outputs.version }}
newer_version_available: ${{ steps.compare.outputs.version }}
current_docker_image: ${{ steps.current_docker.outputs.image }}
docker_folder_changed: ${{ steps.docker_folder.outputs.changed }}
steps:
- name: Checkout Repository
run: git clone --branch preview https://${{ secrets.REPO_TOKEN }}@git.vezpi.com/Vezpi/blog.git .
- name: Check Latest Hugo Version
id: get_latest
run: |
apk add curl
latest_version=$(curl -s https://api.github.com/repos/gohugoio/hugo/releases/latest | grep tag_name | sed -E 's/.*"v([^"]+)".*/\1/')
echo "version=$latest_version" | tee -a $GITEA_OUTPUT
- name: Check Current Hugo Version
id: get_current
run: |
current_version=$(docker image ls ${DOCKER_IMAGE} --format '{{.Tag}}' | head -n1)
echo "version=$current_version" | tee -a $GITEA_OUTPUT
- name: Compare Current and Latest Hugo Versions
id: compare
run: |
if [ "${{ steps.get_latest.outputs.version }}" != "${{ steps.get_current.outputs.version }}" ]; then
new_version_available=true
echo "New version available: ${{ steps.get_latest.outputs.version }}"
else
new_version_available=false
echo "Current version is the latest: ${{ steps.get_latest.outputs.version }}"
fi
echo "version=$new_version_available" | tee -a $GITEA_OUTPUT
- name: Get Current Docker Image ID
id: current_docker
run: |
current_image=$(docker image ls ${DOCKER_IMAGE}:latest --format '{{.ID}}' | head -n1)
echo "image=$current_image" | tee -a $GITEA_OUTPUT
- name: Check Changes in the Docker Folder
id: docker_folder
run: |
if git diff --name-only origin/main | grep -q '^docker/';
then
docker_folder_changed=true
echo "Change detected in the /docker folder"
else
docker_folder_changed=false
echo "No change in the /docker folder"
fi
echo "changed=$docker_folder_changed" | tee -a $GITEA_OUTPUT
Build:
needs: Check-Rebuild
if: needs.Check-Rebuild.outputs.newer_version_available == 'true' || needs.Check-Rebuild.outputs.docker_folder_changed == 'true'
runs-on: docker
defaults:
run:
shell: sh
steps:
- name: Checkout Repository
run: git clone --branch preview https://${{ secrets.REPO_TOKEN }}@git.vezpi.com/Vezpi/blog.git .
- name: Build Docker Image
run: |
cd docker
docker build \
--build-arg HUGO_VERSION=${{ needs.Check-Rebuild.outputs.latest_hugo_version }} \
--tag ${DOCKER_IMAGE}:${{ needs.Check-Rebuild.outputs.latest_hugo_version }} \
.
docker tag ${DOCKER_IMAGE}:${{ needs.Check-Rebuild.outputs.latest_hugo_version }} ${DOCKER_IMAGE}:latest
Deploy-Staging:
needs:
- Check-Rebuild
- Build
if: always() && needs.Check-Rebuild.result == 'success' && (needs.Build.result == 'skipped' || needs.Build.result == 'success')
runs-on: docker
container:
volumes:
- /appli/docker/blog:/blog
defaults:
run:
shell: sh
env:
CONTAINER_NAME: blog_staging
steps:
- name: Launch Blog Deployment
run: |
cd /blog
docker compose down ${CONTAINER_NAME}
docker compose up -d ${CONTAINER_NAME}
sleep 5
echo "- Displaying container logs"
docker compose logs ${CONTAINER_NAME}
Test-Staging:
needs: Deploy-Staging
runs-on: ubuntu
env:
URL: "https://blog-dev.vezpi.com/en/"
steps:
- name: Check HTTP Response
run: |
code=$(curl -s -o /dev/null -w "%{http_code}" "$URL")
echo "HTTP response code: $code"
if [ "$code" -ne 200 ]; then
echo "❌ Service is not healthy (HTTP $code)"
exit 1
else
echo "✅ Service is healthy"
fi
Merge:
needs: Test-Staging
runs-on: ubuntu
steps:
- name: Checkout Repository
uses: actions/checkout@v4
with:
fetch-depth: 0
ref: main
- name: Merge preview Branch on main
run: |
git merge --ff-only origin/preview
git push origin main
Deploy-Production:
needs: Merge
runs-on: docker
container:
volumes:
- /appli/docker/blog:/blog
defaults:
run:
shell: sh
env:
CONTAINER_NAME: blog_production
steps:
- name: Launch Blog Deployment
run: |
cd /blog
docker compose down ${CONTAINER_NAME}
docker compose up -d ${CONTAINER_NAME}
sleep 5
echo "- Displaying container logs"
docker compose logs ${CONTAINER_NAME}
Test-Production:
needs: Deploy-Production
runs-on: ubuntu
env:
URL: "https://blog.vezpi.com/en/"
steps:
- name: Check HTTP Response
run: |
code=$(curl -s -o /dev/null -w "%{http_code}" "$URL")
echo "HTTP response code: $code"
if [ "$code" -ne 200 ]; then
echo "❌ Service is not healthy (HTTP $code)"
exit 1
else
echo "✅ Service is healthy"
fi
Clean:
needs:
- Check-Rebuild
- Build
- Test-Production
runs-on: docker
defaults:
run:
shell: sh
steps:
- name: Remove Old Docker Image
run: |
docker image rm ${{ needs.Check-Rebuild.outputs.current_docker_image }} --force
```
## Résultats
Avec ce nouveau workflow et ce pipeline CI/CD, je suis beaucoup plus serein lorsque je modifie le contenu de mes pages depuis Obsidian en Markdown ou lorsque je modifie la configuration d'`hugo`.
La prochaine étape sera de renforcer l'étape des tests, un simple `curl` n'est clairement pas suffisant pour s'assurer du bon fonctionnement du blog. Je veux aussi rajouter un système de notification pour m'alerter lorsque le workflow se plante. À bientôt !
View File
@@ -1,425 +0,0 @@
---
slug: blog-deployment-ci-cd-pipeline-gitea-actions
title: Blog Deployment CI/CD Pipeline using Gitea Actions
description: How I secured the automated deployment of my self-hosted blog built with Hugo by setting up a CI/CD pipeline using Gitea Actions.
date: 2025-06-05
draft: false
tags:
- hugo
- docker
- ci-cd
- gitea-actions
categories:
- blog
---
## Intro
Now that my blog is live, I can't really afford to break it with every single change. I did have a "preview" version of the blog that was generated alongside the public version, but it relied on the same content and only allowed me to view pages in draft mode.
Since the blog is automatically redeployed every time I modify content in Obsidian, as explained in [this article]({{< ref "post/2-blog-deployment-obisidan-hugo-gitea-actions" >}}), I don't always check whether the deployment failed or not. So I needed a way to protect it from my mistakes.
## Securing the Blog Deployment
Currently, my blog redeploys automatically on every change to the `main` branch of the [Git repository](https://git.vezpi.com/Vezpi/Blog) hosted on my **Gitea** instance, using a **Gitea Actions** workflow. Every change made in my **Obsidian** vault is automatically pushed to this branch.
![Workflow from writing notes in Obsidian to the published blog](img/obsidian-blog-gitea-actions-workflow.png)
### Create a New Branch
The first and easiest step was to create a new branch to receive these changes. So I created a `preview` branch in this repository and then updated the target branch in the workflow of my Obsidian Git repo.
![Create the preview branch from the main branch in Gitea](img/gitea-create-new-branch.png)
### Containerize the Blog
The blog, generated with **Hugo**, is made of static files stored on the filesystem of my Virtual Machine `dockerVM` and mounted as a volume in an `nginx` container.
I wanted to stop using mounted volumes and instead have the files generated at container startup, allowing me to run multiple independent instances of the blog.
So the second part was to build a **Docker** image that would:
1. Download the `hugo` binary.
2. Clone my blog's Git repository.
3. Generate static pages with `hugo`.
4. Serve the web pages.
#### Build the Docker Image
A Docker container is based on an image, a template that already contains pre-executed instructions. When the container starts, it can then execute a new set of actions like running a server or script.
To build a Docker image, you need a file called `Dockerfile` which defines the actions to perform during the build. You can also add other files, like a script named `entrypoint.sh` that will be executed when the container starts.
```plaintext
docker/
├── Dockerfile
├── entrypoint.sh
└── nginx.conf
```
##### Dockerfile
In my case, I wanted the image, based on `nginx`, to include the web server configuration, the `hugo` binary, the ability to clone my Git repo, and to run a script on startup.
```Dockerfile
FROM nginx:stable
ARG HUGO_VERSION
ENV HUGO_VERSION=${HUGO_VERSION}
ENV HUGO_DEST=/usr/share/nginx/html
# Install dependencies
RUN apt-get update && apt-get install -y \
curl \
git \
ca-certificates \
&& rm -rf /var/lib/apt/lists/*
# Download Hugo
RUN curl -sSL https://github.com/gohugoio/hugo/releases/download/v${HUGO_VERSION}/hugo_extended_${HUGO_VERSION}_Linux-64bit.tar.gz \
| tar -xz -C /usr/local/bin hugo
# Add entrypoint script
COPY entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh
# Copy custom nginx config
COPY nginx.conf /etc/nginx/conf.d/default.conf
# Nginx serves on port 80
EXPOSE 80
# Set default entrypoint
ENTRYPOINT ["/entrypoint.sh"]
```
##### entrypoint.sh
By default, a `nginx` container simply starts the web server. But here I wanted it to first clone a specific branch of my blog repository, and then generate the static files using `hugo`.
```sh
#!/bin/sh
set -e
# Configuration
REPO_URL="${REPO_URL:-https://git.vezpi.com/Vezpi/blog.git}"
URL="${URL:-blog.vezpi.com}"
BRANCH="${BRANCH:-preview}"
CLONE_DIR="${CLONE_DIR:-/blog}"
DRAFTS=""
# Add drafts for preview
if [ "$BRANCH" = "preview" ]; then
echo "- Adding draft pages to be generated"
DRAFTS="--buildDrafts"
fi
# Clone repo
echo "- Cloning $REPO_URL (branch: $BRANCH)..."
git clone --depth 1 --recurse-submodules --branch "$BRANCH" "$REPO_URL" "$CLONE_DIR"
# Generate static files with hugo
echo "- Building site with Hugo v$HUGO_VERSION in $HUGO_DEST..."
hugo --source "$CLONE_DIR" --destination "$HUGO_DEST" --baseURL="https://${URL}" "$DRAFTS" --logLevel info --cleanDestinationDir --gc --panicOnWarning --printI18nWarnings
# Start nginx
echo "- Starting Nginx..."
exec nginx -g 'daemon off;'
```
I've configured `hugo` to fail if any warning occurs; this way, the container won't start if something goes wrong, making problems easier to catch.
I can now build my Docker image and pass the desired Hugo version as a build argument:
```bash
$ docker build --build-arg HUGO_VERSION=0.147.6 .
[+] Building 4.3s (11/11) FINISHED
=> [internal] load build definition from Dockerfile
=> => transferring dockerfile: 786B
=> [internal] load metadata for docker.io/library/nginx:stable
=> [internal] load .dockerignore
=> => transferring context: 2B
=> [1/6] FROM docker.io/library/nginx:stable@sha256:eaa7e36decc3421fc04478c586dfea0d931cebe47d5bc0b15d758a32ba51126f
=> [internal] load build context
=> => transferring context: 1.16kB
=> CACHED [2/6] RUN apt-get update && apt-get install -y curl git ca-certificates && rm -rf /var/lib/apt/lists/*
=> CACHED [3/6] RUN curl -sSL https://github.com/gohugoio/hugo/releases/download/v0.147.6/hugo_extended_0.147.6_Linux-64bit.tar.gz | tar -xz -C /usr/local/bin hugo
=> [4/6] COPY entrypoint.sh /entrypoint.sh
=> [5/6] RUN chmod +x /entrypoint.sh
=> [6/6] COPY nginx.conf /etc/nginx/conf.d/default.conf
=> exporting to image
=> => exporting layers
=> => writing image sha256:07cbeea704f3af16dc71a0890539776c87a95972a6c8f7d4fb24ea0eeab17032
```
✅ Now that I have my image, I can launch new instances of my blog without worrying about what's on the filesystem of my VM. I can also choose which Git branch the content should be generated from.
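For example, a quick throwaway test of the staging build could look like the sketch below (the image tag, port mapping and URL are assumptions here; in the pipeline the real instances are managed by `docker compose`):
```bash
# Run a disposable instance that clones the preview branch and builds it at startup
docker run --rm -p 8080:80 \
  -e BRANCH=preview \
  -e URL=blog-dev.vezpi.com \
  vezpi-blog:latest
```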
But I still can't guarantee that these instances actually work; I need a way to **test** and then **deploy** them automatically.
To do that, I'm going to build a **CI/CD Pipeline**.
### CI/CD Pipeline
A CI/CD pipeline is a series of automated steps to test, build, and deploy an application. The **CI (Continuous Integration)** part checks that the code works with every change (e.g., by running tests), while the **CD (Continuous Deployment)** part automatically delivers the code to a test or production environment. This makes updates faster, more reliable, and more frequent.
There are different types of tools:
- **CI**: Jenkins, Travis CI, etc.
- **CD**: Argo CD, Flux CD, etc.
- **CI/CD**: GitLab CI/CD, GitHub Actions, etc.
In my case, I'm reusing **Gitea Actions**, which is very similar to GitHub Actions. It's a CI/CD platform built into **Gitea**, using `YAML` workflow files stored in the Git repository.
Every time an event occurs, like a push or a tag, Gitea Actions automatically runs a set of steps (tests, build, deploy…) in an isolated environment based on Docker containers.
#### Gitea Runners
Gitea Actions workflows run through **Gitea Runners**. These fetch the jobs and execute them inside Docker containers, providing a clean and isolated environment for each step.
Since my blog instances are managed by `docker` (specifically `docker compose`), I needed the runner to interact with the Docker daemon on `dockerVM`. To achieve this, I added the `docker:cli` image to the runner catalog and gave it access to the VM's `docker.socket`.
Here is the new configuration of my `runner` in my Gitea stack, also managed via `docker compose`:
```yaml
runner:
image: gitea/act_runner:latest
container_name: gitea_runner
restart: always
environment:
- GITEA_INSTANCE_URL=https://git.vezpi.com
- GITEA_RUNNER_REGISTRATION_TOKEN=<token>
- GITEA_RUNNER_NAME=self-hosted
- GITEA_RUNNER_LABELS=ubuntu:docker://node:lts,alpine:docker://node:lts-alpine,docker:docker://docker:cli
- CONFIG_FILE=/data/config.yml
volumes:
- /var/run/docker.sock:/var/run/docker.sock
- /appli/data/gitea/runner:/data
- /appli:/appli
networks:
- backend
depends_on:
- server
```
#### Workflow
Previously, I had a simple workflow triggered on every push to the `main` branch of my blog's Git repository. It did:
1. Checkout the Git repo into the `dockerVM` filesystem.
2. Download the latest Hugo binary if needed.
3. Generate the static blog files with Hugo.
Now, here's what the new workflow does:
1. **Check-Rebuild**: Checks if a new Hugo version is available and if the `docker` folder in the repo has changed.
2. **Build**: If the previous job requires it, rebuilds the Docker image `vezpi-blog` and tags it with the Hugo version.
3. **Deploy-Staging**: Deploys the blog using the `preview` branch to a test URL via `docker compose`.
4. **Test-Staging**: Verifies that the `preview` version of the blog responds and works.
5. **Merge**: Merges the `preview` branch into `main`.
6. **Deploy-Production**: Deploys the blog using the `main` branch (public version) with `docker compose`.
7. **Test-Production**: Verifies that the public blog is up and working.
8. **Clean**: Deletes the old Docker image.
Here's an example of a deployment triggered by an automatic commit from **Obsidian**. You can see that the Docker image wasn't rebuilt because no new Hugo version was available and the `docker` folder hadn't changed, so the final `Clean` job wasn't necessary either.
![Gitea Actions workflow for blog deployment](img/gitea-actions-deploy-blog-workflow.png)
#### Code
The workflow is written in `YAML` and must be located in the `.gitea/workflows/` folder of the Git repository.
```yaml
name: Blog Deployment
on:
push:
branches:
- preview
env:
DOCKER_IMAGE: vezpi-blog
jobs:
Check-Rebuild:
runs-on: docker
defaults:
run:
shell: sh
outputs:
latest_hugo_version: ${{ steps.get_latest.outputs.version }}
current_hugo_version: ${{ steps.get_current.outputs.version }}
newer_version_available: ${{ steps.compare.outputs.version }}
current_docker_image: ${{ steps.current_docker.outputs.image }}
docker_folder_changed: ${{ steps.docker_folder.outputs.changed }}
steps:
- name: Checkout Repository
run: git clone --branch preview https://${{ secrets.REPO_TOKEN }}@git.vezpi.com/Vezpi/blog.git .
- name: Check Latest Hugo Version
id: get_latest
run: |
apk add curl
latest_version=$(curl -s https://api.github.com/repos/gohugoio/hugo/releases/latest | grep tag_name | sed -E 's/.*"v([^"]+)".*/\1/')
echo "version=$latest_version" | tee -a $GITEA_OUTPUT
- name: Check Current Hugo Version
id: get_current
run: |
current_version=$(docker image ls ${DOCKER_IMAGE} --format '{{.Tag}}' | head -n1)
echo "version=$current_version" | tee -a $GITEA_OUTPUT
- name: Compare Current and Latest Hugo Versions
id: compare
run: |
if [ "${{ steps.get_latest.outputs.version }}" != "${{ steps.get_current.outputs.version }}" ]; then
new_version_available=true
echo "New version available: ${{ steps.get_latest.outputs.version }}"
else
new_version_available=false
echo "Current version is the latest: ${{ steps.get_latest.outputs.version }}"
fi
echo "version=$new_version_available" | tee -a $GITEA_OUTPUT
- name: Get Current Docker Image ID
id: current_docker
run: |
current_image=$(docker image ls ${DOCKER_IMAGE}:latest --format '{{.ID}}' | head -n1)
echo "image=$current_image" | tee -a $GITEA_OUTPUT
- name: Check Changes in the Docker Folder
id: docker_folder
run: |
if git diff --name-only origin/main | grep -q '^docker/';
then
docker_folder_changed=true
echo "Change detected in the /docker folder"
else
docker_folder_changed=false
echo "No change in the /docker folder"
fi
echo "changed=$docker_folder_changed" | tee -a $GITEA_OUTPUT
Build:
needs: Check-Rebuild
if: needs.Check-Rebuild.outputs.newer_version_available == 'true' || needs.Check-Rebuild.outputs.docker_folder_changed == 'true'
runs-on: docker
defaults:
run:
shell: sh
steps:
- name: Checkout Repository
run: git clone --branch preview https://${{ secrets.REPO_TOKEN }}@git.vezpi.com/Vezpi/blog.git .
- name: Build Docker Image
run: |
cd docker
docker build \
--build-arg HUGO_VERSION=${{ needs.Check-Rebuild.outputs.latest_hugo_version }} \
--tag ${DOCKER_IMAGE}:${{ needs.Check-Rebuild.outputs.latest_hugo_version }} \
.
docker tag ${DOCKER_IMAGE}:${{ needs.Check-Rebuild.outputs.latest_hugo_version }} ${DOCKER_IMAGE}:latest
Deploy-Staging:
needs:
- Check-Rebuild
- Build
if: always() && needs.Check-Rebuild.result == 'success' && (needs.Build.result == 'skipped' || needs.Build.result == 'success')
runs-on: docker
container:
volumes:
- /appli/docker/blog:/blog
defaults:
run:
shell: sh
env:
CONTAINER_NAME: blog_staging
steps:
- name: Launch Blog Deployment
run: |
cd /blog
docker compose down ${CONTAINER_NAME}
docker compose up -d ${CONTAINER_NAME}
sleep 5
echo "- Displaying container logs"
docker compose logs ${CONTAINER_NAME}
Test-Staging:
needs: Deploy-Staging
runs-on: ubuntu
env:
URL: "https://blog-dev.vezpi.com/en/"
steps:
- name: Check HTTP Response
run: |
code=$(curl -s -o /dev/null -w "%{http_code}" "$URL")
echo "HTTP response code: $code"
if [ "$code" -ne 200 ]; then
echo "❌ Service is not healthy (HTTP $code)"
exit 1
else
echo "✅ Service is healthy"
fi
Merge:
needs: Test-Staging
runs-on: ubuntu
steps:
- name: Checkout Repository
uses: actions/checkout@v4
with:
fetch-depth: 0
ref: main
- name: Merge preview Branch on main
run: |
git merge --ff-only origin/preview
git push origin main
Deploy-Production:
needs: Merge
runs-on: docker
container:
volumes:
- /appli/docker/blog:/blog
defaults:
run:
shell: sh
env:
CONTAINER_NAME: blog_production
steps:
- name: Launch Blog Deployment
run: |
cd /blog
docker compose down ${CONTAINER_NAME}
docker compose up -d ${CONTAINER_NAME}
sleep 5
echo "- Displaying container logs"
docker compose logs ${CONTAINER_NAME}
Test-Production:
needs: Deploy-Production
runs-on: ubuntu
env:
URL: "https://blog.vezpi.com/en/"
steps:
- name: Check HTTP Response
run: |
code=$(curl -s -o /dev/null -w "%{http_code}" "$URL")
echo "HTTP response code: $code"
if [ "$code" -ne 200 ]; then
echo "❌ Service is not healthy (HTTP $code)"
exit 1
else
echo "✅ Service is healthy"
fi
Clean:
needs:
- Check-Rebuild
- Build
- Test-Production
runs-on: docker
defaults:
run:
shell: sh
steps:
- name: Remove Old Docker Image
run: |
docker image rm ${{ needs.Check-Rebuild.outputs.current_docker_image }} --force
```
## Results
With this new workflow and CI/CD pipeline, I feel much more confident when editing my content in Markdown with Obsidian or tweaking my `hugo` config.
The next step will be to improve the testing phase; a simple `curl` isn't enough to truly verify that the blog is working properly. I also want to add a notification system to alert me when the workflow fails. See you soon!
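As a first improvement, the check could at least verify that the homepage contains the expected content instead of only returning a 200, along these lines (a sketch; the string to look for is an assumption):
```bash
# Fail the job if the homepage is unreachable or does not contain the expected title
URL="https://blog.vezpi.com/en/"
if curl -fsS "$URL" | grep -q "Vezpi"; then
  echo "✅ Homepage content looks healthy"
else
  echo "❌ Homepage content check failed"
  exit 1
fi
```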
View File
@@ -1,380 +0,0 @@
---
slug: notification-system-gotify-vs-ntfy
title: Test de Gotify et Ntfy, un système de notifications self-hosted
description: Gotify ou Ntfy ? J'ai testé les deux pour créer un système de notifications fiable et self-hosted pour mon homelab, et intégré à un pipeline CI/CD.
date: 2025-06-13
draft: false
tags:
- notification
- ntfy
- gotify
- ci-cd
categories:
- homelab
---
## Intro
Pour savoir ce qui se passe dans mon homelab et être averti quand quelque chose ne va pas, je veux mettre en place un système de notifications où (presque) n'importe quoi pourrait m'envoyer un message que je recevrais sur mon mobile.
Par le passé, j'utilisais **Pushover**, qui était très bien, mais je veux explorer de nouvelles options, plus modernes et éventuellement self-hosted.
## Choisir le Bon Système de Notifications
Les éléments clés pour déterminer le bon système pour moi seraient :
- **Application Android** : obligatoire, une interface élégante et intuitive est important.
- **Intégration** : je veux que le service soit intégré partout où je veux être notifié.
- **Self-hosted** : l'héberger moi-même est toujours mieux pour la confidentialité.
Après une recherche rapide, les outils les plus adaptés sur le marché sont :
- **Ntfy**
- **Gotify**
Étant donné les commentaires sur internet et après avoir testé rapidement les deux applications Android, je ne peux pas vraiment décider. Je pense que Ntfy est la meilleure option, mais je vais installer et tester les deux pour me faire une idée !
## Gotify
J'avais entendu parler de Gotify il y a quelque temps, en fait avant même de regarder d'autres alternatives, j'avais celui-ci en tête. J'ai rapidement jeté un œil à sa [documentation](https://gotify.net/docs/) et cela semble assez simple.
### Installation
Comme d'habitude, je vais déployer le serveur Gotify avec `docker compose` sur `dockerVM`, une VM hébergeant mes applications sous forme de conteneurs Docker. Je crée un nouveau dossier `gotify` dans `/appli/docker/` et je colle mon template de `docker-compose.yml` dedans.
`docker-compose.yml`
```yaml
services:
gotify:
image: gotify/server
container_name: gotify
volumes:
- /appli/data/gotify/data/:/app/data
environment:
- TZ=Europe/Paris
- GOTIFY_DEFAULTUSER_NAME=${GOTIFY_DEFAULTUSER_NAME}
- GOTIFY_DEFAULTUSER_PASS=${GOTIFY_DEFAULTUSER_PASS}
networks:
- web
labels:
- traefik.enable=true
- traefik.http.routers.gotify.rule=Host(`gotify.vezpi.me`)
- traefik.http.routers.gotify.entrypoints=https
- traefik.http.routers.gotify.tls.certresolver=letsencrypt
- traefik.http.services.gotify.loadbalancer.server.port=80
restart: always
networks:
web:
external: true
```
`.env`
```
GOTIFY_DEFAULTUSER_NAME=vez
GOTIFY_DEFAULTUSER_PASS=<password>
```
Dans la [documentation](https://gotify.net/docs/config), je vois que plusieurs moteurs de base de données peuvent être utilisés, par défaut c'est **sqlite3** qui est utilisé, ce qui ira très bien pour le test. Passer à **PostgreSQL** pourrait être une option si je décide de garder Gotify. Sur cette même page, je vois les différentes variables d'environnement que je peux utiliser pour configurer le serveur depuis le fichier `docker-compose.yml`.
Quand mes fichiers de configuration sont prêts, je crée une nouvelle entrée dans mon plugin Caddy sur OPNsense pour rediriger ma nouvelle URL Gotify : [https://gotify.vezpi.me](https://gotify.vezpi.me).
Je crée également le dossier `/appli/data/gotify/data/` dans `dockerVM` pour le monter comme volume et stocker les données :
```bash
mkdir -p /appli/data/gotify/data/
```
Enfin, je lance la stack docker :
```bash
$ docker compose up -d
[+] Running 5/5
✔ gotify Pulled
✔ 63ce8e957633 Pull complete
✔ e7def9680541 Pull complete
✔ 9a1821c438b4 Pull complete
✔ ad316556c9ff Pull complete
[+] Running 1/1
✔ Container gotify Started
```
✅ Atteindre l'URL [https://gotify.vezpi.me](https://gotify.vezpi.me) m'affiche la page de connexion Gotify :
![Gotify login page](img/gotify-login-page.png)
Après connexion, j'accède au tableau de bord, sans messages évidemment :
![Gotify dashboard on a fresh installation](img/gotify-dashboard-no-messages.png)
### Créer une Application
Pour permettre l'envoi de messages, je dois d'abord créer une application pour laquelle les messages seront regroupés. Cela peut se faire de deux manières :
- **WebUI**
- **REST-API**
Pour le test, j'utiliserai la WebUI, je clique sur le bouton `APPS` en haut puis `CREATE APPLICATION`. Je choisis un magnifique nom d'application et une description.
![Create an application on Gotify](img/gotify-create-new-application.png)
Une fois mon application créée, un token est généré pour celle-ci. Je peux modifier l'application pour changer quoi que ce soit, je peux aussi uploader une icône.
![Gotify application list showing my new Potato application](img/gotify-application-list.png)
### Tests
Mon application est maintenant visible dans la barre latérale, testons maintenant l'envoi d'un message. Pour l'envoyer, je peux utiliser `curl` et j'ai besoin du token de l'application.
```bash
curl "https://gotify.vezpi.me/message?token=<apptoken>" -F "title=Cooked!" -F "message=The potoaries are ready!" -F "priority=5"
```
Je reçois instantanément la notification sur mon mobile et dans mon navigateur.
Je renvoie un autre message mais avec une priorité plus basse : `-2`. Je ne reçois pas de notification dans mon navigateur, je remarque une légère différence entre les deux messages. Sur mon mobile, seule ma montre la reçoit, je ne la vois pas sur l'écran, mais je la retrouve dans le centre de notifications.
![Messages received on Gotify WebUI](img/gotify-messages-received.png)
### Application Android
Voici quelques captures d'écran depuis mon appareil Android :
![Capture décran de lapplication Android Gotify pour la page de connexion](img/gotify-android-first-login.png)
Pour une raison inconnue, une notification apparaît aléatoirement pour me dire que je suis connecté à Gotify :
![Capture décran de lapplication Android Gotify avec les messages de test](img/gotify-android-test-messages.png)
### Conclusion
Dans la [documentation](https://gotify.net/docs/msgextras), j'ai trouvé quelques fonctionnalités supplémentaires, comme l'ajout d'images ou d'actions cliquables. En résumé, ça fait le job, c'est tout. Le processus d'installation est simple, l'utilisation n'est pas compliquée, mais je dois créer une application pour obtenir un token, puis ajouter ce token à chaque fois que je veux envoyer un message.
## Ntfy
Ntfy semble très propre, installons-le et voyons ce qu'il propose !
### Installation
Même histoire ici avec `docker compose` sur `dockerVM`. Je crée un nouveau dossier `ntfy` dans `/appli/docker/` et je colle le template de `docker-compose.yml`.
`docker-compose.yml`
```yaml
services:
ntfy:
image: binwiederhier/ntfy
container_name: ntfy
command:
- serve
volumes:
- /appli/data/ntfy/data:/var/cache/ntfy
environment:
- TZ=Europe/Paris
- NTFY_BASE_URL=https://ntfy.vezpi.me
- NTFY_CACHE_FILE=/var/cache/ntfy/cache.db
- NTFY_AUTH_FILE=/var/cache/ntfy/auth.db
- NTFY_ATTACHMENT_CACHE_DIR=/var/cache/ntfy/attachments
- NTFY_AUTH_DEFAULT_ACCESS=deny-all
- NTFY_BEHIND_PROXY=true
- NTFY_ENABLE_LOGIN=true
user: 1000:1000
networks:
- web
labels:
- traefik.enable=true
- traefik.http.routers.ntfy.rule=Host(`ntfy.vezpi.me`)
- traefik.http.routers.ntfy.entrypoints=https
- traefik.http.routers.ntfy.tls.certresolver=letsencrypt
- traefik.http.services.ntfy.loadbalancer.server.port=80
healthcheck:
test: ["CMD-SHELL", "wget -q --tries=1 http://ntfy:80/v1/health -O - | grep -Eo '\"healthy\"\\s*:\\s*true' || exit 1"]
interval: 60s
timeout: 10s
retries: 3
start_period: 40s
restart: unless-stopped
networks:
web:
external: true
```
Je crée aussi le dossier de volume persistant `/appli/data/ntfy/data/` dans `dockerVM` :
```bash
mkdir -p /appli/data/ntfy/data/
```
La [documentation](https://docs.ntfy.sh/config/) est impressionnante, j'ai essayé de rassembler la config pour un démarrage rapide. Je devrais être bon pour lancer le serveur.
Encore une fois ici, je crée un nouveau domaine pour mon proxy inverse Caddy sur OPNsense avec lURL [https://ntfy.vezpi.me](https://ntfy.vezpi.me).
```bash
$ docker compose up -d
[+] Running 4/4
✔ ntfy Pulled
✔ f18232174bc9 Already exists
✔ f5bf7a328fac Pull complete
✔ 572c745ef6c3 Pull complete
[+] Running 1/1
✔ Container ntfy Started
```
✅ LURL [https://ntfy.vezpi.me](https://ntfy.vezpi.me) me donne accès au tableau de bord Ntfy :
![Ntfy dashboard](img/ntfy-login-dashboard.png)
Au départ je nai aucun utilisateur et aucun nest créé par défaut. Comme jai interdit tout accès anonyme dans la config, je dois en créer un.
Pour lister les utilisateurs, je peux utiliser cette commande :
```bash
$ docker exec -it ntfy ntfy user list
user * (role: anonymous, tier: none)
- no topic-specific permissions
- no access to any (other) topics (server config)
```
Je crée un utilisateur avec les privilèges dadministration :
```bash
$ docker exec -it ntfy ntfy user add --role=admin vez
user vez added with role admin
```
Je peux maintenant me connecter à linterface Web, et passer en mode sombre, mes yeux me remercient.
### Topics
Dans Ntfy, il ny a pas dapplications à créer, mais les messages sont regroupés dans des topics, plus lisibles quun token lors de lenvoi. Une fois le topic créé, je peux changer le nom daffichage ou envoyer des messages de test. Sur linterface Web, cependant, je ne trouve aucune option pour changer licône, alors que cest possible depuis lapplication Android, ce qui nest pas très pratique.
![Example messages in Ntfy](img/ntfy-topic-messages.png)
### Tests
Envoyer un message est en fait plus difficile que prévu. Comme jai activé lauthentification, je dois aussi mauthentifier pour envoyer des messages :
```bash
curl \
-H "Title: Cooked!" \
-H "Priority: high" \
-d "The potatoes are ready!" \
-u "vez:<password>" \
  https://ntfy.vezpi.me/potato
```
```
### Application Android
Voici quelques captures de lapplication Android Ntfy :
![Captures de lapplication Android Ntfy](img/ntfy-android-app.png)
### Conclusion
Ntfy est une belle application avec une [documentation](https://docs.ntfy.sh/) vraiment solide. Les possibilités sont infinies et la liste des intégrations est impressionnante. Linstallation nétait pas difficile mais demandait un peu plus de configuration. Le besoin dutiliser la CLI pour configurer les utilisateurs et les permissions nest pas très pratique.
Sur lapplication Android, je regrette quil ny ait pas une vue pour voir tous les messages des différents topics. En revanche, sur linterface Web, jaurais aimé pouvoir définir les icônes des topics. Ce que jai trouvé intéressant, cest la possibilité davoir des topics depuis différents serveurs.
## Comparaison
**Gotify** est simple, tous les utilisateurs auront accès à toutes les applications. Pas besoin d'identifiant utilisateur pour envoyer des messages, seulement le token de lapplication. Lapplication Android est efficace, mais personnellement, même si licône est amusante, je ne laime pas trop.
**Ntfy** semble plus avancé et complet, avec des permissions plus précises. Linterface est élégante tout en restant simple, les possibilités sont infinies.
Dans lensemble, seuls de petits détails me font préférer Ntfy à Gotify, par exemple, avoir accès à des topics de différents serveurs, les ACL ou la possibilité dajouter des émojis aux messages, mais les deux applications remplissent bien leur rôle.
## Implémentation de Notifications Réelles
Pendant que je mettais en place mon pipeline CI/CD pour le déploiement de mon blog, je voulais être averti chaque fois que quelque chose se passe, voyons comment je peux limplémenter avec Ntfy.
### Contrôle dAccès
Je pourrais utiliser mon utilisateur `admin` pour envoyer les messages depuis le pipeline et les recevoir sur mon appareil Android, même si cest plus simple à configurer, je veux appliquer le principe de moindre privilège, ce que Ntfy permet. Je vais donc créer un utilisateur dédié pour mon pipeline CI/CD et un autre pour mon appareil Android.
#### Utilisateur Pipeline
Celui-ci ne pourra qu'envoyer des messages sur le topic `blog`, je lappelle `gitea_blog`.
```bash
$ ntfy user add gitea_blog
user gitea_blog added with role user
$ ntfy access gitea_blog blog wo
granted write-only access to topic blog
user gitea_blog (role: user, tier: none)
- write-only access to topic blog
```
Je teste rapidement lenvoi dun message sur ce topic :
```bash
$ curl -u gitea_blog:<password> -d "Message test from gitea_blog!" https://ntfy.vezpi.me/blog
{"id":"xIgwz9dr1w9Z","time":1749587681,"expires":1749630881,"event":"message","topic":"blog","message":"Message test from gitea_blog!"}
```
![Test denvoi de messages sur le topic blog avec Ntfy ](img/ntfy-testing-gitea-blog-user.png)
✅ Message reçu !
Je tente aussi un envoi sur mon topic de test :
```bash
$ curl -u gitea_blog:<password> -d "Message test from gitea_blog!" https://ntfy.vezpi.me/potato
{"code":40301,"http":403,"error":"forbidden","link":"https://ntfy.sh/docs/publish/#authentication"}
```
❌ Refusé comme attendu.
#### Utilisateur Android
Depuis mon appareil Android, je veux uniquement recevoir les messages, mais sur tous les topics. Je crée lutilisateur `android_s25u` :
```bash
$ ntfy user add android_s25u
user android_s25u added with role user
$ ntfy access android_s25u "*" ro
granted read-only access to topic *
user android_s25u (role: user, tier: none)
- read-only access to topic *
```
✅ Après avoir configuré lutilisateur dans lapplication Android Ntfy, je peux lire mes messages sur `https://ntfy.vezpi.me/blog` et aussi sur le topic de test.
### Implémentation
Maintenant que mes utilisateurs sont prêts, je veux ajouter un job `Notify` dans mon pipeline CI/CD pour le déploiement du blog dans **Gitea**, vous pouvez retrouver le workflow complet dans [cet article]({{< ref "post/4-blog-deployment-ci-cd-pipeline-gitea-actions" >}}).
#### Créer un Secret
Pour permettre à mon Gitea Runner dutiliser lutilisateur `gitea_blog` dans ses jobs, je veux créer un secret. Jexplore le dépôt Gitea `Blog` dans `Settings`, puis `Actions` > `Secrets` > `Add Secret`. Jy mets la valeur du secret au format `<utilisateur>:<password>` :
![Add a secret in the blog Gitea repository](img/gitea-blog-ntfy-credentials.png)
### Écrire le Code `Notify`
Je peux maintenant écrire le code qui menverra un message quand un nouveau déploiement se produit.
Si le déploiement est un succès, la priorité sera minimale, pas besoin de notification sur mon mobile, juste pour garder une trace dans lapplication Android Ntfy si besoin.
Si quelque chose échoue, je veux être notifié sur mon mobile avec une priorité plus élevée. Ntfy me permet dajouter des actions sur mes notifications, je vais en créer 2 :
- **View Run** : Lien direct vers le workflow dans Gitea pour voir ce quil sest passé.
- **Verify Blog** : Lien vers le blog pour vérifier quil est toujours en ligne.
```yaml
Notify:
needs: [Check-Rebuild, Build, Deploy-Staging, Test-Staging, Merge, Deploy-Production, Test-Production, Clean]
runs-on: ubuntu
if: always()
env:
NTFY_URL: https://ntfy.vezpi.me
NTFY_TOPIC: blog
NTFY_TOKEN: ${{ secrets.NTFY_CREDENTIALS }}
steps:
- name: Notify Workflow Result
run: |
if [[
"${{ needs.Check-Rebuild.result }}" == "success" &&
("${{ needs.Build.result }}" == "success" || "${{ needs.Build.result }}" == "skipped") &&
"${{ needs.Deploy-Staging.result }}" == "success" &&
"${{ needs.Test-Staging.result }}" == "success" &&
"${{ needs.Merge.result }}" == "success" &&
"${{ needs.Deploy-Production.result }}" == "success" &&
"${{ needs.Test-Production.result }}" == "success" &&
("${{ needs.Clean.result }}" == "success" || "${{ needs.Clean.result }}" == "skipped")
]]; then
curl -H "Priority: min" \
-H "Tags: white_check_mark" \
-d "Blog workflow completed successfully." \
-u ${NTFY_TOKEN} \
${NTFY_URL}/${NTFY_TOPIC}
else
curl -H "Priority: high" \
-H "Tags: x" \
-H "Actions: view, View Run, ${{ gitea.server_url }}/${{ gitea.repository }}/actions/runs/${{ gitea.run_number }}, clear=true; \
view, Verify Blog, https://blog.vezpi.com, clear=true" \
-d "Blog workflow failed!" \
-u ${NTFY_TOKEN} \
${NTFY_URL}/${NTFY_TOPIC}
fi
```
✅ Test des deux cas, fonctionne comme prévu :
![Checking both test scenario in Ntfy WebUI](img/ntfy-testing-blog-notifications.png)
## Conclusion
Après avoir testé **Gotify** et **Ntfy**, jai trouvé mon prochain système de notifications. Les deux sont bons pour le job, mais je devais en choisir un et jai une petite préférence pour Ntfy.
Lapplication serait parfaite si je pouvais gérer les utilisateurs et les accès depuis linterface Web. Aussi, je préférerais pouvoir gérer licône des topics globalement plutôt que depuis mon mobile.
Quoi quil en soit, je suis très satisfait du résultat de cette première implémentation et jai hâte dajouter des notifications ailleurs !

View File

@@ -1,379 +0,0 @@
---
slug: notification-system-gotify-vs-ntfy
title: Testing Gotify and Ntfy, a Self-Hosted Notification System
description: Gotify or Ntfy? I tested both to create a reliable, self-hosted notification system for my homelab and integrated it with CI/CD pipeline.
date: 2025-06-13
draft: false
tags:
- notification
- ntfy
- gotify
- ci-cd
categories:
- homelab
---
## Intro
To know what is going on in my homelab and be warned when something fails, I want to setup a notification system where almost anything could seamlessly send me a message that I would receive on my mobile.
In the past I was using **Pushover**, which was great, but I want to explore new options, more modern and eventually self-hosted.
## Choose the Right Notification System
The key elements to determine the right system for me would be:
- **Android application**: mandatory, a sleek and intuitive UI is important.
- **Integration**: I want the service integrated anywhere I want to be notified.
- **Self hosted**: Host it myself is always better for privacy.
After a quick research, the most suitable tools on the market are:
- **Ntfy**
- **Gotify**
Given the comments on internet and after testing quickly both Android app, I can't really decide. I think Ntfy is the better option, but I will install and test them both to make my mind!
## Gotify
I heard about Gotify some time ago, actually before looking at other alternatives, I had that one in mind. I quickly had a look at its [documentation](https://gotify.net/docs/) and this seems to be pretty straight forward.
### Installation
As usual, I will deploy the Gotify server with `docker compose` on `dockerVM`, a VM hosting my applications as docker container. I create a new `gotify` folder in `/appli/docker/` and I copy paste my `docker-compose.yml` template in there.
`docker-compose.yml`
```yaml
services:
gotify:
image: gotify/server
container_name: gotify
volumes:
- /appli/data/gotify/data/:/app/data
environment:
- TZ=Europe/Paris
- GOTIFY_DEFAULTUSER_NAME=${GOTIFY_DEFAULTUSER_NAME}
- GOTIFY_DEFAULTUSER_PASS=${GOTIFY_DEFAULTUSER_PASS}
networks:
- web
labels:
- traefik.enable=true
- traefik.http.routers.gotify.rule=Host(`gotify.vezpi.me`)
- traefik.http.routers.gotify.entrypoints=https
- traefik.http.routers.gotify.tls.certresolver=letsencrypt
- traefik.http.services.gotify.loadbalancer.server.port=80
restart: always
networks:
web:
external: true
```
`.env`
```
GOTIFY_DEFAULTUSER_NAME=vez
GOTIFY_DEFAULTUSER_PASS=<password>
```
In the [documentation](https://gotify.net/docs/config), I can see that several database backends can be used; by default it relies on **sqlite3**, which is fine for the test. Switching to **PostgreSQL** could be an option if I decide to stick with Gotify. The same page lists the environment variables I can use to configure the server from the `docker-compose.yml` file.
When my config files are ready, I create a new entry in my Caddy plugin in OPNsense to forward my new Gotify URL: https://gotify.vezpi.me.
I also create the folder `/appli/data/gotify/data/` in `dockerVM` to mount it as a volume and store data:
```bash
mkdir -p /appli/data/gotify/data/
```
Finally I spin the docker stack up:
```bash
$ docker compose up -d
[+] Running 5/5
✔ gotify Pulled
✔ 63ce8e957633 Pull complete
✔ e7def9680541 Pull complete
✔ 9a1821c438b4 Pull complete
✔ ad316556c9ff Pull complete
[+] Running 1/1
✔ Container gotify Started
```
✅ Reaching the URL https://gotify.vezpi.me gives me the Gotify login page:
![Gotify login page](img/gotify-login-page.png)
After login, I can access the dashboard, with no messages obviously:
![Gotify dashboard on a fresh installation](img/gotify-dashboard-no-messages.png)
### Creating an Application
To allow messages to be pushed, I first need to create an application under which the messages will be grouped. This can be done in two ways:
- **WebUI**
- **REST-API**
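For the record, the REST-API route would boil down to a single authenticated call; a minimal sketch, assuming basic auth with the default `vez` admin user and the `/application` endpoint described in the API docs (the JSON response contains the application token):
```bash
# Create an application via the Gotify REST API (sketch, not the path I follow below)
curl -s -X POST "https://gotify.vezpi.me/application" \
  -u "vez:<password>" \
  -H "Content-Type: application/json" \
  -d '{"name": "Potato", "description": "My test application"}'
# The response is a JSON object including the "token" field used to push messages
```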
For the test, I will use the WebUI: I click on the `APPS` button at the top, then `CREATE APPLICATION`. I choose a wonderful application name and description.
![Create an application on Gotify](img/gotify-create-new-application.png)
Once my application is created, a token is generated for it. I can edit the application to change anything; I can also upload an icon.
![Gotify application list showing my new Potato application](img/gotify-application-list.png)
### Testing
My application is now visible in the sidebar, so let's try to send a message. To push it, I can use `curl` with the token of the application.
```bash
curl "https://gotify.vezpi.me/message?token=<apptoken>" -F "title=Cooked!" -F "message=The potoaries are ready!" -F "priority=5"
```
I instantly received the notification on my mobile and in my browser.
I then sent another message with a lower priority: `-2`. I didn't get any notification in my browser, and I notice a slight difference between the two messages. On my mobile, only my watch received it; I don't see it on the screen, but I can find it in the notification center.
![Messages received on Gotify WebUI](img/gotify-messages-received.png)
### Android App
Here are some screenshots from my Android device:
![Screenshot of the Gotify Android app login page](img/gotify-android-first-login.png)
For some reason, a notification randomly pops up to tell me that I'm connected to Gotify:
![Screenshot of the Gotify Android app with the test messages](img/gotify-android-test-messages.png)
### Conclusion
In the [documentation](https://gotify.net/docs/msgextras), I found some extra features, like adding images or click actions. In summary, it does the job, that's it. The installation process is easy and the usage is not hard, but I need to create an application to get a token, then pass this token anytime I want to push messages.
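Out of curiosity, here is roughly what a click action would look like, based on my reading of that page: the `extras` field requires a JSON body instead of form fields, and the `client::notification` namespace carries the click URL (sketch, untested here):
```bash
# Push a message whose notification opens a URL when tapped
curl -s -X POST "https://gotify.vezpi.me/message?token=<apptoken>" \
  -H "Content-Type: application/json" \
  -d '{
        "title": "Cooked!",
        "message": "The potatoes are ready!",
        "priority": 5,
        "extras": {
          "client::notification": { "click": { "url": "https://blog.vezpi.com" } }
        }
      }'
```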
## Ntfy
Ntfy seems very clean, let's install it and see what it's got!
### Installation
Same story here with `docker compose` on `dockerVM`. I create a new `ntfy` folder in `/appli/docker/` and I copy paste the `docker-compose.yml` template.
`docker-compose.yml`
```yaml
services:
ntfy:
image: binwiederhier/ntfy
container_name: ntfy
command:
- serve
volumes:
- /appli/data/ntfy/data:/var/cache/ntfy
environment:
- TZ=Europe/Paris
- NTFY_BASE_URL=https://ntfy.vezpi.me
- NTFY_CACHE_FILE=/var/cache/ntfy/cache.db
- NTFY_AUTH_FILE=/var/cache/ntfy/auth.db
- NTFY_ATTACHMENT_CACHE_DIR=/var/cache/ntfy/attachments
- NTFY_AUTH_DEFAULT_ACCESS=deny-all
- NTFY_BEHIND_PROXY=true
- NTFY_ENABLE_LOGIN=true
user: 1000:1000
networks:
- web
labels:
- traefik.enable=true
- traefik.http.routers.ntfy.rule=Host(`ntfy.vezpi.me`)
- traefik.http.routers.ntfy.entrypoints=https
- traefik.http.routers.ntfy.tls.certresolver=letsencrypt
- traefik.http.services.ntfy.loadbalancer.server.port=80
healthcheck:
test: ["CMD-SHELL", "wget -q --tries=1 http://ntfy:80/v1/health -O - | grep -Eo '\"healthy\"\\s*:\\s*true' || exit 1"]
interval: 60s
timeout: 10s
retries: 3
start_period: 40s
restart: unless-stopped
networks:
web:
external: true
```
I also create the persistent volume folder `/appli/data/ntfy/data/` in `dockerVM`:
```bash
mkdir -p /appli/data/ntfy/data/
```
The [documentation](https://docs.ntfy.sh/config/) is impressive; I gathered the config needed for a quick start, so I should be good to launch the server.
Again here, I create a new domain for my Caddy reverse proxy plugin in OPNsense for the URL https://ntfy.vezpi.me.
```bash
$ docker compose up -d
[+] Running 4/4
✔ ntfy Pulled
✔ f18232174bc9 Already exists
✔ f5bf7a328fac Pull complete
✔ 572c745ef6c3 Pull complete
[+] Running 1/1
✔ Container ntfy Started
```
✅ The URL https://ntfy.vezpi.me takes me to the Ntfy dashboard:
![Ntfy dashboard](img/ntfy-login-dashboard.png)
At first, I don't have any user and none is created by default; since I denied all anonymous access in the config, I need to create one.
To list the users, I can use this command:
```bash
$ docker exec -it ntfy ntfy user list
user * (role: anonymous, tier: none)
- no topic-specific permissions
- no access to any (other) topics (server config)
```
I create a user with admin privileges:
```bash
$ docker exec -it ntfy ntfy user add --role=admin vez
user vez added with role admin
```
I can now log into the WebUI and switch to dark mode, my eyes are grateful.
### Topics
In Ntfy there are no applications to create; messages are grouped into topics, which are more readable than a token when sending messages. Once the topic is created, I can change the display name or send test messages. On the WebUI though, I don't find any option to change the icon, while this option exists in the Android app, which is not really convenient.
![Example messages in Ntfy](img/ntfy-topic-messages.png)
### Testing
Sending a message is actually harder than I thought. Because I set up authentication, I also need to authenticate to send messages:
```bash
curl \
-H "Title: Cooked!" \
-H "Priority: high" \
-d "The potatoes are ready!" \
-u "vez:<password>" \
https://ntfy.vezpi.me/patato
```
### Android App
Here are some screenshots of Ntfy Android App:
![Screenshots of the Ntfy Android app](img/ntfy-android-app.png)
### Conclusion
Ntfy is a beautiful application with really strong [documentation](https://docs.ntfy.sh/). The possibilities are endless and the list of integrations is impressive. The installation was not hard but required a bit more setup. The need to use the CLI to configure users and permissions is not really convenient.
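That said, the CLI steps can at least be scripted. A small sketch with placeholder names, assuming the `NTFY_PASSWORD` environment variable is honored for non-interactive user creation, as the configuration docs describe:
```bash
# Provision a user and its ACL without interactive prompts (placeholder user/topic)
docker exec -e NTFY_PASSWORD='<password>' ntfy ntfy user add some_user
docker exec ntfy ntfy access some_user some_topic rw
```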
On the Android app, I regret that there is no view showing the messages from all topics at once. On the WebUI side, I would have liked to set an icon for each topic. What I found interesting was the possibility to subscribe to topics from different servers.
## Comparison
**Gotify** is simple: all users have access to all applications. You don't need user credentials to push messages, only the application token. The Android app is effective, but personally, while the icon is funny, I don't really like it.
**Ntfy** feels more advanced and complete, with fine-grained permissions. The UI is sleek yet simple, and the possibilities are endless.
Overall, only small details make me favor Ntfy over Gotify, e.g., access to topics from different servers, ACLs, or the ability to add emojis to messages, but both applications are really good for the job.
## Implementing Real Scenario Notification
While I was setting up the CI/CD pipeline for my blog deployment, I wanted to be warned whenever something happens; let's see how I can implement that with Ntfy.
### Access Control
I could use my `admin` user both to send messages from the pipeline and to receive them on my Android device. While this is easier to set up, I want to apply the principle of least privilege, which Ntfy allows. I will therefore create a dedicated user for my CI/CD pipeline and another for my Android device.
#### Pipeline User
This guy will only be allowed to send messages on the `blog` topic, I call it `gitea_blog`.
```bash
$ ntfy user add gitea_blog
user gitea_blog added with role user
$ ntfy access gitea_blog blog wo
granted write-only access to topic blog
user gitea_blog (role: user, tier: none)
- write-only access to topic blog
```
I quickly try to send a message on that topic:
```bash
$ curl -u gitea_blog:<password> -d "Message test from gitea_blog!" https://ntfy.vezpi.me/blog
{"id":"xIgwz9dr1w9Z","time":1749587681,"expires":1749630881,"event":"message","topic":"blog","message":"Message test from gitea_blog!"}
```
![Testing message sending on the blog topic with Ntfy](img/ntfy-testing-gitea-blog-user.png)
✅ Message received!
I also try to send a message on my test topic:
```bash
$ curl -u gitea_blog:<password> -d "Message test from gitea_blog!" https://ntfy.vezpi.me/potato
{"code":40301,"http":403,"error":"forbidden","link":"https://ntfy.sh/docs/publish/#authentication"}
```
❌ Denied as expected.
#### Android Device User
From my Android device I only want to receive messages, but on all topics. I create the user `android_s25u`:
```bash
$ ntfy user add android_s25u
user android_s25u added with role user
$ ntfy access android_s25u "*" ro
granted read-only access to topic *
user android_s25u (role: user, tier: none)
- read-only access to topic *
```
✅ After setting up the user on the Ntfy Android App, I can read my messages on `https://ntfy.vezpi.me/blog` and also on the testing one.
### Implementation
Now that my users are set up, I want to add a `Notify` job to my CI/CD pipeline for the blog deployment in **Gitea**; you can find the full workflow in [this article]({{< ref "post/4-blog-deployment-ci-cd-pipeline-gitea-actions" >}}).
#### Create a Secret
To allow my Gitea Runner to use the `gitea_blog` user in its jobs, I create a secret. In the `Blog` Gitea repository, I open `Settings`, then `Actions` > `Secrets` > `Add Secret`. I set the secret value in the `<user>:<password>` format:
![Add a secret in the blog Gitea repository](img/gitea-blog-ntfy-credentials.png)
### Write the `Notify` Code
Now I can write the code which will send me a message when a new deployment occurs.
If the deployment is successful, the priority will be minimal: no notification on my mobile, just a trace I can check in the Ntfy Android app if I need to.
If anything fails, I want to be notified on my mobile with a higher priority. Ntfy allows me to add actions to my notifications, so I will create two:
- **View Run**: Direct link to the workflow run in Gitea to see what happened.
- **Verify Blog**: Link to the blog to make sure it is still online.
```yaml
Notify:
needs: [Check-Rebuild, Build, Deploy-Staging, Test-Staging, Merge, Deploy-Production, Test-Production, Clean]
runs-on: ubuntu
if: always()
env:
NTFY_URL: https://ntfy.vezpi.me
NTFY_TOPIC: blog
NTFY_TOKEN: ${{ secrets.NTFY_CREDENTIALS }}
steps:
- name: Notify Workflow Result
run: |
if [[
"${{ needs.Check-Rebuild.result }}" == "success" &&
("${{ needs.Build.result }}" == "success" || "${{ needs.Build.result }}" == "skipped") &&
"${{ needs.Deploy-Staging.result }}" == "success" &&
"${{ needs.Test-Staging.result }}" == "success" &&
"${{ needs.Merge.result }}" == "success" &&
"${{ needs.Deploy-Production.result }}" == "success" &&
"${{ needs.Test-Production.result }}" == "success" &&
("${{ needs.Clean.result }}" == "success" || "${{ needs.Clean.result }}" == "skipped")
]]; then
curl -H "Priority: min" \
-H "Tags: white_check_mark" \
-d "Blog workflow completed successfully." \
-u ${NTFY_TOKEN} \
${NTFY_URL}/${NTFY_TOPIC}
else
curl -H "Priority: high" \
-H "Tags: x" \
-H "Actions: view, View Run, ${{ gitea.server_url }}/${{ gitea.repository }}/actions/runs/${{ gitea.run_number }}, clear=true; \
view, Verify Blog, https://blog.vezpi.com, clear=true" \
-d "Blog workflow failed!" \
-u ${NTFY_TOKEN} \
${NTFY_URL}/${NTFY_TOPIC}
fi
```
✅ I tested both cases, and they work as expected:
![Checking both test scenario in Ntfy WebUI](img/ntfy-testing-blog-notifications.png)
## Conclusion
After testing **Gotify** and **Ntfy**, I found my next notification system. Both are up to the job, but I had to pick one, and I have a slight preference for Ntfy.
The application would be perfect if I could manage users and access from the WebUI; I would also prefer to manage topic icons globally rather than having to upload them from my mobile.
Anyway, I'm very satisfied with the results of this first implementation and I look forward to adding notifications elsewhere!

View File

@@ -1,708 +0,0 @@
---
slug: ac-automation-home-assistant-node-red
title: Automatisation Complète de la Climatisation avec Home Assistant et Node-RED
description: Comment jautomatise ma clim avec Home Assistant et Node-RED pour réagir à la température, lhumidité et à tous les évènements quotidiens.
date: 2025-06-27
draft: false
tags:
- home-automation
- home-assistant
- node-red
categories:
- automation
---
## Intro
Dans mon appartement, jai un système de climatisation Daikin, qui me permet de rafraîchir en été mais aussi de chauffer en hiver. Il est composé de 3 unités intérieures :
- Salon
- Chambre parentale
- Couloir (juste en face de mon bureau et de la chambre de mon fils)
Jai toujours trouvé ça pénible de devoir les allumer manuellement quand jen avais besoin, et joubliais souvent de les éteindre ensuite, sans parler de la télécommande que je passais mon temps à chercher.
Et si je pouvais automatiser tout ça ? Après tout, jutilise déjà Home Assistant pour piloter beaucoup de choses chez moi, alors contrôler la clim, ça me semble logique.
### Home Assistant
Home Assistant, cest le cerveau de ma maison connectée. Il relie tous mes appareils (lumières, capteurs, volets, etc.) dans une interface unique. Sa vraie force, cest la possibilité de créer des automatisations : si quelque chose se passe, alors fait ça. Des actions simples comme “allumer la lumière de la cuisine quand un mouvement est détecté” se mettent en place en quelques clics. Et pour des scénarios plus avancés, Home Assistant propose un système de scripts en YAML avec des conditions, des minuteries, des déclencheurs, et même du templating.
Mais dès quon commence à faire des automatisations un peu complexes, qui dépendent de plusieurs capteurs, dhoraires spécifiques ou de la présence de quelquun, ça devient vite difficile à lire. Les blocs de code YAML sallongent, et on ne sait plus trop ce qui fait quoi, surtout quand on veut corriger un petit détail plusieurs semaines plus tard.
### Node-RED
Cest exactement pour ça que je suis passé à Node-RED. Cest un outil visuel qui permet de construire des logiques avec des blocs appelés “nœuds”, quon relie entre eux avec des flèches pour créer un **flow**. Chaque nœud fait une petite action : déclencher à une certaine heure, vérifier une condition, envoyer une commande à un appareil, etc. Au lieu décrire du YAML, on glisse les éléments, on les connecte, et cest tout.
Node-RED ne remplace pas Home Assistant, il le renforce. Je ne détaillerai pas l'installation de Node-RED ni son intégration à HA, je l'ai fait il y a deux ans, mais de mémoire c'est assez simple.
## Ancien Workflow
Javais déjà une solution plutôt efficace pour contrôler ma climatisation via Home Assistant et Node-RED, mais je voulais laméliorer pour quelle prenne aussi en compte le taux dhumidité dans lappartement. Mon workflow actuel, bien quil fonctionne, nétait pas vraiment évolutif et assez difficile à maintenir :
![Ancien workflow Node-RED pour contrôler la climatisation](img/node-red-ha-ac-automation-before.png)
## Nouveau Workflow
Plutôt que de bricoler ce flow existant, jai préféré repartir de zéro avec le même objectif : piloter le système de climatisation en prenant en compte tous les capteurs disponibles : thermomètres, humidité, capteurs douverture, présence des occupants, moment de la journée, etc.
### Objectifs
Lidée est assez simple : ne plus avoir à penser à la climatisation, tout en restant efficace.
Mais concrètement, quest-ce que ça veut dire ? Je veux que la température et le taux dhumidité restent dans des valeurs confortables, que je sois présent ou non. Si jouvre les fenêtres, la clim doit sarrêter. Si lair est trop humide, je veux quil soit asséché. Si jallume ou éteins manuellement la clim, je ne veux pas que ça écrase mes réglages. La nuit, je nai pas besoin de rafraîchir le salon et je veux aussi que le système soit silencieux, etc.
Pour maider à faire tout ça, jutilise 4 [capteurs de température et dhumidité Aqara](https://eu.aqara.com/fr-eu/products/aqara-temperature-and-humidity-sensor), un dans chacune de mes pièces principales. Jutilise aussi quelques [capteurs douverture Aqara](https://eu.aqara.com/fr-eu/products/aqara-door-and-window-sensor) pour savoir si une fenêtre est ouverte.
### Workflow
Laissez-moi vous présenter mon nouveau workflow de climatisation dans Node-RED, et vous expliquer en détail comment il fonctionne :
![New Node-RED air conditioning workflow](img/node-red-new-ac-workflow-with-legend.png)
#### 1. Capteurs de Température
Dans le premier nœud, jai regroupé tous les capteurs thermiques dans un seul `trigger state node`, en ajoutant non seulement la température mais aussi le taux dhumidité géré par chaque capteur. Ce nœud contient donc une liste de 8 entités (2 pour chaque capteur). À chaque fois quune de ces 8 valeurs change, le nœud est déclenché :
![Nœud trigger state dans Node-RED avec les 8 entités](img/node-red-temperature-sensors-trigger-node.png)
Chacun de mes capteurs thermiques porte un nom de couleur en français, car ils ont tous un autocollant coloré pour les distinguer :
- **Jaune** : Salon
- **Bleu** : Chambre
- **Rouge** : Bureau
- **Vert** : Chambre de mon fils
Le deuxième nœud est un `function node` dont le rôle est de déterminer à quelle pièce appartient le capteur :
```js
const association = {
"temperature_jaune": "salon",
"temperature_bleu": "chambre",
"temperature_rouge": "couloir",
"temperature_vert": "couloir"
};
// Match pattern like: sensor.temperature_rouge_temperature
const match = msg.topic.match(/^sensor\.(.+)_(temperature|humidity)$/);
if (!match) {
node.warn("Topic format not recognized: " + msg.topic);
return null;
}
msg.payload = {
room: association[match[1]],
sensor: match[1]
};
return msg;
```
Pour le dernier nœud, dans la majorité des cas, les capteurs envoient deux messages simultanés : lun pour la température, lautre pour lhumidité. Jai donc ajouté un `join node` pour fusionner ces deux messages sils sont envoyés dans la même seconde :
![Join node in Node-RED to merge temperature and humidity](img/node-red-temperature-sensor-join-node.png)
#### 2. Notification
Il peut arriver que les capteurs de température nenvoient plus détat pendant un certain temps, pour une raison ou une autre. Dans ce cas, ils renvoient simplement leur dernière valeur connue, ce qui peut bloquer lunité de climatisation associée.
La solution que jai trouvée efficace consiste à envoyer une notification si un capteur na pas transmis de nouvelle valeur depuis plus de 3 heures. En fonctionnement normal, chaque capteur envoie une mise à jour environ toutes les 15 minutes.
Le premier nœud est un `function node` un peu technique, qui crée une variable de flux comme minuteur pour chaque capteur. Une fois le délai écoulé, un message est envoyé au nœud suivant :
```js
const sensor = msg.payload.sensor;
const timeoutKey = `watchdog_${sensor}`;
const messages = {
"temperature_jaune": {"title": "Température Salon", "message": "Capteur de température du salon semble hors service"},
"temperature_bleu": {"title": "Température Chambre", "message": "Capteur de température de la chambre semble hors service"},
"temperature_rouge": {"title": "Température Bureau", "message": "Capteur de température du bureau semble hors service"},
"temperature_vert": {"title": "Température Raphaël", "message": "Capteur de température de Raphaël semble hors service"}
};
// Clear existing timer
const existing = flow.get(timeoutKey);
if (existing) clearTimeout(existing);
// Set new timer
const timer = setTimeout(() => {
node.send({
payload: `⚠️ No update from ${sensor} in 3 hours.`,
sensor: sensor,
title: messages[sensor]["title"],
message: messages[sensor]["message"]
});
}, 3 * 60 * 60 * 1000); // 3 hours
flow.set(timeoutKey, timer);
return null; // Don't send anything now
```
Le second nœud est un `call service node` qui envoie une notification sur mon téléphone Android avec les informations fournies :
![Node-RED call service node for notification](img/node-red-call-service-node-notification.png)
#### 3. Curseurs de Température
Pour pouvoir ajuster la température sans avoir à modifier tout le workflow, jai créé deux entrées (ou helper) Home Assistant, de type number, pour chaque unité de climatisation, ce qui me fait un total de 6 entrées :
![Curseur de température dans Home Assistant pour chaque unité](img/home-assistant-temperature-room-sliders.png)
Ces valeurs représentent la température de base utilisée pour le calcul des seuils, en fonction des offsets que je détaillerai plus loin.
Le premier nœud est un `trigger state node` qui regroupe les 6 entités. Si je modifie lune de ces valeurs, le nœud est déclenché :
![Node-RED trigger state node for sliders](img/node-red-trigger-state-mode-for-sliders.png)
Le deuxième nœud est un `function node`, qui permet de déterminer la pièce concernée :
```js
const association = {
"input_number.temp_ete_salon": "salon",
"input_number.temp_hiver_salon": "salon",
"input_number.temp_ete_chambre": "chambre",
"input_number.temp_hiver_chambre": "chambre",
"input_number.temp_ete_couloir": "couloir",
"input_number.temp_hiver_couloir": "couloir"
};
msg.payload = { room: association[msg.topic] };
return msg;
```
#### 4. Interrupteurs
Dans Home Assistant, jutilise dautres entrées, mais cette fois sous forme de booléens. Le plus important est celui dédié à la climatisation, qui me permet de désactiver manuellement tout le workflow. Jen ai dautres qui sont automatisés, par exemple pour le moment de la journée ou la détection de présence à la maison.
Jutilise un autre `trigger state node` qui regroupe tous mes interrupteurs sous forme de booléens, y compris un bouton de test utilisé pour le débogage :
![Node-RED trigger state node for toggles](img/node-red-trigger-state-node-toggles.png)
Comme ces interrupteurs impactent tout lappartement (et non une seule unité), le nœud suivant est un `change node` qui définit la valeur de la pièce à `partout` :
![Node-RED change node to set room to partout](img/node-red-change-node-room-partout.png)
#### 5. Fenêtres
Les derniers déclencheurs sont les fenêtres. Si jouvre ou ferme une fenêtre située près dune unité, cela active le workflow. Jai des capteurs douverture sur certaines fenêtres, mais pour lunité du couloir, jutilise létat des fenêtres Velux. Certaines pièces ayant plusieurs fenêtres, jai créé une entrée de type groupe pour les regrouper.
Le premier nœud est le dernier `trigger state node`. La valeur retournée est une string quil faudra ensuite convertir en booléen :
![Node-RED trigger state node for windows](img/node-red-trigger-state-node-windows.png)
Juste après, un autre `function node` permet didentifier la pièce concernée :
```js
const association = {
"binary_sensor.groupe_fenetre_salon": "salon",
"binary_sensor.fenetre_chambre_contact": "chambre",
"cover.groupe_fenetre_couloir": "couloir"
};
msg.payload = {
room: association[msg.topic]
};
return msg;
```
#### 6. Fenêtre Watchdog
Quand jouvre une fenêtre, ce nest pas forcément pour la laisser ouverte longtemps. Je peux simplement faire sortir le chat ou jeter un œil au portail. Je ne veux pas que la climatisation se coupe dès que jouvre une fenêtre. Pour contourner cela, jai mis en place un watchdog pour chaque unité, afin de retarder lenvoi du message pendant un certain temps.
Le premier nœud est un `switch node`. En fonction de la pièce transmise par le nœud précédent, il envoie le message au _watchdog_ correspondant :
![Node-RED switch node based on the room for the watchdog](img/node-red-switch-node-room-selector-watchdog.png)
Viennent ensuite les _watchdogs_, des `trigger nodes`, qui retardent le message pendant un certain temps, et prolongent ce délai si un autre message est reçu entre-temps :
![Node-RED trigger node for window watchdog](img/node-red-trigger-node-window-watchdog.png)
#### 7. Climatisation Activée ?
Tous ces déclencheurs arrivent maintenant dans la chaîne de traitement, qui va déterminer ce que le système doit faire. Mais avant cela, on vérifie si lautomatisation est activée. Jai ajouté ce kill switch au cas où, même si je lutilise rarement.
Le premier nœud est un `delay node` qui régule le débit des messages entrants à 1 message par seconde :
![Node-RED delay node to limit the rate to 1 message per second](img/node-red-delay-node-1-msg-per-second.png)
Le deuxième nœud est un `current state node` qui vérifie si le booléen `climatisation` est activé :
![Node-RED current state node for climatisation](img/node-red-current-state-node-climatisation-enabled.png)
#### 8. Configuration des pièces
Lidée ici est dassocier la configuration de la pièce au message. Chaque pièce a sa propre configuration : quelle unité est utilisée, quels capteurs sont associés, et surtout, dans quelles conditions elle doit sallumer ou séteindre.
Les unités de climatisation disposent de 4 modes :
- Refroidissement (Cool)
- Déshumidification (Dry)
- Ventilation (Fan)
- Chauffage (Heat)
Pour déterminer quel mode utiliser, jutilise des seuils pour chaque mode et la vitesse de ventilation, avec différents offsets selon la situation. Je peux ainsi définir un offset spécifique la nuit ou en cas dabsence. Je peux aussi définir un offset sur `disabled`, ce qui forcera larrêt de lunité.
Le premier nœud est un `switch node`, basé sur la valeur `room`, qui oriente le message vers la configuration associée. Si la pièce est `partout`, le message est dupliqué vers les 3 configurations de pièce :
![Node-RED switch node for room configuration](img/node-red-switch-node-room-config.png)
Il est ensuite connecté à un `change node`, qui ajoute la configuration dans `room_config`. Voici un exemple avec la configuration du salon :
```json
{
"threshold": {
"cool": {
"start": {
"1": 1,
"2": 1.5,
"3": 2,
"4": 2.5,
"quiet": 0
},
"stop": -0.3,
"target": -1,
"offset": {
"absent": 1,
"vacances": "disabled",
"fenetre": "disabled",
"matin": "disabled",
"jour": 0,
"soir": 0,
"nuit": "disabled"
}
},
"dry": {
"start": {
"quiet": -1
},
"stop": -1.5,
"offset": {
"absent": "1.5",
"vacances": "disabled",
"fenetre": "disabled",
"matin": "disabled",
"jour": 0,
"soir": 0,
"nuit": "disabled"
}
},
"fan_only": {
"start": {
"1": -0.3,
"quiet": -0.5
},
"stop": -0.7,
"offset": {
"absent": "disabled",
"vacances": "disabled",
"fenetre": "disabled",
"matin": "disabled",
"jour": 0,
"soir": 0,
"nuit": "disabled"
}
},
"heat": {
"start": {
"1": 0,
"2": -1.5,
"quiet": 0
},
"stop": 1,
"target": 1,
"offset": {
"absent": -1.5,
"vacances": -3,
"fenetre": "disabled",
"matin": 0,
"jour": 0,
"soir": 0,
"nuit": -1.5
}
}
},
"unit": "climate.clim_salon",
"timer": "timer.minuteur_clim_salon",
"window": "binary_sensor.groupe_fenetre_salon",
"thermometre": "sensor.temperature_jaune_temperature",
"humidity": "sensor.temperature_jaune_humidity",
"temp_ete": "input_number.temp_ete_salon",
"temp_hiver": "input_number.temp_hiver_salon"
}
```
#### 9. Calcul
Maintenant que le message contient la configuration de la pièce, on entre dans la phase de calcul. On dispose du nom de lunité de climatisation, des capteurs associés, de la température de base souhaitée et de loffset à appliquer. À partir de ces données, on récupère les états actuels et on effectue les calculs.
Le premier nœud est un `delay node` qui régule le débit des messages entrants, car le bloc précédent a potentiellement généré trois messages si toutes les pièces sont concernées.
Le deuxième nœud est le plus important du workflow, un `function node` qui remplit plusieurs rôles :
- Récupère les états des capteurs depuis Home Assistant
- Calcule les seuils des modes à partir des offsets
- Désactive certains modes si les conditions sont remplies
- Injecte les valeurs dans le `payload`
```js
// --- Helper: Get Home Assistant state by entity ID ---
function getState(entityId) {
return global.get("homeassistant.homeAssistant.states")[entityId]?.state;
}
// --- Determine current time period based on sensors ---
const periods = ["jour", "soir", "nuit", "matin"];
msg.payload.period = periods.find(p => getState(`binary_sensor.${p}`) === 'on') || 'unknown';
// --- Determine presence status (absent = inverse of presence) ---
const vacances = getState("input_boolean.absent");
const absent = getState("input_boolean.presence") === 'on' ? 'off' : 'on';
/**
* Recursively adds the base temperature and offset to all numeric start values in a threshold config
*/
function applyOffsetToThresholds(threshold, baseTemp, globalOffset) {
for (const [key, value] of Object.entries(threshold)) {
if (key === "offset") continue;
if (typeof value === 'object') {
applyOffsetToThresholds(value, baseTemp, globalOffset);
} else {
threshold[key] += baseTemp + globalOffset;
}
}
}
/**
* Calculates the global offset for a mode, based on presence, vacation, window, and time of day
*/
function calculateGlobalOffset(offsets, modeName, windowState, disabledMap) {
let globalOffset = 0;
for (const [key, offsetValue] of Object.entries(offsets)) {
let conditionMet = false;
if (key === msg.payload.period) conditionMet = true;
else if (key === "absent" && absent === 'on') conditionMet = true;
else if (key === "vacances" && vacances === 'on') conditionMet = true;
else if ((key === "fenetre" || key === "window") && windowState === 'on') conditionMet = true;
if (conditionMet) {
if (offsetValue === 'disabled') {
disabledMap[modeName] = true;
return 0; // Mode disabled immediately
}
globalOffset += parseFloat(offsetValue);
}
}
return globalOffset;
}
/**
* Main logic: compute thresholds for the specified room using the provided config
*/
const cfg = msg.payload.room_config;
const room = msg.payload.room;
// Normalize window sensor state
const rawWindow = getState(cfg.window);
const window = rawWindow === 'open' ? 'on' : rawWindow === 'closed' ? 'off' : rawWindow;
// Gather temperatures
const temps = cfg.thermometre.split(',')
.map(id => parseFloat(getState(id)))
.filter(v => !isNaN(v));
const temp_avg = temps.reduce((a, b) => a + b, 0) / temps.length;
const temp_min = Math.min(...temps);
const temp_max = Math.max(...temps);
// Gather humidity
const humidities = cfg.humidity.split(',')
.map(id => parseFloat(getState(id)))
.filter(v => !isNaN(v));
const humidity_avg = humidities.reduce((a, b) => a + b, 0) / humidities.length;
const humidity_min = Math.min(...humidities);
const humidity_max = Math.max(...humidities);
// Get base temps
const temp_ete = parseFloat(getState(cfg.temp_ete));
const temp_hiver = parseFloat(getState(cfg.temp_hiver));
// Process modes
const { threshold } = cfg;
const modes = ["cool", "dry", "fan_only", "heat"];
const disabled = {};
for (const mode of modes) {
const baseTemp = (mode === "heat") ? temp_hiver : temp_ete;
const globalOffset = calculateGlobalOffset(threshold[mode].offset, mode, window, disabled);
applyOffsetToThresholds(threshold[mode], baseTemp, globalOffset);
}
// Final message
msg.payload = {
...msg.payload,
unit: cfg.unit,
timer: cfg.timer,
threshold,
window,
temp: {
min: temp_min,
max: temp_max,
avg: Math.round(temp_avg * 100) / 100
},
humidity: {
min: humidity_min,
max: humidity_max,
avg: Math.round(humidity_avg * 100) / 100
},
disabled
};
return msg;
```
Le troisième nœud est un `filter node`, qui ignore les messages suivants ayant un contenu similaire :
![Node-RED filter node to block similar message](img/node-red-filter-node-blocker.png)
Le quatrième nœud vérifie si un verrou est actif à laide dun `current state node`. On regarde si le minuteur associé à lunité est inactif. Si ce nest pas le cas, le message est ignoré :
![Node-RED current state node for timer lock](img/node-red-current-state-node-lock-timer.png)
Le dernier nœud est un autre `current state node` qui permet de récupérer létat actuel de lunité et ses propriétés :
![Node-RED current state node to get current unit state](img/node-red-current-state-node-get-unit-state.png)
#### 10. État Cible
Après les calculs, il s'agit maintenant de déterminer quel doit être le mode cible, quelle action effectuer pour converger vers ce mode à partir de létat actuel, et le cas échéant, quelle vitesse de ventilation utiliser pour ce mode.
Les trois nœuds suivants sont des `function nodes`. Le premier détermine le mode cible à adopter parmi : `off`, `cool`, `dry`, `fan_only` et `heat` :
```js
const minHumidityThreshold = 52;
const maxHumidityThreshold = 57;
// Helper: check if mode can be activated or stopped
function isModeEligible(mode, temps, humidity, thresholds, currentMode) {
const isCurrent = (mode === currentMode);
const threshold = thresholds[mode];
if (msg.payload.disabled?.[mode]) return false;
// Determine which temperature to use for start/stop:
// start: temp.max (except heat uses temp.min)
// stop: temp.avg
let tempForCheckStart;
if (mode === "heat") {
tempForCheckStart = temps.min; // heat start uses min temp
} else {
tempForCheckStart = temps.max; // others start use max temp
}
const tempForCheckStop = temps.avg;
// Dry mode also depends on humidity thresholds
// humidity max for start, humidity avg for stop
let humidityForCheckStart = humidity.max;
let humidityForCheckStop = humidity.avg;
// For heat mode (inverted logic)
if (mode === "heat") {
if (!isCurrent) {
const minStart = Math.min(...Object.values(threshold.start));
return tempForCheckStart < minStart;
} else {
return tempForCheckStop < threshold.stop;
}
}
// For dry mode (humidity-dependent)
if (mode === "dry") {
// Skip if humidity too low
if (humidityForCheckStart <= (isCurrent ? minHumidityThreshold : maxHumidityThreshold)) return false;
const minStart = Math.min(...Object.values(threshold.start));
if (!isCurrent) {
return tempForCheckStart >= minStart;
} else {
return tempForCheckStop >= threshold.stop;
}
}
// For cool and fan_only
if (!isCurrent) {
const minStart = Math.min(...Object.values(threshold.start));
return tempForCheckStart >= minStart;
} else {
return tempForCheckStop >= threshold.stop;
}
}
// --- Main logic ---
const { threshold, temp, humidity, current_mode, disabled } = msg.payload;
const priority = ["cool", "dry", "fan_only", "heat"];
let target_mode = "off";
// Loop through priority list and stop at the first eligible mode
for (const mode of priority) {
if (isModeEligible(mode, temp, humidity, threshold, current_mode)) {
target_mode = mode;
break;
}
}
msg.payload.target_mode = target_mode;
if (target_mode === "cool" || target_mode === "heat") {
msg.payload.set_temp = true;
}
return msg;
```
Le second compare le mode actuel avec le mode cible et choisit laction à effectuer :
- **check** : le mode actuel est identique au mode cible.
- **start** : lunité est éteinte, mais un mode actif est requis.
- **change** : lunité est allumée, mais le mode cible est différent du mode actuel (et nest pas `off`).
- **stop** : lunité est allumée mais doit être arrêtée.
```js
let action = "check"; // default if both are same
if (msg.payload.current_mode === "off" && msg.payload.target_mode !== "off") {
action = "start";
} else if (msg.payload.current_mode !== "off" && msg.payload.target_mode !== "off" && msg.payload.current_mode !== msg.payload.target_mode) {
action = "change";
} else if (msg.payload.current_mode !== "off" && msg.payload.target_mode === "off") {
action = "stop";
}
msg.payload.action = action;
return msg;
```
Le dernier nœud détermine la vitesse de ventilation appropriée pour le mode cible, en fonction des seuils définis :
```js
// Function to find the appropriate speed key based on temperature and mode
function findSpeed(thresholdStart, temperature, mode) {
let closestSpeed = 'quiet';
let closestTemp = mode === 'heat' ? Infinity : -Infinity;
for (const speedKey in thresholdStart) {
if (speedKey !== 'quiet') {
const tempValue = thresholdStart[speedKey];
if (mode === 'heat') {
if (tempValue >= temperature && tempValue <= closestTemp) {
closestSpeed = speedKey;
closestTemp = tempValue;
}
} else { // cool, fan_only
if (tempValue <= temperature && tempValue >= closestTemp) {
closestSpeed = speedKey;
closestTemp = tempValue;
}
}
}
}
return closestSpeed;
}
if (msg.payload.target_mode && msg.payload.target_mode !== "off" && msg.payload.target_mode !== "dry") {
const modeData = msg.payload.threshold[msg.payload.target_mode];
if (modeData && modeData.start) {
if (msg.payload.target_mode === "heat") {
msg.payload.speed = findSpeed(modeData.start, msg.payload.temp.min, 'heat');
} else {
msg.payload.speed = findSpeed(modeData.start, msg.payload.temp.max, 'cool');
}
} else {
node.error("Invalid mode data or missing 'start' thresholds", msg);
}
} else {
// No need for speed in 'off' or 'dry' modes
msg.payload.speed = null;
}
return msg;
```
#### 11. Choix de l'Action
En fonction de laction à effectuer, le `switch node` va router le message vers le bon chemin :
![Node-RED `switch node` pour sélectionner laction](img/node-red-switch-node-select-action.png)
#### 12. Démarrage
Lorsque laction est `start`, il faut dabord allumer lunité. Cela prend entre 20 et 40 secondes selon le modèle, et une fois démarrée, lunité est verrouillée pendant un court laps de temps pour éviter les messages suivants.
Le premier nœud est un `call service node` utilisant le service `turn_on` sur lunité de climatisation :
![Node-RED call service node with turn_on service](img/node-red-call-service-node-turn-on.png)
Le second nœud est un autre `call service node` qui va démarrer un minuteur de verrouillage (lock timer) pour cette unité pendant 45 secondes :
![Node-RED call service node to start the unit timer](img/node-red-call-service-node-start-timer.png)
Le dernier est un `delay node` de 5 secondes, pour laisser le temps à lintégration Daikin de Home Assistant de refléter le nouvel état.
---
#### 13. Changement
Laction `change` est utilisée pour passer dun mode à un autre, mais aussi juste après lallumage.
Le premier nœud est un `call service node` utilisant le service `set_hvac_mode` sur lunité de climatisation :
![Node-RED call service node with set_hvac_mode service](img/node-red-call-service-node-set-hvac-mode.png)
Le nœud suivant est un `delay node` de 5 secondes.
Le dernier vérifie, avec un `switch node`, si la température cible doit être définie. Cela nest nécessaire que pour les modes `cool` et `heat` :
![Node-RED switch node for set_temp](img/node-red-switch-node-set-temp.png)
---
#### 14. Définir la Température Cible
La température cible est uniquement pertinente pour les modes `cool` et `heat`. Avec une climatisation classique, vous définissez une température à atteindre — cest exactement ce quon fait ici. Mais comme chaque unité utilise son propre capteur interne pour vérifier cette température, je ne leur fais pas vraiment confiance. Si la température cible est déjà atteinte selon lunité, elle ne soufflera plus du tout.
Le premier nœud est un autre `call service node` utilisant le service `set_temperature` :
![Node-RED call service node with set_temperature service](img/node-red-call-service-node-set-temperature-service.png)
Encore une fois, ce nœud est suivi dun `delay node` de 5 secondes.
#### 15. Vérification
Laction `check` est utilisée presque tout le temps. Elle consiste uniquement à vérifier et comparer la vitesse de ventilation souhaitée, et à la modifier si nécessaire.
Le premier nœud est un `switch node` qui vérifie si la valeur `speed` est définie :
![Node-RED switch node to test if speed is defined](img/node-red-switch-node-fan-speed.png)
Le deuxième est un autre `switch node` qui compare la valeur `speed` avec la vitesse actuelle :
![Node-Red switch node to compare speed](img/node-red-switch-node-compare-speed.png)
Enfin, le dernier nœud est un `call service node` utilisant le service `set_fan_mode` pour définir la vitesse du ventilateur :
![Node-RED call service node with set_fan_mode](img/node-red-call-service-node-set-fan-mode.png)
#### 16. Arrêt
Lorsque laction est `stop`, lunité de climatisation est simplement arrêtée.
Le premier nœud est un `call service node` utilisant le service `turn_off` :
![Node-RED call service node with turn_off service](img/node-red-call-service-node-turn-off.png)
Le deuxième nœud est un autre `call service node` qui va démarrer le minuteur de verrouillage de cette unité pour 45 secondes.
#### 17. Intervention Manuelle
Parfois, pour une raison ou une autre, on souhaite utiliser la climatisation manuellement. Dans ce cas, on ne veut pas que le flux Node-RED vienne écraser notre réglage manuel, du moins pendant un certain temps.
Node-RED utilise son propre utilisateur dans Home Assistant, donc si une unité change détat sans cet utilisateur, cest quune intervention manuelle a eu lieu.
Le premier nœud est un `trigger state node`, qui envoie un message dès quune unité AC change détat :
![node-red-trigger-state-unit-change.png](img/node-red-trigger-state-unit-change.png)
Le deuxième est un `function node` qui associe lunité avec son minuteur :
```js
const association = {
"climate.clim_salon": "timer.minuteur_clim_salon",
"climate.clim_chambre": "timer.minuteur_clim_chambre",
"climate.clim_couloir": "timer.minuteur_clim_couloir"
};
msg.payload = association[msg.topic];
return msg;
```
Le troisième est un `switch node` qui laisse passer le message uniquement si le `user_id` **nest pas** celui de Node-RED :
![Node-RED switch node not specific user_id](img/node-red-switch-node-user-id.png)
Le quatrième est un autre `switch node` qui vérifie que le champ `user_id` **est bien défini** :
![Node-RED switch node check user_id not null](img/node-red-switch-node-check-user-id.png)
Enfin, le dernier nœud est un `call service node` utilisant le service `start` sur le minuteur de lunité, avec sa durée par défaut (60 minutes) :
![Node-RED call service node start timer with default duration](img/node-red-call-service-node-start-unit-timer.png)
## TL;DR
Avec cette configuration, mon système de climatisation est entièrement automatisé, du refroidissement en été au chauffage en hiver, tout en gardant un œil sur le taux dhumidité.
Cela ma demandé pas mal de réflexion, dajustements et de tests, mais au final je suis vraiment satisfait du résultat. Cest pourquoi je le partage ici, pour vous donner des idées sur ce quon peut faire en domotique.
Si vous pensez que certaines choses pourraient être faites autrement, nhésitez pas à me contacter pour en discuter ou me proposer de nouvelles idées!

View File

@@ -1,700 +0,0 @@
---
slug: ac-automation-home-assistant-node-red
title: Full AC Automation with Home Assistant and Node-RED
description: How I automate my AC with Home Assistant and Node-RED to react to temperature, humidity and all daily events.
date: 2025-06-27
draft: false
tags:
- home-automation
- home-assistant
- node-red
categories:
- automation
---
## Intro
In my apartment I have a Daikin air conditioning system, to cool it down in summer, but also warm it up in winter. It is composed of 3 indoor units:
- Living room
- Master bedroom
- Hallway (in front of my office and my kid's room)
I always found it annoying to have to turn them on when I needed them, I often forgot to turn them off when I should have, and I was constantly chasing the remote.
What if I could automate all that? After all, I already use Home Assistant to control many devices at home, so controlling the AC seems natural.
### Home Assistant
Home Assistant is the brain of my smart home. It connects all my devices (lights, sensors, shutters, etc.) under a single interface. What makes it so powerful is the ability to create automations: if something happens, then do something else. Simple things like “turn on the kitchen light when the motion sensor is triggered” are a breeze. For more advanced workflows, it offers YAML-based scripts with conditions, delays, triggers, and templates.
That said, once automations start getting more complex, like reacting to multiple sensors, time ranges, or presence detection, they can quickly turn into long, hard-to-follow blocks of code. Its easy to lose track of what does what, especially when you want to tweak just one small part weeks later.
### Node-RED
Thats exactly why I turned to Node-RED. Its a visual tool that lets you build logic using blocks called “nodes”, which you connect with wires to create flows. Each node performs a small task: trigger at a certain time, check a condition, send a command to a device, etc. Instead of writing YAML, you just drag, drop, and connect.
Node-RED does not replace Home Assistant, it empowers it. I won't cover the installation of Node-RED nor its integration with HA; I did that 2 years ago, but from what I remember, it is quite straightforward.
## Previous Workflow
I already had a decent solution to control my AC from Home Assistant with Node-RED, but I wanted to enhance it to also take the humidity level at home into account. My previous workflow, despite being functional, was not really scalable and quite hard to maintain:
![Previous Node-RED workflow to control the air conditioning](img/node-red-ha-ac-automation-before.png)
## New Workflow
Instead of tweaking this workflow, I created a new one from scratch, with the same goal in mind: control the AC system by taking into account all available sensors: thermometers, humidity, door sensors, occupant presence, time of day, etc.
### Objectives
The idea is pretty simple: not having to think about the AC while keeping it efficient.
That being said, what does that mean? I want to keep the temperature and humidity level in check, whether I'm home or not. If I open the windows, it should stop blowing. If the air is too humid, I want to dry it. If I turn the AC on or off manually, I don't want the automation to override my setting. If it's night, I don't need to cool my living room and I want it quiet, etc.
To help me achieve that, I'm using 4 [Aqara temperature and humidity sensors](https://eu.aqara.com/en-eu/products/aqara-temperature-and-humidity-sensor), one in each of my main rooms. I'm also using some [Aqara door sensors](https://eu.aqara.com/en-eu/products/aqara-door-and-window-sensor), to detect if windows are open.
### Workflow
Let me introduce my new AC workflow within Node-RED and explain what it does in detail:
![New Node-RED air conditioning workflow](img/node-red-new-ac-workflow-with-legend.png)
#### 1. Temperature Sensors
In the first node, I combined all the temperature sensors in a single `trigger state node`, adding the humidity levels reported by each sensor on top of the temperatures. The node then contains 8 entities in a list (2 for each of my sensors). Each time one of these 8 entities changes value, the node is triggered:
![Trigger state node in Node-RED with the 8 entities](img/node-red-temperature-sensors-trigger-node.png)
Each of my temperature sensors is named after a color in French, because each one has its own color sticker to distinguish it:
- **Jaune**: Living room
- **Bleu**: Bedroom
- **Rouge**: Office
- **Vert**: Kid's bedroom
The second node is a `function node` whose role is to determine the room of the sensor (a `function node` is written in **JavaScript**):
```js
const association = {
"temperature_jaune": "salon",
"temperature_bleu": "chambre",
"temperature_rouge": "couloir",
"temperature_vert": "couloir"
};
// Match pattern like: sensor.temperature_rouge_temperature
const match = msg.topic.match(/^sensor\.(.+)_(temperature|humidity)$/);
if (!match) {
node.warn("Topic format not recognized: " + msg.topic);
return null;
}
msg.payload = {
room: association[match[1]],
sensor: match[1]
};
return msg;
```
For the last node: most of the time, the sensors send two messages at the same time, one containing the temperature value and the other the humidity level. I added a `join node` to combine the two messages if they are sent within the same second:
![Join node in Node-RED to merge temperature and humidity](img/node-red-temperature-sensor-join-node.png)
#### 2. Notification
It can happen that a temperature sensor stops sending states for some reason. In that case, it will keep returning its last value, which would lock the associated AC unit.
The workaround I found effective is to send a notification if a sensor did not send a new value in the last 3 hours. Under normal conditions, a sensor sends an update approximately every 15 minutes.
The first node is a slightly tricky `function node` which creates a flow variable holding a timer for each sensor. When the timeout is reached, it sends a message to the next node:
```js
const sensor = msg.payload.sensor;
const timeoutKey = `watchdog_${sensor}`;
const messages = {
"temperature_jaune": {"title": "Température Salon", "message": "Capteur de température du salon semble hors service"},
"temperature_bleu": {"title": "Température Chambre", "message": "Capteur de température de la chambre semble hors service"},
"temperature_rouge": {"title": "Température Bureau", "message": "Capteur de température du bureau semble hors service"},
"temperature_vert": {"title": "Température Raphaël", "message": "Capteur de température de Raphaël semble hors service"}
};
// Clear existing timer
const existing = flow.get(timeoutKey);
if (existing) clearTimeout(existing);
// Set new timer
const timer = setTimeout(() => {
node.send({
payload: `⚠️ No update from ${sensor} in 3 hours.`,
sensor: sensor,
title: messages[sensor]["title"],
message: messages[sensor]["message"]
});
}, 3 * 60 * 60 * 1000); // 3 hours
flow.set(timeoutKey, timer);
return null; // Don't send anything now
```
The second node is a `call service node` which sends a notification to my Android device with the given values:
![Node-RED call service node for notification](img/node-red-call-service-node-notification.png)
#### 3. Temperature Sliders
To have control over the temperatures without having to change the workflow, I created two Home Assistant number helpers per unit (one for summer, one for winter), which I can adjust at any time, giving me 6 helpers in total:
![Temperature slider in Home Assistant for each unit](img/home-assistant-temperature-room-sliders.png)
These values are the base temperatures used to calculate the thresholds, together with the offsets which I will detail further down.
The first node is a `trigger state node`, with all 6 entities combined. If I change one value, the node is triggered:
![Node-RED trigger state node for sliders](img/node-red-trigger-state-mode-for-sliders.png)
The second node is a `function node`, to determine the room affected:
```js
const association = {
"input_number.temp_ete_salon": "salon",
"input_number.temp_hiver_salon": "salon",
"input_number.temp_ete_chambre": "chambre",
"input_number.temp_hiver_chambre": "chambre",
"input_number.temp_ete_couloir": "couloir",
"input_number.temp_hiver_couloir": "couloir"
};
msg.payload = { room: association[msg.topic] };
return msg;
```
#### 4. Toggles
In Home Assistant, I'm also using other helpers, as booleans. The most important one is the AC toggle, with which I can manually disable the whole workflow. The others are automated, for the time of day or to detect presence at home.
I have another `trigger state node` with all my boolean toggles, including a test button for debugging purposes:
![Node-RED trigger state node for toggles](img/node-red-trigger-state-node-toggles.png)
As toggles affect the whole apartment and not a single unit, the next node is a `change node`, which sets the room value to `partout` (everywhere):
![Node-RED change node to set room to partout](img/node-red-change-node-room-partout.png)
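For readers less familiar with Node-RED, this `change node` is roughly equivalent to the following `function node` one-liner (purely illustrative, the real node is configured through the UI):
```js
// Roughly what the change node does: tag the message as targeting every room
msg.payload = { room: "partout" };
return msg;
```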
#### 5. Windows
The last triggers are my windows: if I open or close a window next to a unit, it triggers the workflow. I have door sensors on some of my doors, but for the hallway unit I'm using the Velux windows state. Some rooms have more than one window, so I created a group helper for them.
The first node is the last `trigger state node`; the returned value is a string which I will have to convert into a boolean later:
![Node-RED trigger state node for windows](img/node-red-trigger-state-node-windows.png)
Connected to it, again a `function node` to select the affected room:
```js
const association = {
"binary_sensor.groupe_fenetre_salon": "salon",
"binary_sensor.fenetre_chambre_contact": "chambre",
"cover.groupe_fenetre_couloir": "couloir"
};
msg.payload = {
room: association[msg.topic]
};
return msg;
```
#### 6. Window Watchdog
When I open a window, it is not necessarily to leave it open for a long time. I could just be letting the cat out or having a look at my gate. I don't want my AC turned off as soon as I open it. To work around that, I created a watchdog for each unit, to delay the message for some time.
The first node is a `switch node`: based on the room given by the previous node, it sends the message to the associated watchdog:
![Node-RED switch node based on the room for the watchdog](img/node-red-switch-node-room-selector-watchdog.png)
After that come the watchdogs, `trigger nodes`, which delay the message for some time and extend the delay if another message is received:
![Node-RED trigger node for window watchdog](img/node-red-trigger-node-window-watchdog.png)
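If you prefer to see that behavior as code, here is a minimal sketch of what such a watchdog does, written as a `function node` equivalent. It is purely illustrative: the actual workflow uses the built-in `trigger node`, and the 5-minute delay is an assumption (the real value lives in the node configuration):
```js
// Illustrative function-node equivalent of the window watchdog (trigger node):
// hold the message and only release the latest one after a quiet period.
const DELAY_MS = 5 * 60 * 1000; // assumed 5 minutes, the real delay is set in the trigger node

const key = `window_watchdog_${msg.payload.room}`;
const pending = context.get(key);
if (pending) clearTimeout(pending); // a new window event extends the delay

const timer = setTimeout(() => {
    context.set(key, undefined);
    node.send(msg); // forward the last received message once things have settled
}, DELAY_MS);
context.set(key, timer);

return null; // nothing is sent immediately
```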
#### 7. AC Enabled?
All these triggers now enter the computing pipeline, which determines what the system must do with them. But before that, the workflow checks whether the automation is even enabled. I added this kill switch just in case, but I rarely use it anyway.
The first node is a `delay node` which limits the rate of incoming messages to 1 per second:
![Node-RED delay node to limit the rate to 1 message per second](img/node-red-delay-node-1-msg-per-second.png)
The second node is a `current state node` which checks if the `climatisation` boolean is enabled:
![Node-RED current state node for climatisation](img/node-red-current-state-node-climatisation-enabled.png)
#### 8. Room Configuration
The idea here is to attach the room's configuration to the message. Each room has its own configuration: which unit is used, which sensors, and more importantly, when they should be turned on and off.
AC units have 4 modes which can be used:
- Cool
- Dry
- Fan
- Heat
To determine which mode should be used, I'm using thresholds for each mode and each fan speed of the unit, with different offsets depending on the situation. I can, for example, define an offset during the night or when I'm away. I can also set the offset to `disabled`, which will force the unit to shut down.
The first node is a `switch node`, based on the `room` value, which routes the message to the associated room configuration. When the room is `partout` (everywhere), the message is split towards all 3 room configurations:
![Node-RED switch node for room configuration](img/node-red-switch-node-room-config.png)
It is connected to a `change node` which attaches the configuration to `room_config`; here is an example with the living-room configuration:
```json
{
"threshold": {
"cool": {
"start": {
"1": 1,
"2": 1.5,
"3": 2,
"4": 2.5,
"quiet": 0
},
"stop": -0.3,
"target": -1,
"offset": {
"absent": 1,
"vacances": "disabled",
"fenetre": "disabled",
"matin": "disabled",
"jour": 0,
"soir": 0,
"nuit": "disabled"
}
},
"dry": {
"start": {
"quiet": -1
},
"stop": -1.5,
"offset": {
"absent": "1.5",
"vacances": "disabled",
"fenetre": "disabled",
"matin": "disabled",
"jour": 0,
"soir": 0,
"nuit": "disabled"
}
},
"fan_only": {
"start": {
"1": -0.3,
"quiet": -0.5
},
"stop": -0.7,
"offset": {
"absent": "disabled",
"vacances": "disabled",
"fenetre": "disabled",
"matin": "disabled",
"jour": 0,
"soir": 0,
"nuit": "disabled"
}
},
"heat": {
"start": {
"1": 0,
"2": -1.5,
"quiet": 0
},
"stop": 1,
"target": 1,
"offset": {
"absent": -1.5,
"vacances": -3,
"fenetre": "disabled",
"matin": 0,
"jour": 0,
"soir": 0,
"nuit": -1.5
}
}
},
"unit": "climate.clim_salon",
"timer": "timer.minuteur_clim_salon",
"window": "binary_sensor.groupe_fenetre_salon",
"thermometre": "sensor.temperature_jaune_temperature",
"humidity": "sensor.temperature_jaune_humidity",
"temp_ete": "input_number.temp_ete_salon",
"temp_hiver": "input_number.temp_hiver_salon"
}
```
#### 9. Computation
Now that the message has the room configuration attached, we enter the computation pipeline. We have the AC unit name, the sensor names, the desired base temperature and the offsets to apply. From these values, we fetch the current states and do the maths.
The first node is another `delay node` which limits the rate of incoming messages, because the previous block could have created 3 messages if all rooms are targeted.
The second is the most important node of the workflow, a `function node` that has multiple tasks:
- Fetch sensor states from Home Assistant
- Calculate mode thresholds with given offset
- Disable modes if conditions are met
- Inject these values in the payload
```js
// --- Helper: Get Home Assistant state by entity ID ---
function getState(entityId) {
return global.get("homeassistant.homeAssistant.states")[entityId]?.state;
}
// --- Determine current time period based on sensors ---
const periods = ["jour", "soir", "nuit", "matin"];
msg.payload.period = periods.find(p => getState(`binary_sensor.${p}`) === 'on') || 'unknown';
// --- Determine presence status (absent = inverse of presence) ---
const vacances = getState("input_boolean.absent");
const absent = getState("input_boolean.presence") === 'on' ? 'off' : 'on';
/**
* Recursively adds the base temperature and offset to all numeric start values in a threshold config
*/
function applyOffsetToThresholds(threshold, baseTemp, globalOffset) {
for (const [key, value] of Object.entries(threshold)) {
if (key === "offset") continue;
if (typeof value === 'object') {
applyOffsetToThresholds(value, baseTemp, globalOffset);
} else {
threshold[key] += baseTemp + globalOffset;
}
}
}
/**
* Calculates the global offset for a mode, based on presence, vacation, window, and time of day
*/
function calculateGlobalOffset(offsets, modeName, windowState, disabledMap) {
let globalOffset = 0;
for (const [key, offsetValue] of Object.entries(offsets)) {
let conditionMet = false;
if (key === msg.payload.period) conditionMet = true;
else if (key === "absent" && absent === 'on') conditionMet = true;
else if (key === "vacances" && vacances === 'on') conditionMet = true;
else if ((key === "fenetre" || key === "window") && windowState === 'on') conditionMet = true;
if (conditionMet) {
if (offsetValue === 'disabled') {
disabledMap[modeName] = true;
return 0; // Mode disabled immediately
}
globalOffset += parseFloat(offsetValue);
}
}
return globalOffset;
}
/**
* Main logic: compute thresholds for the specified room using the provided config
*/
const cfg = msg.payload.room_config;
const room = msg.payload.room;
// Normalize window sensor state
const rawWindow = getState(cfg.window);
const window = rawWindow === 'open' ? 'on' : rawWindow === 'closed' ? 'off' : rawWindow;
// Gather temperatures
const temps = cfg.thermometre.split(',')
.map(id => parseFloat(getState(id)))
.filter(v => !isNaN(v));
const temp_avg = temps.reduce((a, b) => a + b, 0) / temps.length;
const temp_min = Math.min(...temps);
const temp_max = Math.max(...temps);
// Gather humidity
const humidities = cfg.humidity.split(',')
.map(id => parseFloat(getState(id)))
.filter(v => !isNaN(v));
const humidity_avg = humidities.reduce((a, b) => a + b, 0) / humidities.length;
const humidity_min = Math.min(...humidities);
const humidity_max = Math.max(...humidities);
// Get base temps
const temp_ete = parseFloat(getState(cfg.temp_ete));
const temp_hiver = parseFloat(getState(cfg.temp_hiver));
// Process modes
const { threshold } = cfg;
const modes = ["cool", "dry", "fan_only", "heat"];
const disabled = {};
for (const mode of modes) {
const baseTemp = (mode === "heat") ? temp_hiver : temp_ete;
const globalOffset = calculateGlobalOffset(threshold[mode].offset, mode, window, disabled);
applyOffsetToThresholds(threshold[mode], baseTemp, globalOffset);
}
// Final message
msg.payload = {
...msg.payload,
unit: cfg.unit,
timer: cfg.timer,
threshold,
window,
temp: {
min: temp_min,
max: temp_max,
avg: Math.round(temp_avg * 100) / 100
},
humidity: {
min: humidity_min,
max: humidity_max,
avg: Math.round(humidity_avg * 100) / 100
},
disabled
};
return msg;
```
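To make the threshold arithmetic concrete, here is a hypothetical worked example for the living-room `cool` mode, based on the config shown earlier. The numbers are assumptions: a summer base temperature (`temp_ete`) of 25 °C and the `jour` period with no presence or window condition, so a global offset of 0:
```js
// Hypothetical worked example mirroring applyOffsetToThresholds() above, for the "cool" mode.
// Raw values from the room_config example: start.quiet = 0, start["1"] = 1, stop = -0.3, target = -1.
const baseTemp = 25;    // assumed value of input_number.temp_ete_salon
const globalOffset = 0; // "jour" offset in the living-room config

console.log(baseTemp + globalOffset + 0);      // 25   -> cool becomes eligible once temp.max reaches 25 °C (quiet threshold)
console.log(baseTemp + globalOffset + 1);      // 26   -> threshold used later to pick fan speed 1
console.log(baseTemp + globalOffset + (-0.3)); // 24.7 -> cool keeps running until temp.avg drops below 24.7 °C
console.log(baseTemp + globalOffset + (-1));   // 24   -> "target" value, presumably used for set_temperature in step 14
```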
The third node is a `filter node`, which drops subsequent messages with similar payload:
![Node-RED filter node to block similar message](img/node-red-filter-node-blocker.png)
The fourth node checks if any lock is set: with a `current state node`, we verify that the timer associated with the unit is idle. If not, the message is discarded:
![Node-RED current state node for timer lock](img/node-red-current-state-node-lock-timer.png)
The last node is another `current state node` which will fetch the unit state and properties:
![Node-RED current state node to get current unit state](img/node-red-current-state-node-get-unit-state.png)
#### 10. Target State
After the computation, we want to determine what the target mode should be, what action to take to converge from the current mode and, if applicable, what the fan speed should be for that mode.
All three nodes are `function nodes`. The first one decides what the target mode should be, between `off`, `cool`, `dry`, `fan_only` and `heat`:
```js
const minHumidityThreshold = 52;
const maxHumidityThreshold = 57;
// Helper: check if mode can be activated or stopped
function isModeEligible(mode, temps, humidity, thresholds, currentMode) {
const isCurrent = (mode === currentMode);
const threshold = thresholds[mode];
if (msg.payload.disabled?.[mode]) return false;
// Determine which temperature to use for start/stop:
// start: temp.max (except heat uses temp.min)
// stop: temp.avg
let tempForCheckStart;
if (mode === "heat") {
tempForCheckStart = temps.min; // heat start uses min temp
} else {
tempForCheckStart = temps.max; // others start use max temp
}
const tempForCheckStop = temps.avg;
// Dry mode also depends on humidity thresholds
// humidity max for start, humidity avg for stop
let humidityForCheckStart = humidity.max;
let humidityForCheckStop = humidity.avg;
// For heat mode (inverted logic)
if (mode === "heat") {
if (!isCurrent) {
const minStart = Math.min(...Object.values(threshold.start));
return tempForCheckStart < minStart;
} else {
return tempForCheckStop < threshold.stop;
}
}
// For dry mode (humidity-dependent)
if (mode === "dry") {
// Skip if humidity too low
if (humidityForCheckStart <= (isCurrent ? minHumidityThreshold : maxHumidityThreshold)) return false;
const minStart = Math.min(...Object.values(threshold.start));
if (!isCurrent) {
return tempForCheckStart >= minStart;
} else {
return tempForCheckStop >= threshold.stop;
}
}
// For cool and fan_only
if (!isCurrent) {
const minStart = Math.min(...Object.values(threshold.start));
return tempForCheckStart >= minStart;
} else {
return tempForCheckStop >= threshold.stop;
}
}
// --- Main logic ---
const { threshold, temp, humidity, current_mode, disabled } = msg.payload;
const priority = ["cool", "dry", "fan_only", "heat"];
let target_mode = "off";
// Loop through priority list and stop at the first eligible mode
for (const mode of priority) {
if (isModeEligible(mode, temp, humidity, threshold, current_mode)) {
target_mode = mode;
break;
}
}
msg.payload.target_mode = target_mode;
if (target_mode === "cool" || target_mode === "heat") {
msg.payload.set_temp = true;
}
return msg;
```
The second compares the current and target modes and picks which action to take:
- **check**: current and target are the same.
- **start**: the AC unit is currently off, but the target is different.
- **change**: the AC unit is on, the target mode is different, but not `off`.
- **stop**: the AC unit is on and needs to be turned off.
```js
let action = "check"; // default if both are same
if (msg.payload.current_mode === "off" && msg.payload.target_mode !== "off") {
action = "start";
} else if (msg.payload.current_mode !== "off" && msg.payload.target_mode !== "off" && msg.payload.current_mode !== msg.payload.target_mode) {
action = "change";
} else if (msg.payload.current_mode !== "off" && msg.payload.target_mode === "off") {
action = "stop";
}
msg.payload.action = action;
return msg;
```
The last node determines the fan speed for the target mode based on the thresholds:
```js
// Function to find the appropriate speed key based on temperature and mode
function findSpeed(thresholdStart, temperature, mode) {
let closestSpeed = 'quiet';
let closestTemp = mode === 'heat' ? Infinity : -Infinity;
for (const speedKey in thresholdStart) {
if (speedKey !== 'quiet') {
const tempValue = thresholdStart[speedKey];
if (mode === 'heat') {
if (tempValue >= temperature && tempValue <= closestTemp) {
closestSpeed = speedKey;
closestTemp = tempValue;
}
} else { // cool, fan_only
if (tempValue <= temperature && tempValue >= closestTemp) {
closestSpeed = speedKey;
closestTemp = tempValue;
}
}
}
}
return closestSpeed;
}
if (msg.payload.target_mode && msg.payload.target_mode !== "off" && msg.payload.target_mode !== "dry") {
const modeData = msg.payload.threshold[msg.payload.target_mode];
if (modeData && modeData.start) {
if (msg.payload.target_mode === "heat") {
msg.payload.speed = findSpeed(modeData.start, msg.payload.temp.min, 'heat');
} else {
msg.payload.speed = findSpeed(modeData.start, msg.payload.temp.max, 'cool');
}
} else {
node.error("Invalid mode data or missing 'start' thresholds", msg);
}
} else {
// No need for speed in 'off' or 'dry' modes
msg.payload.speed = null;
}
return msg;
```
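As a quick sanity check of `findSpeed()`, here is a hypothetical call reusing the function above, with cool thresholds already converted to absolute temperatures (continuing the worked example from step 9):
```js
// Hypothetical usage of findSpeed() above, with already-offset cool start thresholds
const start = { "1": 26, "2": 26.5, "3": 27, "4": 27.5, "quiet": 25 };
findSpeed(start, 26.8, 'cool'); // -> "2": the highest start threshold at or below 26.8 °C
findSpeed(start, 25.5, 'cool'); // -> "quiet": no numbered threshold is reached yet
```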
#### 11. Action Switch
Based on the action to take, the `switch node` will route the message accordingly:
![Node-RED switch node to select the action](img/node-red-switch-node-select-action.png)
#### 12. Start
When the action is `start`, we first need to turn the unit on. As this takes between 20 and 40 seconds depending on the unit model, we also lock the unit for a short period against future messages.
The first node is a `call service node` using the `turn_on` service on the AC unit:
![Node-RED call service node with turn_on service](img/node-red-call-service-node-turn-on.png)
The second node is another `call service node` which will start the lock timer of this unit for 45 seconds:
![Node-RED call service node to start the unit timer](img/node-red-call-service-node-start-timer.png)
The last one is a `delay node` of 5 seconds, to give the Home Assistant Daikin integration time to resolve the new state.
#### 13. Change
The `change` action is used to switch from one mode to another, but it is also used right after the `start` action.
The first node is a `call service node` using the `set_hvac_mode` service on the AC unit:
![Node-RED call service node with set_hvac_mode service](img/node-red-call-service-node-set-hvac-mode.png)
The following node is another delay of 5 seconds.
The last one verifies with a `switch node` whether the target temperature needs to be set, which is only required for the `cool` and `heat` modes:
![Node-RED switch node for set_temp](img/node-red-switch-node-set-temp.png)
#### 14. Set Target Temperature
The target temperature is only relevant for the `cool` and `heat` modes: when you use a normal AC unit, you define a temperature to reach, and this is exactly what is defined here. But because each unit uses its own internal sensor to verify it, I don't trust that value much: if the unit thinks the target is already reached, it won't blow anything.
The first node is another `call service node` using the `set_temperature` service:
![Node-RED call service node with set_temperature service](img/node-red-call-service-node-set-temperature-service.png)
Again, this node is followed by a `delay node` of 5 seconds.
#### 15. Check
The `check` action is used almost every time; it only compares the desired fan speed with the current one and changes it if needed.
The first node is a `switch node` which verifies that `speed` is defined:
![Node-RED switch node to test if speed is defined](img/node-red-switch-node-fan-speed.png)
The second is another `switch node` to compare the `speed` value with the current speed:
![Node-Red switch node to compare speed](img/node-red-switch-node-compare-speed.png)
Finally, the last node is a `call service node` using the `set_fan_mode` service to set the fan speed:
![Node-RED call service node with set_fan_mode](img/node-red-call-service-node-set-fan-mode.png)
#### 16. Stop
When the `action` is `stop`, the AC unit is simply turned off.
The first node is a `call service node` using the `turn_off` service:
![Node-RED call service node with turn_off service](img/node-red-call-service-node-turn-off.png)
The second node is another `call service node` which will start the lock timer of this unit for 45 seconds.
#### 17. Manual Intervention
Sometimes, for some reason, we want to use the AC manually. When we do, we don't want the workflow to change our manual setting, at least for some time. Node-RED uses its own user in Home Assistant, so when an AC unit changes state without this user, it was done manually.
The first node is a `trigger state node`, which sends a message when any AC unit changes state:
![Node-RED trigger state node on AC unit state change](img/node-red-trigger-state-unit-change.png)
The second is a `function node` which will associate the unit with its timer:
```js
const association = {
"climate.clim_salon": "timer.minuteur_clim_salon",
"climate.clim_chambre": "timer.minuteur_clim_chambre",
"climate.clim_couloir": "timer.minuteur_clim_couloir"
};
msg.payload = association[msg.topic];
return msg;
```
The third is a `switch node` that lets the message through when the `user_id` is not the Node-RED user's:
![Node-RED switch node not specific user_id](img/node-red-switch-node-user-id.png)
The fourth is another `switch node` which checks that a `user_id` is defined:
![Node-RED switch node check user_id not null](img/node-red-switch-node-check-user-id.png)
Lastly, the final node is a `call service node` using the `start` service on the unit's timer with its default duration (60 minutes):
![Node-RED call service node start timer with default duration](img/node-red-call-service-node-start-unit-timer.png)
## TL;DR
With this setup, my AC system is fully automated, from cooling in summer to heating in winter, while keeping the humidity level in check.
This required quite a lot of thinking, tweaking and testing, but in the end I'm very happy with the results. That's why I'm sharing it with you, to give you some ideas about what you can do in home automation.
If you think I could have done things differently, please reach out to me to discuss it, and don't hesitate to share your ideas as well!

View File

@@ -1,757 +0,0 @@
---
slug: terraform-create-proxmox-module
title: Créer un Module Terraform pour Proxmox
description: Transformez votre code VM Proxmox en module Terraform réutilisable et apprenez à déployer à l'échelle sur plusieurs nœuds.
date: 2025-07-04
draft: false
tags:
- terraform
- proxmox
- cloud-init
categories:
- homelab
---
## Intro
Dans un [article précédent]({{< ref "post/3-terraform-create-vm-proxmox" >}}), jexpliquais comment déployer des **machines virtuelles** sur **Proxmox** à laide de **Terraform**, en partant dun [template cloud-init]({{< ref "post/1-proxmox-cloud-init-vm-template" >}}).
Dans ce post, nous allons transformer ce code en un **module Terraform** réutilisable. Ensuite, je montrerai comment utiliser ce module dans d'autres projets pour simplifier et faire évoluer vos déploiements d'infrastructure.
---
## Quest-ce quun Module Terraform ?
Les modules Terraform sont des composants réutilisables qui permettent dorganiser et de simplifier votre code dinfrastructure en regroupant des ressources liées dans une seule unité. Au lieu de répéter la même configuration à plusieurs endroits, vous pouvez la définir une fois dans un module, puis lutiliser là où vous en avez besoin, comme une fonction en programmation.
Les modules peuvent être locaux (dans votre projet) ou distants (depuis le Terraform Registry ou un dépôt Git), ce qui facilite le partage et la standardisation des patterns dinfrastructure entre les équipes ou projets. Grâce aux modules, votre code devient plus lisible, maintenable et évolutif.
---
## Transformer le Projet en Module
Nous allons maintenant extraire le code Terraform du [projet précédent]({{< ref "post/3-terraform-create-vm-proxmox" >}}) pour en faire un module réutilisable nommé `pve_vm`.
> 📌 Vous pouvez retrouver le code source complet dans mon [dépôt Homelab](https://github.com/Vezpi/Homelab/). Le code spécifique à cet article se trouve [ici](https://github.com/Vezpi/Homelab/tree/3a991010d5e9de30e12cbf365d1a1ca1ff1f6436/terraform). Pensez à adapter les variables à votre environnement.
### Structure du Code
Notre module vivra à côté des projets, dans un dossier séparé.
```plaintext
terraform
`-- modules
`-- pve_vm
|-- main.tf
|-- provider.tf
`-- variables.tf
```
### Code du Module
📝 Les fichiers du module sont essentiellement les mêmes que ceux du projet que nous transformons. Les providers y sont déclarés, mais non configurés.
Le module `pve_vm` sera composé de 3 fichiers :
- **main** : la logique principale, identique à celle du projet.
- **provider** : déclare les providers requis, sans les configurer.
- **variables** : déclare les variables du module, en excluant celles propres au provider.
#### `main.tf`
```hcl
# Retrieve VM templates available in Proxmox that match the specified name
data "proxmox_virtual_environment_vms" "template" {
filter {
name = "name"
values = ["${var.vm_template}"] # The name of the template to clone from
}
}
# Create a cloud-init configuration file as a Proxmox snippet
resource "proxmox_virtual_environment_file" "cloud_config" {
content_type = "snippets" # Cloud-init files are stored as snippets in Proxmox
datastore_id = "local" # Local datastore used to store the snippet
node_name = var.node_name # The Proxmox node where the file will be uploaded
source_raw {
file_name = "${var.vm_name}.cloud-config.yaml" # The name of the snippet file
data = <<-EOF
#cloud-config
hostname: ${var.vm_name}
package_update: true
package_upgrade: true
packages:
- qemu-guest-agent # Ensures the guest agent is installed
users:
- default
- name: ${var.vm_user}
groups: sudo
shell: /bin/bash
ssh-authorized-keys:
- "${var.vm_user_sshkey}" # Inject user's SSH key
sudo: ALL=(ALL) NOPASSWD:ALL
runcmd:
- systemctl enable qemu-guest-agent
- reboot # Reboot the VM after provisioning
EOF
}
}
# Define and provision a new VM by cloning the template and applying initialization
resource "proxmox_virtual_environment_vm" "vm" {
name = var.vm_name # VM name
node_name = var.node_name # Proxmox node to deploy the VM
tags = var.vm_tags # Optional VM tags for categorization
agent {
enabled = true # Enable the QEMU guest agent
}
stop_on_destroy = true # Ensure VM is stopped gracefully when destroyed
clone {
vm_id = data.proxmox_virtual_environment_vms.template.vms[0].vm_id # ID of the source template
node_name = data.proxmox_virtual_environment_vms.template.vms[0].node_name # Node of the source template
}
bios = var.vm_bios # BIOS type (e.g., seabios or ovmf)
machine = var.vm_machine # Machine type (e.g., q35)
cpu {
cores = var.vm_cpu # Number of CPU cores
type = "host" # Use host CPU type for best compatibility/performance
}
memory {
dedicated = var.vm_ram # RAM in MB
}
disk {
datastore_id = var.node_datastore # Datastore to hold the disk
interface = "scsi0" # Primary disk interface
size = 4 # Disk size in GB
}
initialization {
user_data_file_id = proxmox_virtual_environment_file.cloud_config.id # Link the cloud-init file
datastore_id = var.node_datastore
interface = "scsi1" # Separate interface for cloud-init
ip_config {
ipv4 {
address = "dhcp" # Get IP via DHCP
}
}
}
network_device {
bridge = "vmbr0" # Use the default bridge
vlan_id = var.vm_vlan # VLAN tagging if used
}
operating_system {
type = "l26" # Linux 2.6+ kernel
}
vga {
type = "std" # Standard VGA type
}
lifecycle {
ignore_changes = [ # Ignore initialization section after first deployment for idempotency
initialization
]
}
}
# Output the assigned IP address of the VM after provisioning
output "vm_ip" {
value = proxmox_virtual_environment_vm.vm.ipv4_addresses[1][0] # Second network interface's first IP
description = "VM IP"
}
```
#### `provider.tf`
```hcl
terraform {
required_providers {
proxmox = {
source = "bpg/proxmox"
}
}
}
```
#### `variables.tf`
> ⚠️ The defaults are based on my environment, adapt them to yours.
```hcl
variable "node_name" {
description = "Proxmox host for the VM"
type = string
}
variable "node_datastore" {
description = "Datastore used for VM storage"
type = string
default = "ceph-workload"
}
variable "vm_template" {
description = "Template of the VM"
type = string
default = "ubuntu-cloud"
}
variable "vm_name" {
description = "Hostname of the VM"
type = string
}
variable "vm_user" {
description = "Admin user of the VM"
type = string
default = "vez"
}
variable "vm_user_sshkey" {
description = "Admin user SSH key of the VM"
type = string
default = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAID62LmYRu1rDUha3timAIcA39LtcIOny1iAgFLnxoBxm vez@bastion"
}
variable "vm_cpu" {
description = "Number of CPU cores of the VM"
type = number
default = 1
}
variable "vm_ram" {
description = "Number of RAM (MB) of the VM"
type = number
default = 2048
}
variable "vm_bios" {
description = "Type of BIOS used for the VM"
type = string
default = "ovmf"
}
variable "vm_machine" {
description = "Type of machine used for the VM"
type = string
default = "q35"
}
variable "vm_vlan" {
description = "VLAN of the VM"
type = number
default = 66
}
variable "vm_tags" {
description = "Tags for the VM"
type = list(any)
default = ["test"]
}
```
---
## Déployer une VM à laide du Module
Maintenant que nous avons extrait toute la logique dans le module `pve_vm`, notre projet na plus quà appeler ce module en lui passant les variables nécessaires. Cela rend la configuration bien plus propre et facile à maintenir.
### Structure du Code
Voici à quoi cela ressemble :
```plaintext
terraform
|-- modules
| `-- pve_vm
| |-- main.tf
| |-- provider.tf
| `-- variables.tf
`-- projects
`-- simple-vm-with-module
|-- credentials.auto.tfvars
|-- main.tf
|-- provider.tf
`-- variables.tf
```
### Code du projet
Dans cet exemple, je fournis manuellement les valeurs lors de lappel du module. Le provider est configuré au niveau du projet.
#### `main.tf`
```hcl
module "pve_vm" {
source = "../../modules/pve_vm"
node_name = "zenith"
vm_name = "zenith-vm"
vm_cpu = 2
vm_ram = 2048
vm_vlan = 66
}
output "vm_ip" {
value = module.pve_vm.vm_ip
}
```
#### `provider.tf`
```hcl
terraform {
required_providers {
proxmox = {
source = "bpg/proxmox"
}
}
}
provider "proxmox" {
endpoint = var.proxmox_endpoint
api_token = var.proxmox_api_token
insecure = false
ssh {
agent = false
private_key = file("~/.ssh/id_ed25519")
username = "root"
}
}
```
#### `variables.tf`
```hcl
variable "proxmox_endpoint" {
description = "Proxmox URL endpoint"
type = string
}
variable "proxmox_api_token" {
description = "Proxmox API token"
type = string
sensitive = true
}
```
#### `credentials.auto.tfvars`
```hcl
proxmox_endpoint = <your Proxmox endpoint>
proxmox_api_token = <your Proxmox API token for the user terraformer>
```
### Initialiser le Workspace Terraform
Dans notre nouveau projet, il faut dabord initialiser lenvironnement Terraform avec `terraform init` :
```bash
$ terraform init
Initializing the backend...
Initializing modules...
- pve_vm in ../../modules/pve_vm
Initializing provider plugins...
- Finding latest version of bpg/proxmox...
- Installing bpg/proxmox v0.78.2...
- Installed bpg/proxmox v0.78.2 (self-signed, key ID F0582AD6AE97C188)
Partner and community providers are signed by their developers.
If you'd like to know more about provider signing, you can read about it here:
https://www.terraform.io/docs/cli/plugins/signing.html
Terraform has created a lock file .terraform.lock.hcl to record the provider
selections it made above. Include this file in your version control repository
so that Terraform can guarantee to make the same selections by default when
you run "terraform init" in the future.
Terraform has been successfully initialized!
You may now begin working with Terraform. Try running "terraform plan" to see
any changes that are required for your infrastructure. All Terraform commands
should now work.
If you ever set or change modules or backend configuration for Terraform,
rerun this command to reinitialize your working directory. If you forget, other
commands will detect it and remind you to do so if necessary.
```
### Déployer la VM
Avant le déploiement, vérifiez que tout est correct avec `terraform plan`.
Une fois prêt, lancez le déploiement avec `terraform apply` :
```bash
$ terraform apply
module.pve_vm.data.proxmox_virtual_environment_vms.template: Reading...
module.pve_vm.data.proxmox_virtual_environment_vms.template: Read complete after 0s [id=89b444be-7501-4538-9436-08609b380d39]
Terraform used the selected providers to generate the following execution plan. Resource actions are indicated with the following symbols:
+ create
Terraform will perform the following actions:
# module.pve_vm.proxmox_virtual_environment_file.cloud_config will be created
+ resource "proxmox_virtual_environment_file" "cloud_config" {
+ content_type = "snippets"
+ datastore_id = "local"
+ file_modification_date = (known after apply)
+ file_name = (known after apply)
+ file_size = (known after apply)
+ file_tag = (known after apply)
+ id = (known after apply)
+ node_name = "zenith"
+ overwrite = true
+ timeout_upload = 1800
+ source_raw {
+ data = <<-EOT
#cloud-config
hostname: zenith-vm
package_update: true
package_upgrade: true
packages:
- qemu-guest-agent
users:
- default
- name: vez
groups: sudo
shell: /bin/bash
ssh-authorized-keys:
- "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAID62LmYRu1rDUha3timAIcA39LtcIOny1iAgFLnxoBxm vez@bastion"
sudo: ALL=(ALL) NOPASSWD:ALL
runcmd:
- systemctl enable qemu-guest-agent
- reboot
EOT
+ file_name = "zenith-vm.cloud-config.yaml"
+ resize = 0
}
}
# module.pve_vm.proxmox_virtual_environment_vm.vm will be created
+ resource "proxmox_virtual_environment_vm" "vm" {
+ acpi = true
+ bios = "ovmf"
+ id = (known after apply)
+ ipv4_addresses = (known after apply)
+ ipv6_addresses = (known after apply)
+ keyboard_layout = "en-us"
+ mac_addresses = (known after apply)
+ machine = "q35"
+ migrate = false
+ name = "zenith-vm"
+ network_interface_names = (known after apply)
+ node_name = "zenith"
+ on_boot = true
+ protection = false
+ reboot = false
+ reboot_after_update = true
+ scsi_hardware = "virtio-scsi-pci"
+ started = true
+ stop_on_destroy = true
+ tablet_device = true
+ tags = [
+ "test",
]
+ template = false
+ timeout_clone = 1800
+ timeout_create = 1800
+ timeout_migrate = 1800
+ timeout_move_disk = 1800
+ timeout_reboot = 1800
+ timeout_shutdown_vm = 1800
+ timeout_start_vm = 1800
+ timeout_stop_vm = 300
+ vm_id = (known after apply)
+ agent {
+ enabled = true
+ timeout = "15m"
+ trim = false
+ type = "virtio"
}
+ clone {
+ full = true
+ node_name = "apex"
+ retries = 1
+ vm_id = 900
}
+ cpu {
+ cores = 2
+ hotplugged = 0
+ limit = 0
+ numa = false
+ sockets = 1
+ type = "host"
+ units = 1024
}
+ disk {
+ aio = "io_uring"
+ backup = true
+ cache = "none"
+ datastore_id = "ceph-workload"
+ discard = "ignore"
+ file_format = (known after apply)
+ interface = "scsi0"
+ iothread = false
+ path_in_datastore = (known after apply)
+ replicate = true
+ size = 4
+ ssd = false
}
+ initialization {
+ datastore_id = "ceph-workload"
+ interface = "scsi1"
+ meta_data_file_id = (known after apply)
+ network_data_file_id = (known after apply)
+ type = (known after apply)
+ user_data_file_id = (known after apply)
+ vendor_data_file_id = (known after apply)
+ ip_config {
+ ipv4 {
+ address = "dhcp"
}
}
}
+ memory {
+ dedicated = 2048
+ floating = 0
+ keep_hugepages = false
+ shared = 0
}
+ network_device {
+ bridge = "vmbr0"
+ enabled = true
+ firewall = false
+ mac_address = (known after apply)
+ model = "virtio"
+ mtu = 0
+ queues = 0
+ rate_limit = 0
+ vlan_id = 66
}
+ operating_system {
+ type = "l26"
}
+ vga {
+ memory = 16
+ type = "std"
}
}
Plan: 2 to add, 0 to change, 0 to destroy.
Changes to Outputs:
+ vm_ip = (known after apply)
Do you want to perform these actions?
Terraform will perform the actions described above.
Only 'yes' will be accepted to approve.
Enter a value: yes
module.pve_vm.proxmox_virtual_environment_file.cloud_config: Creating...
module.pve_vm.proxmox_virtual_environment_file.cloud_config: Creation complete after 1s [id=local:snippets/zenith-vm.cloud-config.yaml]
module.pve_vm.proxmox_virtual_environment_vm.vm: Creating...
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [10s elapsed]
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [20s elapsed]
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [30s elapsed]
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [40s elapsed]
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [50s elapsed]
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [1m0s elapsed]
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [1m10s elapsed]
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [1m20s elapsed]
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [1m30s elapsed]
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [1m40s elapsed]
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [1m50s elapsed]
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [2m0s elapsed]
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [2m10s elapsed]
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [2m20s elapsed]
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [2m30s elapsed]
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [2m40s elapsed]
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [2m50s elapsed]
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [3m0s elapsed]
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [3m10s elapsed]
module.pve_vm.proxmox_virtual_environment_vm.vm: Creation complete after 3m13s [id=103]
Apply complete! Resources: 2 added, 0 changed, 0 destroyed.
Outputs:
vm_ip = "192.168.66.159"
```
✅ La VM est maintenant prête !
![VM on Proxmox WebUI deployed using a Terraform module](img/proxmox-vm-deployed-using-terraform-module.png)
🕗 _Ne faites pas attention à luptime, jai pris la capture décran le lendemain._
---
## Déployer Plusieurs VMs à la fois
Très bien, on a déployé une seule VM. Mais maintenant, comment passer à léchelle ? Comment déployer plusieurs instances de ce template, avec des noms différents, sur des nœuds différents, et avec des tailles différentes ? Cest ce que je vais vous montrer.
### Une VM par Nœud
Dans lexemple précédent, nous avons passé des valeurs fixes au module. À la place, nous pouvons définir un objet local contenant les caractéristiques de la VM, puis sen servir lors de lappel au module. Cela facilite lévolution du code de déploiement :
```hcl
module "pve_vm" {
source = "../../modules/pve_vm"
node_name = local.vm.node_name
vm_name = local.vm.vm_name
vm_cpu = local.vm.vm_cpu
vm_ram = local.vm.vm_ram
vm_vlan = local.vm.vm_vlan
}
locals {
vm = {
node_name = "zenith"
vm_name = "zenith-vm"
vm_cpu = 2
vm_ram = 2048
vm_vlan = 66
}
}
```
Nous pouvons également appeler le module en itérant sur une liste dobjets définissant les VMs à déployer :
```hcl
module "pve_vm" {
source = "../../modules/pve_vm"
for_each = local.vm_list
node_name = each.value.node_name
vm_name = each.value.vm_name
vm_cpu = each.value.vm_cpu
vm_ram = each.value.vm_ram
vm_vlan = each.value.vm_vlan
}
locals {
vm_list = {
zenith = {
node_name = "zenith"
vm_name = "zenith-vm"
vm_cpu = 2
vm_ram = 2048
vm_vlan = 66
}
}
}
```
Bien que cela n'ait pas de sens avec une seule VM, je pourrais utiliser cette syntaxe de module, par exemple, pour déployer une machine virtuelle par nœud :
```hcl
module "pve_vm" {
source = "../../modules/pve_vm"
for_each = local.vm_list
node_name = each.value.node_name
vm_name = each.value.vm_name
vm_cpu = each.value.vm_cpu
vm_ram = each.value.vm_ram
vm_vlan = each.value.vm_vlan
}
locals {
vm_list = {
for vm in flatten([
for node in data.proxmox_virtual_environment_nodes.pve_nodes.names : {
node_name = node
vm_name = "${node}-vm"
vm_cpu = 2
vm_ram = 2048
vm_vlan = 66
}
]) : vm.vm_name => vm
}
}
data "proxmox_virtual_environment_nodes" "pve_nodes" {}
output "vm_ip" {
value = { for k, v in module.pve_vm : k => v.vm_ip }
}
```
✅ Cela permet de déployer automatiquement 3 VM dans mon cluster, une par nœud.
### Plusieurs VMs par Nœud
Enfin, poussons lidée plus loin : déployons plusieurs VMs avec des configurations différentes par nœud. Pour cela, on définit un ensemble de rôles et on utilise une boucle imbriquée pour générer toutes les combinaisons possibles pour chaque nœud Proxmox.
```hcl
module "pve_vm" {
source = "../../modules/pve_vm"
for_each = local.vm_list
node_name = each.value.node_name
vm_name = each.value.vm_name
vm_cpu = each.value.vm_cpu
vm_ram = each.value.vm_ram
vm_vlan = each.value.vm_vlan
}
locals {
vm_attr = {
"master" = { ram = 2048, cpu = 2, vlan = 66 }
"worker" = { ram = 1024, cpu = 1, vlan = 66 }
}
vm_list = {
for vm in flatten([
for node in data.proxmox_virtual_environment_nodes.pve_nodes.names : [
for role, config in local.vm_attr : {
node_name = node
vm_name = "${node}-${role}"
vm_cpu = config.cpu
vm_ram = config.ram
vm_vlan = config.vlan
}
]
]) : vm.vm_name => vm
}
}
data "proxmox_virtual_environment_nodes" "pve_nodes" {}
output "vm_ip" {
value = { for k, v in module.pve_vm : k => v.vm_ip }
}
```
🚀 Une fois le `terraform apply` lancé, j'obtiens ça :
```bash
Apply complete! Resources: 6 added, 0 changed, 0 destroyed.
Outputs:
vm_ip = {
"apex-master" = "192.168.66.167"
"apex-worker" = "192.168.66.168"
"vertex-master" = "192.168.66.169"
"vertex-worker" = "192.168.66.170"
"zenith-master" = "192.168.66.166"
"zenith-worker" = "192.168.66.172"
}
```
---
## Conclusion
Nous avons transformé notre déploiement de VM Proxmox en un module Terraform réutilisable, et nous lavons utilisé pour faire évoluer facilement notre infrastructure sur plusieurs nœuds.
Dans un prochain article, jaimerais combiner Terraform avec Ansible afin de gérer le déploiement des VMs, et même explorer lutilisation de différents workspaces Terraform pour gérer plusieurs environnements.
A la prochaine !

View File

@@ -1,750 +0,0 @@
---
slug: terraform-create-proxmox-module
title: Create a Terraform module for Proxmox
description: Turn your Proxmox VM code into a reusable Terraform module and learn how to scale deployments across multiple nodes.
date: 2025-07-04
draft: false
tags:
- terraform
- proxmox
- cloud-init
categories:
- homelab
---
## Intro
In a [previous post]({{< ref "post/3-terraform-create-vm-proxmox" >}}), I explained how to deploy **Virtual Machines** on **Proxmox** using **Terraform**, building from a [cloud-init template]({{< ref "post/1-proxmox-cloud-init-vm-template" >}}).
In this post, well take that code and turn it into a reusable **Terraform module**. Then, Ill show how to use that module in other projects to simplify and scale your infrastructure deployments.
---
## What is a Terraform Module?
Terraform modules are reusable components that let you organize and simplify your infrastructure code by grouping related resources into a single unit. Instead of repeating the same configuration across multiple places, you can define it once in a module and use it wherever needed, just like calling a function in programming.
Modules can be local (within your project) or remote (from the Terraform Registry or a Git repository), making it easy to share and standardize infrastructure patterns across teams or projects. By using modules, you make your code more readable, maintainable, and scalable.
---
## Transform Project into Module
We're now going to extract the Terraform code from the [previous project]({{< ref "post/3-terraform-create-vm-proxmox" >}}) into a reusable module named `pve_vm`.
> 📌 You can find the full source code in my [Homelab repo](https://github.com/Vezpi/Homelab/). The specific code for this post lives [here](https://github.com/Vezpi/Homelab/tree/3a991010d5e9de30e12cbf365d1a1ca1ff1f6436/terraform). Make sure to adjust the variables to match your environment.
### Code Structure
Our module will live next to our projects, in another folder:
```plaintext
terraform
`-- modules
`-- pve_vm
|-- main.tf
|-- provider.tf
`-- variables.tf
```
### Module's Code
📝 Basically, the module files are the same as the project files we are transforming. Providers are declared, but not configured, inside the module.
The module `pve_vm` will be composed of 3 files:
- **main**: The core logic, same code as before.
- **provider**: Declares required providers without configuration.
- **variables**: Declares module variables, excluding provider-specific ones.
#### `main.tf`
```hcl
# Retrieve VM templates available in Proxmox that match the specified name
data "proxmox_virtual_environment_vms" "template" {
filter {
name = "name"
values = ["${var.vm_template}"] # The name of the template to clone from
}
}
# Create a cloud-init configuration file as a Proxmox snippet
resource "proxmox_virtual_environment_file" "cloud_config" {
content_type = "snippets" # Cloud-init files are stored as snippets in Proxmox
datastore_id = "local" # Local datastore used to store the snippet
node_name = var.node_name # The Proxmox node where the file will be uploaded
source_raw {
file_name = "${var.vm_name}.cloud-config.yaml" # The name of the snippet file
data = <<-EOF
#cloud-config
hostname: ${var.vm_name}
package_update: true
package_upgrade: true
packages:
- qemu-guest-agent # Ensures the guest agent is installed
users:
- default
- name: ${var.vm_user}
groups: sudo
shell: /bin/bash
ssh-authorized-keys:
- "${var.vm_user_sshkey}" # Inject user's SSH key
sudo: ALL=(ALL) NOPASSWD:ALL
runcmd:
- systemctl enable qemu-guest-agent
- reboot # Reboot the VM after provisioning
EOF
}
}
# Define and provision a new VM by cloning the template and applying initialization
resource "proxmox_virtual_environment_vm" "vm" {
name = var.vm_name # VM name
node_name = var.node_name # Proxmox node to deploy the VM
tags = var.vm_tags # Optional VM tags for categorization
agent {
enabled = true # Enable the QEMU guest agent
}
stop_on_destroy = true # Ensure VM is stopped gracefully when destroyed
clone {
vm_id = data.proxmox_virtual_environment_vms.template.vms[0].vm_id # ID of the source template
node_name = data.proxmox_virtual_environment_vms.template.vms[0].node_name # Node of the source template
}
bios = var.vm_bios # BIOS type (e.g., seabios or ovmf)
machine = var.vm_machine # Machine type (e.g., q35)
cpu {
cores = var.vm_cpu # Number of CPU cores
type = "host" # Use host CPU type for best compatibility/performance
}
memory {
dedicated = var.vm_ram # RAM in MB
}
disk {
datastore_id = var.node_datastore # Datastore to hold the disk
interface = "scsi0" # Primary disk interface
size = 4 # Disk size in GB
}
initialization {
user_data_file_id = proxmox_virtual_environment_file.cloud_config.id # Link the cloud-init file
datastore_id = var.node_datastore
interface = "scsi1" # Separate interface for cloud-init
ip_config {
ipv4 {
address = "dhcp" # Get IP via DHCP
}
}
}
network_device {
bridge = "vmbr0" # Use the default bridge
vlan_id = var.vm_vlan # VLAN tagging if used
}
operating_system {
type = "l26" # Linux 2.6+ kernel
}
vga {
type = "std" # Standard VGA type
}
lifecycle {
ignore_changes = [ # Ignore initialization section after first deployment for idempotency
initialization
]
}
}
# Output the assigned IP address of the VM after provisioning
output "vm_ip" {
value = proxmox_virtual_environment_vm.vm.ipv4_addresses[1][0] # Second network interface's first IP
description = "VM IP"
}
```
#### `provider.tf`
```hcl
terraform {
required_providers {
proxmox = {
source = "bpg/proxmox"
}
}
}
```
#### `variables.tf`
> ⚠️ The defaults are based on my environment, adapt them to yours.
```hcl
variable "node_name" {
description = "Proxmox host for the VM"
type = string
}
variable "node_datastore" {
description = "Datastore used for VM storage"
type = string
default = "ceph-workload"
}
variable "vm_template" {
description = "Template of the VM"
type = string
default = "ubuntu-cloud"
}
variable "vm_name" {
description = "Hostname of the VM"
type = string
}
variable "vm_user" {
description = "Admin user of the VM"
type = string
default = "vez"
}
variable "vm_user_sshkey" {
description = "Admin user SSH key of the VM"
type = string
default = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAID62LmYRu1rDUha3timAIcA39LtcIOny1iAgFLnxoBxm vez@bastion"
}
variable "vm_cpu" {
description = "Number of CPU cores of the VM"
type = number
default = 1
}
variable "vm_ram" {
description = "Number of RAM (MB) of the VM"
type = number
default = 2048
}
variable "vm_bios" {
description = "Type of BIOS used for the VM"
type = string
default = "ovmf"
}
variable "vm_machine" {
description = "Type of machine used for the VM"
type = string
default = "q35"
}
variable "vm_vlan" {
description = "VLAN of the VM"
type = number
default = 66
}
variable "vm_tags" {
description = "Tags for the VM"
type = list(any)
default = ["test"]
}
```
---
## Deploy a VM Using our Module
Now that weve extracted all the logic into the `pve_vm` module, our project code only needs to reference that module and pass the required variables. This makes our setup much cleaner and easier to maintain.
### Code Structure
Here is what it looks like:
```plaintext
terraform
|-- modules
| `-- pve_vm
| |-- main.tf
| |-- provider.tf
| `-- variables.tf
`-- projects
`-- simple-vm-with-module
|-- credentials.auto.tfvars
|-- main.tf
|-- provider.tf
`-- variables.tf
```
### Project's Code
In this example, I manually provide the values when calling my module. The provider is configured at project level.
#### `main.tf`
```hcl
module "pve_vm" {
source = "../../modules/pve_vm"
node_name = "zenith"
vm_name = "zenith-vm"
vm_cpu = 2
vm_ram = 2048
vm_vlan = 66
}
output "vm_ip" {
value = module.pve_vm.vm_ip
}
```
#### `provider.tf`
```hcl
terraform {
required_providers {
proxmox = {
source = "bpg/proxmox"
}
}
}
provider "proxmox" {
endpoint = var.proxmox_endpoint
api_token = var.proxmox_api_token
insecure = false
ssh {
agent = false
private_key = file("~/.ssh/id_ed25519")
username = "root"
}
}
```
#### `variables.tf`
```hcl
variable "proxmox_endpoint" {
description = "Proxmox URL endpoint"
type = string
}
variable "proxmox_api_token" {
description = "Proxmox API token"
type = string
sensitive = true
}
```
#### `credentials.auto.tfvars`
```hcl
proxmox_endpoint = <your Proxmox endpoint>
proxmox_api_token = <your Proxmox API token for the user terraformer>
```
### Initialize the Terraform Workspace
In our new project, we first need to initialize the Terraform workspace with `terraform init`:
```bash
$ terraform init
Initializing the backend...
Initializing modules...
- pve_vm in ../../modules/pve_vm
Initializing provider plugins...
- Finding latest version of bpg/proxmox...
- Installing bpg/proxmox v0.78.2...
- Installed bpg/proxmox v0.78.2 (self-signed, key ID F0582AD6AE97C188)
Partner and community providers are signed by their developers.
If you'd like to know more about provider signing, you can read about it here:
https://www.terraform.io/docs/cli/plugins/signing.html
Terraform has created a lock file .terraform.lock.hcl to record the provider
selections it made above. Include this file in your version control repository
so that Terraform can guarantee to make the same selections by default when
you run "terraform init" in the future.
Terraform has been successfully initialized!
You may now begin working with Terraform. Try running "terraform plan" to see
any changes that are required for your infrastructure. All Terraform commands
should now work.
If you ever set or change modules or backend configuration for Terraform,
rerun this command to reinitialize your working directory. If you forget, other
commands will detect it and remind you to do so if necessary.
```
### Deploy the VM
Before deploying it, make sure that everything is ok with a `terraform plan`.
Once ready, you can deploy it with `terraform apply`:
```bash
$ terraform apply
module.pve_vm.data.proxmox_virtual_environment_vms.template: Reading...
module.pve_vm.data.proxmox_virtual_environment_vms.template: Read complete after 0s [id=89b444be-7501-4538-9436-08609b380d39]
Terraform used the selected providers to generate the following execution plan. Resource actions are indicated with the following symbols:
+ create
Terraform will perform the following actions:
# module.pve_vm.proxmox_virtual_environment_file.cloud_config will be created
+ resource "proxmox_virtual_environment_file" "cloud_config" {
+ content_type = "snippets"
+ datastore_id = "local"
+ file_modification_date = (known after apply)
+ file_name = (known after apply)
+ file_size = (known after apply)
+ file_tag = (known after apply)
+ id = (known after apply)
+ node_name = "zenith"
+ overwrite = true
+ timeout_upload = 1800
+ source_raw {
+ data = <<-EOT
#cloud-config
hostname: zenith-vm
package_update: true
package_upgrade: true
packages:
- qemu-guest-agent
users:
- default
- name: vez
groups: sudo
shell: /bin/bash
ssh-authorized-keys:
- "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAID62LmYRu1rDUha3timAIcA39LtcIOny1iAgFLnxoBxm vez@bastion"
sudo: ALL=(ALL) NOPASSWD:ALL
runcmd:
- systemctl enable qemu-guest-agent
- reboot
EOT
+ file_name = "zenith-vm.cloud-config.yaml"
+ resize = 0
}
}
# module.pve_vm.proxmox_virtual_environment_vm.vm will be created
+ resource "proxmox_virtual_environment_vm" "vm" {
+ acpi = true
+ bios = "ovmf"
+ id = (known after apply)
+ ipv4_addresses = (known after apply)
+ ipv6_addresses = (known after apply)
+ keyboard_layout = "en-us"
+ mac_addresses = (known after apply)
+ machine = "q35"
+ migrate = false
+ name = "zenith-vm"
+ network_interface_names = (known after apply)
+ node_name = "zenith"
+ on_boot = true
+ protection = false
+ reboot = false
+ reboot_after_update = true
+ scsi_hardware = "virtio-scsi-pci"
+ started = true
+ stop_on_destroy = true
+ tablet_device = true
+ tags = [
+ "test",
]
+ template = false
+ timeout_clone = 1800
+ timeout_create = 1800
+ timeout_migrate = 1800
+ timeout_move_disk = 1800
+ timeout_reboot = 1800
+ timeout_shutdown_vm = 1800
+ timeout_start_vm = 1800
+ timeout_stop_vm = 300
+ vm_id = (known after apply)
+ agent {
+ enabled = true
+ timeout = "15m"
+ trim = false
+ type = "virtio"
}
+ clone {
+ full = true
+ node_name = "apex"
+ retries = 1
+ vm_id = 900
}
+ cpu {
+ cores = 2
+ hotplugged = 0
+ limit = 0
+ numa = false
+ sockets = 1
+ type = "host"
+ units = 1024
}
+ disk {
+ aio = "io_uring"
+ backup = true
+ cache = "none"
+ datastore_id = "ceph-workload"
+ discard = "ignore"
+ file_format = (known after apply)
+ interface = "scsi0"
+ iothread = false
+ path_in_datastore = (known after apply)
+ replicate = true
+ size = 4
+ ssd = false
}
+ initialization {
+ datastore_id = "ceph-workload"
+ interface = "scsi1"
+ meta_data_file_id = (known after apply)
+ network_data_file_id = (known after apply)
+ type = (known after apply)
+ user_data_file_id = (known after apply)
+ vendor_data_file_id = (known after apply)
+ ip_config {
+ ipv4 {
+ address = "dhcp"
}
}
}
+ memory {
+ dedicated = 2048
+ floating = 0
+ keep_hugepages = false
+ shared = 0
}
+ network_device {
+ bridge = "vmbr0"
+ enabled = true
+ firewall = false
+ mac_address = (known after apply)
+ model = "virtio"
+ mtu = 0
+ queues = 0
+ rate_limit = 0
+ vlan_id = 66
}
+ operating_system {
+ type = "l26"
}
+ vga {
+ memory = 16
+ type = "std"
}
}
Plan: 2 to add, 0 to change, 0 to destroy.
Changes to Outputs:
+ vm_ip = (known after apply)
Do you want to perform these actions?
Terraform will perform the actions described above.
Only 'yes' will be accepted to approve.
Enter a value: yes
module.pve_vm.proxmox_virtual_environment_file.cloud_config: Creating...
module.pve_vm.proxmox_virtual_environment_file.cloud_config: Creation complete after 1s [id=local:snippets/zenith-vm.cloud-config.yaml]
module.pve_vm.proxmox_virtual_environment_vm.vm: Creating...
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [10s elapsed]
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [20s elapsed]
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [30s elapsed]
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [40s elapsed]
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [50s elapsed]
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [1m0s elapsed]
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [1m10s elapsed]
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [1m20s elapsed]
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [1m30s elapsed]
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [1m40s elapsed]
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [1m50s elapsed]
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [2m0s elapsed]
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [2m10s elapsed]
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [2m20s elapsed]
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [2m30s elapsed]
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [2m40s elapsed]
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [2m50s elapsed]
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [3m0s elapsed]
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [3m10s elapsed]
module.pve_vm.proxmox_virtual_environment_vm.vm: Creation complete after 3m13s [id=103]
Apply complete! Resources: 2 added, 0 changed, 0 destroyed.
Outputs:
vm_ip = "192.168.66.159"
```
✅ The VM is now ready!
![VM on Proxmox WebUI deployed using a Terraform module](img/proxmox-vm-deployed-using-terraform-module.png)
🕗 *Don't pay attention to the uptime, I took the screenshot the next day*
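If you need that IP again later, there is no need to re-run an apply: the value is stored in the state and can be read back at any time:
```bash
$ terraform output vm_ip
"192.168.66.159"
```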
---
## Deploy Multiple VMs at Once
Ok, I've deployed a single VM, fine. But now, how do I scale it? How do I deploy multiple instances of that template, with different names, on different nodes, and with different sizes? This is what I will show you now.
### One VM per Node
In the earlier example, we passed fixed values to the module. Instead, we could define a local object to store the VM specs, and reference its values when calling the module. This approach makes it easier to scale the deployment logic later:
```hcl
module "pve_vm" {
source = "../../modules/pve_vm"
node_name = local.vm.node_name
vm_name = local.vm.vm_name
vm_cpu = local.vm.vm_cpu
vm_ram = local.vm.vm_ram
vm_vlan = local.vm.vm_vlan
}
locals {
vm = {
node_name = "zenith"
vm_name = "zenith-vm"
vm_cpu = 2
vm_ram = 2048
vm_vlan = 66
}
}
```
I could also call the module while iterating over that object:
```hcl
module "pve_vm" {
source = "../../modules/pve_vm"
for_each = local.vm_list
node_name = each.value.node_name
vm_name = each.value.vm_name
vm_cpu = each.value.vm_cpu
vm_ram = each.value.vm_ram
vm_vlan = each.value.vm_vlan
}
locals {
vm_list = {
zenith = {
node_name = "zenith"
vm_name = "zenith-vm"
vm_cpu = 2
vm_ram = 2048
vm_vlan = 66
}
}
}
```
While this does not make sense with only one VM, I could use this module syntax, for example, to deploy one VM per node:
```hcl
module "pve_vm" {
source = "../../modules/pve_vm"
for_each = local.vm_list
node_name = each.value.node_name
vm_name = each.value.vm_name
vm_cpu = each.value.vm_cpu
vm_ram = each.value.vm_ram
vm_vlan = each.value.vm_vlan
}
locals {
vm_list = {
for vm in flatten([
for node in data.proxmox_virtual_environment_nodes.pve_nodes.names : {
node_name = node
vm_name = "${node}-vm"
vm_cpu = 2
vm_ram = 2048
vm_vlan = 66
}
]) : vm.vm_name => vm
}
}
data "proxmox_virtual_environment_nodes" "pve_nodes" {}
output "vm_ip" {
value = { for k, v in module.pve_vm : k => v.vm_ip }
}
```
✅ This would deploy 3 VMs on my cluster, one per node.
### Multiple VMs per Node
Finally, let's scale things up by deploying multiple VMs with different configurations per node. We'll define a set of roles and use a nested loop to generate the desired VM configurations for each Proxmox node:
```hcl
module "pve_vm" {
source = "../../modules/pve_vm"
for_each = local.vm_list
node_name = each.value.node_name
vm_name = each.value.vm_name
vm_cpu = each.value.vm_cpu
vm_ram = each.value.vm_ram
vm_vlan = each.value.vm_vlan
}
locals {
vm_attr = {
"master" = { ram = 2048, cpu = 2, vlan = 66 }
"worker" = { ram = 1024, cpu = 1, vlan = 66 }
}
vm_list = {
for vm in flatten([
for node in data.proxmox_virtual_environment_nodes.pve_nodes.names : [
for role, config in local.vm_attr : {
node_name = node
vm_name = "${node}-${role}"
vm_cpu = config.cpu
vm_ram = config.ram
vm_vlan = config.vlan
}
]
]) : vm.vm_name => vm
}
}
data "proxmox_virtual_environment_nodes" "pve_nodes" {}
output "vm_ip" {
value = { for k, v in module.pve_vm : k => v.vm_ip }
}
```
🚀 After deploying it with a `terraform apply`, I got this:
```bash
Apply complete! Resources: 6 added, 0 changed, 0 destroyed.
Outputs:
vm_ip = {
"apex-master" = "192.168.66.167"
"apex-worker" = "192.168.66.168"
"vertex-master" = "192.168.66.169"
"vertex-worker" = "192.168.66.170"
"zenith-master" = "192.168.66.166"
"zenith-worker" = "192.168.66.172"
}
```
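A nice side effect of keying `vm_list` by VM name is that each module instance can be targeted individually. For example, to tear down a single VM without touching the others (a sketch; resource targeting is best kept for exceptional cases):
```bash
terraform destroy -target='module.pve_vm["apex-worker"]'
```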
---
## Conclusion
We've transformed our Proxmox VM deployment into a reusable Terraform module and used it to easily scale our infrastructure across multiple nodes.
In a future post, I would like to pair Terraform with Ansible to manage the VM deployment, and even use different Terraform workspaces to handle several environments.
Stay tuned!

View File

@@ -1,636 +0,0 @@
---
slug: create-manual-kubernetes-cluster-kubeadm
title: Créer un Cluster Kubernetes Hautement Disponible avec kubeadm sur des VMs
description: Guide étape par étape pour créer manuellement un cluster Kubernetes hautement disponible sur des machines virtuelles avec kubeadm.
date: 2025-07-18
draft: false
tags:
- kubernetes
- kubeadm
- high-availability
categories:
- homelab
---
## Intro
Dans cet [article précédent]({{< ref "post/7-terraform-create-proxmox-module" >}}), j'expliquais comment déployer des VMs avec un module **Terraform** sur **Proxmox** et j'avais terminé avec 6 VMs, 3 nœuds masters et 3 nœuds workers, en m'appuyant sur un [template cloud-init]({{< ref "post/1-proxmox-cloud-init-vm-template" >}}).
Maintenant que l'infrastructure est prête, passons à l'étape suivante : **créer manuellement un cluster Kubernetes** dans mon homelab avec `kubeadm`, hautement disponible utilisant `etcd` empilé.
Dans cet article, je vais détailler chaque étape de l'installation d'un cluster Kubernetes. Je n'utiliserai pas d'outil d'automatisation pour configurer les nœuds pour le moment, afin de mieux comprendre les étapes impliquées dans le bootstrap d'un cluster Kubernetes. L'automatisation sera couverte dans de futurs articles.
---
## Qu'est ce que Kubernetes
Kubernetes est une plateforme open-source qui orchestre des containers sur un ensemble de machines. Elle gère le déploiement, la montée en charge et la santé des applications conteneurisées, ce qui vous permet de vous concentrer sur vos services plutôt que sur l'infrastructure sous-jacente.
Un cluster Kubernetes est composé de deux types de nœuds : les nœuds control plane (masters) et les workers. Le control plane assure la gestion globale du cluster, il prend les décisions de planification, surveille l'état du système et réagit aux événements. Les workers, eux, exécutent réellement vos applications, dans des containers gérés par Kubernetes.
Dans cet article, nous allons mettre en place manuellement un cluster Kubernetes avec 3 nœuds control plane et 3 workers. Cette architecture reflète un environnement hautement disponible et proche de la production, même si l'objectif ici est avant tout pédagogique.
La documentation officielle se trouve [ici](https://kubernetes.io/docs/setup/production-environment/tools/kubeadm/), je vais utiliser la version **v1.32**.
---
## Préparer les Nœuds
Je vais exécuter les étapes suivantes sur les **6 VMs** (masters et workers).
### Hostname
Chaque VM possède un **nom d'hôte unique** et tous les nœuds doivent pouvoir **se résoudre entre eux**.
Le nom d'hôte est défini à la création de la VM via cloud-init. Mais pour la démonstration, je vais le définir manuellement :
```bash
sudo hostnamectl set-hostname <hostname>
```
Dans mon infrastructure, les nœuds se résolvent via mon serveur DNS sur le domaine `lab.vezpi.me`. Si vous n'avez pas de DNS, vous pouvez inscrire manuellement les IPs des nœuds dans le fichier `/etc/hosts` :
```bash
192.168.66.168 apex-worker
192.168.66.167 apex-master
192.168.66.166 zenith-master
192.168.66.170 vertex-worker
192.168.66.169 vertex-master
192.168.66.172 zenith-worker
```
### Mises à jour Système
Mes VMs tournent sous **Ubuntu 24.04.2 LTS**. Cloud-init s'occupe des mises à jour après le provisionnement, mais on s'assure quand même que tout est bien à jour et on installe les paquets nécessaires pour ajouter le dépôt Kubernetes :
```bash
sudo apt update && sudo apt upgrade -y
sudo apt install -y apt-transport-https ca-certificates curl gpg
```
### Swap
Par défaut, `kubelet` ne démarre pas si une **mémoire swap** est détectée sur un nœud. Il faut donc la désactiver ou la rendre tolérable par `kubelet`.
Mes VMs ne disposent pas de swap, mais voici comment le désactiver si besoin :
```bash
sudo swapoff -a
sudo sed -i '/ swap / s/^/#/' /etc/fstab
```
### Pare-feu
Dans ce lab, je désactive simplement le pare-feu local (à ne pas faire en production) :
```bash
sudo systemctl disable --now ufw
```
En production, vous devez autoriser la communication entre les nœuds sur les ports suivants :
#### Control Plane
| Protocole | Direction | Ports | Usage | Utilisé par |
| --------- | --------- | --------- | ----------------------- | -------------------- |
| TCP | Entrant | 6443 | API server Kubernetes | Tous |
| TCP | Entrant | 2379-2380 | API client etcd | kube-apiserver, etcd |
| TCP | Entrant | 10250 | API Kubelet | Plan de contrôle |
| TCP | Entrant | 10259 | kube-scheduler | Lui-même |
| TCP | Entrant | 10257 | kube-controller-manager | Lui-même |
#### Worker
| Protocole | Direction | Ports | Usage | Utilisé par |
| --------- | --------- | ----------- | ----------------- | -------------- |
| TCP | Entrant | 10250 | API Kubelet | Control plane |
| TCP | Entrant | 10256 | kube-proxy | Load balancers |
| TCP | Entrant | 30000-32767 | Services NodePort | Tous |
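Si vous préférez garder `ufw` activé, voici une ébauche minimale des règles correspondant aux tableaux ci-dessus (en supposant que `192.168.66.0/24` soit le sous-réseau des nœuds, à adapter à votre réseau) :
```bash
# Nœuds du control plane
sudo ufw allow proto tcp from 192.168.66.0/24 to any port 6443
sudo ufw allow proto tcp from 192.168.66.0/24 to any port 2379:2380
sudo ufw allow proto tcp from 192.168.66.0/24 to any port 10250
sudo ufw allow proto tcp from 192.168.66.0/24 to any port 10257
sudo ufw allow proto tcp from 192.168.66.0/24 to any port 10259

# Nœuds workers
sudo ufw allow proto tcp from 192.168.66.0/24 to any port 10250
sudo ufw allow proto tcp from 192.168.66.0/24 to any port 10256
sudo ufw allow proto tcp from 192.168.66.0/24 to any port 30000:32767
sudo ufw enable
```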
### Modules Noyau et Paramètres sysctl
Kubernetes requiert l'activation de deux modules noyau :
- **overlay** : pour permettre l'empilement de systèmes de fichiers.
- **br_netfilter** : pour activer le filtrage des paquets sur les interfaces bridge.
Activation des modules :
```bash
cat <<EOF | sudo tee /etc/modules-load.d/k8s.conf
overlay
br_netfilter
EOF
sudo modprobe overlay
sudo modprobe br_netfilter
```
Appliquer les paramètres noyau nécessaires pour la partie réseau :
```bash
cat <<EOF | sudo tee /etc/sysctl.d/k8s.conf
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.ipv4.ip_forward = 1
EOF
sudo sysctl --system
```
### Runtime de Containers
Chaque nœud du cluster doit disposer d'un **runtime de containers** pour pouvoir exécuter des Pods. J'utilise ici `containerd` :
```bash
sudo apt install -y containerd
```
Créer la configuration par défaut :
```bash
sudo mkdir -p /etc/containerd
containerd config default | sudo tee /etc/containerd/config.toml > /dev/null
```
Utiliser `systemd` comme pilote de _cgroup_ :
```bash
sudo sed -i 's/^\(\s*SystemdCgroup\s*=\s*\)false/\1true/' /etc/containerd/config.toml
```
Redémarrer et activer le service `containerd` :
```bash
sudo systemctl restart containerd
sudo systemctl enable containerd
```
### Paquets Kubernetes
Dernière étape : installer les paquets Kubernetes. On commence par ajouter le dépôt officiel et sa clé de signature.
Ajouter la clé :
```bash
curl -fsSL https://pkgs.k8s.io/core:/stable:/v1.32/deb/Release.key | sudo gpg --dearmor -o /etc/apt/keyrings/kubernetes-apt-keyring.gpg
```
Ajouter le dépôt :
```bash
echo 'deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://pkgs.k8s.io/core:/stable:/v1.32/deb/ /' | sudo tee /etc/apt/sources.list.d/kubernetes.list
```
Installer ensuite les paquets nécessaires :
- `kubeadm` : l'outil pour initier un cluster Kubernetes.
- `kubelet` : l'agent qui s'exécute sur tous les nœuds et qui gère les pods/containers.
- `kubectl` : l'outil en ligne de commande pour interagir avec le cluster.
Sur les nœuds, on installe `kubelet` et `kubeadm`, puis on les fige :
```bash
sudo apt-get update
sudo apt-get install -y kubelet kubeadm
sudo apt-mark hold kubelet kubeadm
```
Je ne gérerai pas le cluster depuis les nœuds eux-mêmes, j'installe `kubectl` sur mon contrôleur LXC à la place :
```bash
sudo apt-get update
sudo apt-get install -y kubectl
sudo apt-mark hold kubectl
```
---
## Initialiser le Cluster
Une fois tous les nœuds préparés, on peut initialiser le **plan de contrôle** Kubernetes sur le **premier nœud master**.
### Amorcer le Cluster
Exécutez la commande suivante pour amorcer le cluster:
```bash
sudo kubeadm init \
--control-plane-endpoint "k8s-lab.lab.vezpi.me:6443" \
--upload-certs \
--pod-network-cidr=10.10.0.0/16
```
**Explications** :
- `--control-plane-endpoint` : Nom DNS pour votre plan de contrôle.
- `--upload-certs` : Téléverse les certificats qui doivent être partagés entre tous les masters du cluster.
- `--pod-network-cidr` : Sous-réseau à utiliser pour le CNI.
Cette étape va :
- Initialiser la base `etcd` et les composants du plan de contrôle.
- Configurer RBAC et les tokens d'amorçage.
- Afficher deux commandes `kubeadm join` importantes : une pour les **workers**, l'autre pour les **masters supplémentaires**.
Le nom DNS `k8s-lab.lab.vezpi.me` est géré dans mon homelab par **Unbound DNS**, cela résout sur mon interface d'**OPNsense** où un service **HAProxy** écoute sur le port 6443 et équilibre la charge entre les 3 nœuds du plan de contrôle.
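Pour référence, une configuration HAProxy minimale pour ce rôle pourrait ressembler à ceci (une ébauche, pas la configuration exacte générée par OPNsense ; les IPs sont celles des masters de ce lab) :
```plaintext
frontend k8s-api
    bind :6443
    mode tcp
    default_backend k8s-masters

backend k8s-masters
    mode tcp
    balance roundrobin
    option tcp-check
    server apex-master   192.168.66.167:6443 check
    server vertex-master 192.168.66.169:6443 check
    server zenith-master 192.168.66.166:6443 check
```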
Vous verrez aussi un message indiquant comment configurer l'accès `kubectl`.
```plaintext
I0718 07:18:29.306814 14724 version.go:261] remote version is much newer: v1.33.3; falling back to: stable-1.32
[init] Using Kubernetes version: v1.32.7
[preflight] Running pre-flight checks
[preflight] Pulling images required for setting up a Kubernetes cluster
[preflight] This might take a minute or two, depending on the speed of your internet connection
[preflight] You can also perform this action beforehand using 'kubeadm config images pull'
W0718 07:18:29.736833 14724 checks.go:846] detected that the sandbox image "registry.k8s.io/pause:3.8" of the container runtime is inconsistent with that used by kubeadm.It is recommended to use "registry.k8s.io/pause:3.10" as the CRI sandbox image.
[certs] Using certificateDir folder "/etc/kubernetes/pki"
[certs] Generating "ca" certificate and key
[certs] Generating "apiserver" certificate and key
[certs] apiserver serving cert is signed for DNS names [apex-master k8s-lab.lab.vezpi.me kubernetes kubernetes.default kubernetes.default.svc kubernetes.default.svc.cluster.local] and IPs [10.96.0.1 192.168.66.167]
[certs] Generating "apiserver-kubelet-client" certificate and key
[certs] Generating "front-proxy-ca" certificate and key
[certs] Generating "front-proxy-client" certificate and key
[certs] Generating "etcd/ca" certificate and key
[certs] Generating "etcd/server" certificate and key
[certs] etcd/server serving cert is signed for DNS names [apex-master localhost] and IPs [192.168.66.167 127.0.0.1 ::1]
[certs] Generating "etcd/peer" certificate and key
[certs] etcd/peer serving cert is signed for DNS names [apex-master localhost] and IPs [192.168.66.167 127.0.0.1 ::1]
[certs] Generating "etcd/healthcheck-client" certificate and key
[certs] Generating "apiserver-etcd-client" certificate and key
[certs] Generating "sa" key and public key
[kubeconfig] Using kubeconfig folder "/etc/kubernetes"
[kubeconfig] Writing "admin.conf" kubeconfig file
[kubeconfig] Writing "super-admin.conf" kubeconfig file
[kubeconfig] Writing "kubelet.conf" kubeconfig file
[kubeconfig] Writing "controller-manager.conf" kubeconfig file
[kubeconfig] Writing "scheduler.conf" kubeconfig file
[etcd] Creating static Pod manifest for local etcd in "/etc/kubernetes/manifests"
[control-plane] Using manifest folder "/etc/kubernetes/manifests"
[control-plane] Creating static Pod manifest for "kube-apiserver"
[control-plane] Creating static Pod manifest for "kube-controller-manager"
[control-plane] Creating static Pod manifest for "kube-scheduler"
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
[kubelet-start] Starting the kubelet
[wait-control-plane] Waiting for the kubelet to boot up the control plane as static Pods from directory "/etc/kubernetes/manifests"
[kubelet-check] Waiting for a healthy kubelet at http://127.0.0.1:10248/healthz. This can take up to 4m0s
[kubelet-check] The kubelet is healthy after 501.894876ms
[api-check] Waiting for a healthy API server. This can take up to 4m0s
[api-check] The API server is healthy after 9.030595455s
[upload-config] Storing the configuration used in ConfigMap "kubeadm-config" in the "kube-system" Namespace
[kubelet] Creating a ConfigMap "kubelet-config" in namespace kube-system with the configuration for the kubelets in the cluster
[upload-certs] Storing the certificates in Secret "kubeadm-certs" in the "kube-system" Namespace
[upload-certs] Using certificate key:
70614009469f9fc7a97c392253492c509f1884281f59ccd7725b3200e3271794
[mark-control-plane] Marking the node apex-master as control-plane by adding the labels: [node-role.kubernetes.io/control-plane node.kubernetes.io/exclude-from-external-load-balancers]
[mark-control-plane] Marking the node apex-master as control-plane by adding the taints [node-role.kubernetes.io/control-plane:NoSchedule]
[bootstrap-token] Using token: 8etamd.g8whseg60kg09nu1
[bootstrap-token] Configuring bootstrap tokens, cluster-info ConfigMap, RBAC Roles
[bootstrap-token] Configured RBAC rules to allow Node Bootstrap tokens to get nodes
[bootstrap-token] Configured RBAC rules to allow Node Bootstrap tokens to post CSRs in order for nodes to get long term certificate credentials
[bootstrap-token] Configured RBAC rules to allow the csrapprover controller automatically approve CSRs from a Node Bootstrap Token
[bootstrap-token] Configured RBAC rules to allow certificate rotation for all node client certificates in the cluster
[bootstrap-token] Creating the "cluster-info" ConfigMap in the "kube-public" namespace
[kubelet-finalize] Updating "/etc/kubernetes/kubelet.conf" to point to a rotatable kubelet client certificate and key
[addons] Applied essential addon: CoreDNS
[addons] Applied essential addon: kube-proxy
Your Kubernetes control-plane has initialized successfully!
To start using your cluster, you need to run the following as a regular user:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
Alternatively, if you are the root user, you can run:
export KUBECONFIG=/etc/kubernetes/admin.conf
You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
https://kubernetes.io/docs/concepts/cluster-administration/addons/
You can now join any number of control-plane nodes running the following command on each as root:
kubeadm join k8s-lab.lab.vezpi.me:6443 --token 8etamd.g8whseg60kg09nu1 \
--discovery-token-ca-cert-hash sha256:65c4da3121f57d2e67ea6c1c1349544c9e295d78790b199b5c3be908ffe5ed6c \
--control-plane --certificate-key 70614009469f9fc7a97c392253492c509f1884281f59ccd7725b3200e3271794
Please note that the certificate-key gives access to cluster sensitive data, keep it secret!
As a safeguard, uploaded-certs will be deleted in two hours; If necessary, you can use
"kubeadm init phase upload-certs --upload-certs" to reload certs afterward.
Then you can join any number of worker nodes by running the following on each as root:
kubeadm join k8s-lab.lab.vezpi.me:6443 --token 8etamd.g8whseg60kg09nu1 \
--discovery-token-ca-cert-hash sha256:65c4da3121f57d2e67ea6c1c1349544c9e295d78790b199b5c3be908ffe5ed6c
```
### Configurer `kubectl`
Si vous préférez gérer votre cluster depuis le nœud master, vous pouvez simplement copier-coller depuis la sortie de la commande `kubeadm init` :
```bash
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
```
Si vous préférez contrôler le cluster depuis ailleurs, dans mon cas depuis mon bastion LXC :
```bash
mkdir -p $HOME/.kube
rsync --rsync-path="sudo rsync" <master-node>:/etc/kubernetes/admin.conf $HOME/.kube/config
```
Vérifiez l'accès :
```bash
kubectl get nodes
```
Vous devriez voir seulement le premier master listé (dans l'état `NotReady` jusqu'à ce que le CNI soit déployé).
### Installer le Plugin CNI Cilium
D'après la [documentation Cilium](https://docs.cilium.io/en/stable/gettingstarted/k8s-install-default/), il existe 2 manières principales d'installer le CNI : utiliser la **CLI Cilium** ou **Helm** ; pour ce lab, je vais utiliser l'outil CLI.
#### Installer la CLI Cilium
La CLI Cilium peut être utilisée pour installer Cilium, inspecter l'état de l'installation Cilium et activer/désactiver diverses fonctionnalités (ex : `clustermesh`, `Hubble`). Installez-la sur votre contrôleur, là où `kubectl` est installé :
```bash
CILIUM_CLI_VERSION=$(curl -s https://raw.githubusercontent.com/cilium/cilium-cli/main/stable.txt)
curl -L --fail --remote-name-all https://github.com/cilium/cilium-cli/releases/download/${CILIUM_CLI_VERSION}/cilium-linux-amd64.tar.gz{,.sha256sum}
sha256sum --check cilium-linux-amd64.tar.gz.sha256sum
sudo tar xzvfC cilium-linux-amd64.tar.gz /usr/local/bin
rm cilium-linux-amd64.tar.gz{,.sha256sum}
```
#### Installer Cilium
Installer Cilium dans le cluster Kubernetes pointé par le contexte `kubectl` :
```bash
cilium install
```
```plaintext
__ Using Cilium version 1.17.5
__ Auto-detected cluster name: kubernetes
__ Auto-detected kube-proxy has been installed
```
#### Valider l'Installation
Pour valider que Cilium a été installé correctement :
```bash
cilium status --wait
```
```plaintext
/__\
/__\__/__\ Cilium: OK
\__/__\__/ Operator: OK
/__\__/__\ Envoy DaemonSet: OK
\__/__\__/ Hubble Relay: disabled
\__/ ClusterMesh: disabled
DaemonSet cilium Desired: 1, Ready: 1/1, Available: 1/1
DaemonSet cilium-envoy Desired: 1, Ready: 1/1, Available: 1/1
Deployment cilium-operator Desired: 1, Ready: 1/1, Available: 1/1
Containers: cilium Running: 1
cilium-envoy Running: 1
cilium-operator Running: 1
clustermesh-apiserver
hubble-relay
Cluster Pods: 0/2 managed by Cilium
Helm chart version: 1.17.5
Image versions cilium quay.io/cilium/cilium:v1.17.5@sha256:baf8541723ee0b72d6c489c741c81a6fdc5228940d66cb76ef5ea2ce3c639ea6: 1
cilium-envoy quay.io/cilium/cilium-envoy:v1.32.6-1749271279-0864395884b263913eac200ee2048fd985f8e626@sha256:9f69e290a7ea3d4edf9192acd81694089af048ae0d8a67fb63bd62dc1d72203e: 1
cilium-operator quay.io/cilium/operator-generic:v1.17.5@sha256:f954c97eeb1b47ed67d08cc8fb4108fb829f869373cbb3e698a7f8ef1085b09e: 1
```
Une fois installé, le nœud master doit passer au statut `Ready`.
```plaintext
NAME STATUS ROLES AGE VERSION
apex-master Ready control-plane 99m v1.32.7
```
---
## Ajouter les Nœuds Supplémentaires
Après avoir initialisé le premier nœud du control plane, vous pouvez maintenant **ajouter les autres nœuds** au cluster.
Il existe deux types de commandes `join` :
- Une pour rejoindre les **nœuds du control plane (masters)**
- Une pour rejoindre les **nœuds workers**
Ces commandes sont affichées à la fin de la commande `kubeadm init`. Si vous ne les avez pas copiées, il est possible de les **régénérer**.
⚠️ Les certificats et la clé de déchiffrement **expirent au bout de deux heures**.
### Ajouter des Masters
Vous pouvez maintenant ajouter d'autres nœuds du control plane en exécutant la commande fournie par `kubeadm init` :
```bash
sudo kubeadm join <control-plane-endpoint> --token <token> --discovery-token-ca-cert-hash <discovery-token-ca-cert-hash> --control-plane --certificate-key <certificate-key>
```
```plaintext
[preflight] Running pre-flight checks
[preflight] Reading configuration from the "kubeadm-config" ConfigMap in namespace "kube-system"...
[preflight] Use 'kubeadm init phase upload-config --config your-config.yaml' to re-upload it.
[preflight] Running pre-flight checks before initializing the new control plane instance
[preflight] Pulling images required for setting up a Kubernetes cluster
[preflight] This might take a minute or two, depending on the speed of your internet connection
[preflight] You can also perform this action beforehand using 'kubeadm config images pull'
W0718 09:27:32.248290 12043 checks.go:846] detected that the sandbox image "registry.k8s.io/pause:3.8" of the container runtime is inconsistent with that used by kubeadm.It is recommended to use "registry.k8s.io/pause:3.10" as the CRI sandbox image.
[download-certs] Downloading the certificates in Secret "kubeadm-certs" in the "kube-system" Namespace
[download-certs] Saving the certificates to the folder: "/etc/kubernetes/pki"
[certs] Using certificateDir folder "/etc/kubernetes/pki"
[certs] Generating "etcd/server" certificate and key
[certs] etcd/server serving cert is signed for DNS names [localhost vertex-master] and IPs [192.168.66.169 127.0.0.1 ::1]
[certs] Generating "etcd/peer" certificate and key
[certs] etcd/peer serving cert is signed for DNS names [localhost vertex-master] and IPs [192.168.66.169 127.0.0.1 ::1]
[certs] Generating "apiserver-etcd-client" certificate and key
[certs] Generating "etcd/healthcheck-client" certificate and key
[certs] Generating "apiserver" certificate and key
[certs] apiserver serving cert is signed for DNS names [k8s-lab.lab.vezpi.me kubernetes kubernetes.default kubernetes.default.svc kubernetes.default.svc.cluster.local vertex-master] and IPs [10.96.0.1 192.168.66.169]
[certs] Generating "apiserver-kubelet-client" certificate and key
[certs] Generating "front-proxy-client" certificate and key
[certs] Valid certificates and keys now exist in "/etc/kubernetes/pki"
[certs] Using the existing "sa" key
[kubeconfig] Generating kubeconfig files
[kubeconfig] Using kubeconfig folder "/etc/kubernetes"
[kubeconfig] Writing "admin.conf" kubeconfig file
[kubeconfig] Writing "controller-manager.conf" kubeconfig file
[kubeconfig] Writing "scheduler.conf" kubeconfig file
[control-plane] Using manifest folder "/etc/kubernetes/manifests"
[control-plane] Creating static Pod manifest for "kube-apiserver"
[control-plane] Creating static Pod manifest for "kube-controller-manager"
[control-plane] Creating static Pod manifest for "kube-scheduler"
[check-etcd] Checking that the etcd cluster is healthy
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
[kubelet-start] Starting the kubelet
[kubelet-check] Waiting for a healthy kubelet at http://127.0.0.1:10248/healthz. This can take up to 4m0s
[kubelet-check] The kubelet is healthy after 501.761616ms
[kubelet-start] Waiting for the kubelet to perform the TLS Bootstrap
[etcd] Announced new etcd member joining to the existing etcd cluster
[etcd] Creating static Pod manifest for "etcd"
{"level":"warn","ts":"2025-07-18T09:27:36.040077Z","logger":"etcd-client","caller":"v3@v3.5.16/retry_interceptor.go:63","msg":"retrying of unary invoker failed","target":"etcd-endpoints://0xc00037ab40/192.168.66.167:2379","attempt":0,"error":"rpc error: code = FailedPrecondition desc = etcdserver: can only promote a learner member which is in sync with leader"}
[...]
{"level":"warn","ts":"2025-07-18T09:27:44.976805Z","logger":"etcd-client","caller":"v3@v3.5.16/retry_interceptor.go:63","msg":"retrying of unary invoker failed","target":"etcd-endpoints://0xc00037ab40/192.168.66.167:2379","attempt":0,"error":"rpc error: code = FailedPrecondition desc = etcdserver: can only promote a learner member which is in sync with leader"}
[etcd] Waiting for the new etcd member to join the cluster. This can take up to 40s
[mark-control-plane] Marking the node vertex-master as control-plane by adding the labels: [node-role.kubernetes.io/control-plane node.kubernetes.io/exclude-from-external-load-balancers]
[mark-control-plane] Marking the node vertex-master as control-plane by adding the taints [node-role.kubernetes.io/control-plane:NoSchedule]
This node has joined the cluster and a new control plane instance was created:
* Certificate signing request was sent to apiserver and approval was received.
* The Kubelet was informed of the new secure connection details.
* Control plane label and taint were applied to the new node.
* The Kubernetes control plane instances scaled up.
* A new etcd member was added to the local/stacked etcd cluster.
To start administering your cluster from this node, you need to run the following as a regular user:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
Run 'kubectl get nodes' to see this node join the cluster.
```
#### Régénérer les Certificats
Si les certificats ont expiré, vous verrez un message d'erreur lors du `kubeadm join` :
```plaintext
[download-certs] Downloading the certificates in Secret "kubeadm-certs" in the "kube-system" Namespace
error execution phase control-plane-prepare/download-certs: error downloading certs: error downloading the secret: Secret "kubeadm-certs" was not found in the "kube-system" Namespace. This Secret might have expired. Please, run `kubeadm init phase upload-certs --upload-certs` on a control plane to generate a new one
```
Dans ce cas, vous pouvez **téléverser à nouveau les certificats** et générer une nouvelle clé de chiffrement à partir d'un nœud déjà membre du cluster :
```bash
sudo kubeadm init phase upload-certs --upload-certs
```
```plaintext
I0718 09:26:12.448472 18624 version.go:261] remote version is much newer: v1.33.3; falling back to: stable-1.32
[upload-certs] Storing the certificates in Secret "kubeadm-certs" in the "kube-system" Namespace
[upload-certs] Using certificate key:
7531149107ebc3caf4990f94d19824aecf39d93b84ee1b9c86aee84c04e76656
```
#### Générer un Token
Associé à la clé de certificat, vous aurez besoin d'un **nouveau token** ; cette commande affichera directement la commande complète `join` pour un master :
```bash
sudo kubeadm token create --print-join-command --certificate-key <certificate-key>
```
Utilisez cette commande sur les nœuds à ajouter au cluster Kubernetes comme master.
### Ajouter des Workers
Vous pouvez rejoindre n'importe quel nombre de nœuds workers avec la commande suivante :
```bash
sudo kubeadm join k8s-lab.lab.vezpi.me:6443 --token 8etamd.g8whseg60kg09nu1 \
--discovery-token-ca-cert-hash sha256:65c4da3121f57d2e67ea6c1c1349544c9e295d78790b199b5c3be908ffe5ed6c
```
```plaintext
[preflight] Running pre-flight checks
[preflight] Reading configuration from the "kubeadm-config" ConfigMap in namespace "kube-system"...
[preflight] Use 'kubeadm init phase upload-config --config your-config.yaml' to re-upload it.
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
[kubelet-start] Starting the kubelet
[kubelet-check] Waiting for a healthy kubelet at http://127.0.0.1:10248/healthz. This can take up to 4m0s
[kubelet-check] The kubelet is healthy after 506.731798ms
[kubelet-start] Waiting for the kubelet to perform the TLS Bootstrap
This node has joined the cluster:
* Certificate signing request was sent to apiserver and a response was received.
* The Kubelet was informed of the new secure connection details.
Run 'kubectl get nodes' on the control-plane to see this node join the cluster.
```
Encore une fois, si vous avez perdu la sortie initiale de `kubeadm init`, vous pouvez régénérer une commande complète :
```bash
sudo kubeadm token create --print-join-command
```
Utilisez cette commande sur les nœuds à ajouter comme workers.
### Vérifier le Cluster
Depuis votre contrôleur, vous pouvez vérifier que tous les nœuds ont bien rejoint le cluster et sont dans l'état `Ready` :
```bash
kubectl get node
```
```plaintext
NAME STATUS ROLES AGE VERSION
apex-master Ready control-plane 154m v1.32.7
apex-worker Ready <none> 5m14s v1.32.7
vertex-master Ready control-plane 26m v1.32.7
vertex-worker Ready <none> 3m39s v1.32.7
zenith-master Ready control-plane 23m v1.32.7
zenith-worker Ready <none> 3m26s v1.32.7
```
Pour valider que le cluster a une bonne connectivité réseau :
```bash
cilium connectivity test
```
```plaintext
__ Monitor aggregation detected, will skip some flow validation steps
[kubernetes] Creating namespace cilium-test-1 for connectivity check...
__ [kubernetes] Deploying echo-same-node service...
__ [kubernetes] Deploying DNS test server configmap...
__ [kubernetes] Deploying same-node deployment...
__ [kubernetes] Deploying client deployment...
__ [kubernetes] Deploying client2 deployment...
__ [kubernetes] Deploying client3 deployment...
__ [kubernetes] Deploying echo-other-node service...
__ [kubernetes] Deploying other-node deployment...
__ [host-netns] Deploying kubernetes daemonset...
__ [host-netns-non-cilium] Deploying kubernetes daemonset...
__ Skipping tests that require a node Without Cilium
[kubernetes] Waiting for deployment cilium-test-1/client to become ready...
__ [kubernetes] Waiting for deployment cilium-test-1/client2 to become ready...
__ [kubernetes] Waiting for deployment cilium-test-1/echo-same-node to become ready...
__ [kubernetes] Waiting for deployment cilium-test-1/client3 to become ready...
__ [kubernetes] Waiting for deployment cilium-test-1/echo-other-node to become ready...
__ [kubernetes] Waiting for pod cilium-test-1/client2-66475877c6-gpdkz to reach DNS server on cilium-test-1/echo-same-node-6c98489c8d-547mc pod...
__ [kubernetes] Waiting for pod cilium-test-1/client3-795488bf5-xrlbp to reach DNS server on cilium-test-1/echo-same-node-6c98489c8d-547mc pod...
__ [kubernetes] Waiting for pod cilium-test-1/client-645b68dcf7-ps276 to reach DNS server on cilium-test-1/echo-same-node-6c98489c8d-547mc pod...
__ [kubernetes] Waiting for pod cilium-test-1/client2-66475877c6-gpdkz to reach DNS server on cilium-test-1/echo-other-node-6d774d44c4-gzkmd pod...
__ [kubernetes] Waiting for pod cilium-test-1/client3-795488bf5-xrlbp to reach DNS server on cilium-test-1/echo-other-node-6d774d44c4-gzkmd pod...
__ [kubernetes] Waiting for pod cilium-test-1/client-645b68dcf7-ps276 to reach DNS server on cilium-test-1/echo-other-node-6d774d44c4-gzkmd pod...
__ [kubernetes] Waiting for pod cilium-test-1/client2-66475877c6-gpdkz to reach default/kubernetes service...
__ [kubernetes] Waiting for pod cilium-test-1/client3-795488bf5-xrlbp to reach default/kubernetes service...
__ [kubernetes] Waiting for pod cilium-test-1/client-645b68dcf7-ps276 to reach default/kubernetes service...
__ [kubernetes] Waiting for Service cilium-test-1/echo-other-node to become ready...
__ [kubernetes] Waiting for Service cilium-test-1/echo-other-node to be synchronized by Cilium pod kube-system/cilium-6824w
__ [kubernetes] Waiting for Service cilium-test-1/echo-other-node to be synchronized by Cilium pod kube-system/cilium-jc4fx
__ [kubernetes] Waiting for Service cilium-test-1/echo-same-node to become ready...
__ [kubernetes] Waiting for Service cilium-test-1/echo-same-node to be synchronized by Cilium pod kube-system/cilium-6824w
__ [kubernetes] Waiting for Service cilium-test-1/echo-same-node to be synchronized by Cilium pod kube-system/cilium-jc4fx
__ [kubernetes] Waiting for NodePort 192.168.66.166:32391 (cilium-test-1/echo-other-node) to become ready...
__ [kubernetes] Waiting for NodePort 192.168.66.166:32055 (cilium-test-1/echo-same-node) to become ready...
__ [kubernetes] Waiting for NodePort 192.168.66.172:32391 (cilium-test-1/echo-other-node) to become ready...
__ [kubernetes] Waiting for NodePort 192.168.66.172:32055 (cilium-test-1/echo-same-node) to become ready...
__ [kubernetes] Waiting for NodePort 192.168.66.167:32391 (cilium-test-1/echo-other-node) to become ready...
__ [kubernetes] Waiting for NodePort 192.168.66.167:32055 (cilium-test-1/echo-same-node) to become ready...
__ [kubernetes] Waiting for NodePort 192.168.66.168:32391 (cilium-test-1/echo-other-node) to become ready...
__ [kubernetes] Waiting for NodePort 192.168.66.168:32055 (cilium-test-1/echo-same-node) to become ready...
__ [kubernetes] Waiting for NodePort 192.168.66.169:32391 (cilium-test-1/echo-other-node) to become ready...
__ [kubernetes] Waiting for NodePort 192.168.66.169:32055 (cilium-test-1/echo-same-node) to become ready...
__ [kubernetes] Waiting for NodePort 192.168.66.170:32391 (cilium-test-1/echo-other-node) to become ready...
__ [kubernetes] Waiting for NodePort 192.168.66.170:32055 (cilium-test-1/echo-same-node) to become ready...
__ [kubernetes] Waiting for DaemonSet cilium-test-1/host-netns-non-cilium to become ready...
__ [kubernetes] Waiting for DaemonSet cilium-test-1/host-netns to become ready...
__ Skipping IPCache check
Enabling Hubble telescope...
__ Unable to contact Hubble Relay, disabling Hubble telescope and flow validation: rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing: dial tcp [::1]:4245: connect: connection refused"
Expose Relay locally with:
cilium hubble enable
cilium hubble port-forward&
__ Cilium version: 1.17.5
[cilium-test-1] Running 123 tests ...
[=] [cilium-test-1] Test [no-policies] [1/123]
[...]
[=] [cilium-test-1] Test [check-log-errors] [123/123]
.................................................
__ [cilium-test-1] All 73 tests (739 actions) successful, 50 tests skipped, 1 scenarios skipped.
```
⌛ Ce test de connectivité peut prendre jusqu'à **30 minutes**.
---
## Conclusion
🚀 Notre cluster Kubernetes hautement disponible est prêt !
Dans cet article, nous avons vu comment **créer manuellement un cluster Kubernetes** dans mon homelab à l'aide de `kubeadm`, sur un ensemble de 6 machines Ubuntu (3 masters et 3 workers) préalablement déployées avec Terraform sur Proxmox.
Nous avons suivi les étapes suivantes :
- Préparation des nœuds avec les outils, modules noyau et runtime nécessaires
- Installation des paquets Kubernetes
- Initialisation du cluster depuis le premier nœud master
- Ajout des autres nœuds du plan de contrôle et des workers
- Vérification de l'état et du bon fonctionnement du cluster
Cette approche manuelle permet de mieux comprendre comment un cluster Kubernetes est construit en interne. C'est une excellente base avant de passer à l'automatisation dans les prochains articles, en utilisant des outils comme Ansible.
Restez connectés, la suite sera axée sur l'automatisation de tout ça !

View File

@@ -1,635 +0,0 @@
---
slug: create-manual-kubernetes-cluster-kubeadm
title: Create a Highly Available Kubernetes Cluster with kubeadm on VMs
description: Step-by-step guide to manually build a highly available Kubernetes cluster on virtual machines using kubeadm.
date: 2025-07-18
draft: false
tags:
- kubernetes
- kubeadm
- high-availability
categories:
- homelab
---
## Intro
In this [previous article]({{< ref "post/7-terraform-create-proxmox-module" >}}), I explained how to deploy VMs using a **Terraform** module with **Proxmox** and ended up with 6 VMs, 3 master and 3 worker nodes, based on a [cloud-init template]({{< ref "post/1-proxmox-cloud-init-vm-template" >}}).
Now that the infrastructure is ready, let's move on to the next step: **manually building a Kubernetes cluster** in my homelab using `kubeadm`, highly available using stacked `etcd`.
In this post, I'll walk through each step of the installation process of a Kubernetes cluster. I will not rely on automation tools to configure the nodes for now, to better understand the steps involved in bootstrapping a Kubernetes cluster. Automation will be covered in future posts.
---
## What is Kubernetes
Kubernetes is an open-source platform for orchestrating containers across a group of machines. It handles the deployment, scaling, and health of containerized applications, allowing you to focus on building your services rather than managing infrastructure details.
A Kubernetes cluster is made up of two main types of nodes: control plane (masters) nodes and worker nodes. The control plane is responsible for the overall management of the cluster, it makes decisions about scheduling, monitoring, and responding to changes in the system. The worker nodes are where your applications actually run, inside containers managed by Kubernetes.
In this post, we'll manually set up a Kubernetes cluster with 3 control plane nodes (masters) and 3 workers. This structure reflects a highly available and production-like setup, even though the goal here is mainly to learn and understand how the components fit together.
The official documentation can be found [here](https://kubernetes.io/docs/setup/production-environment/tools/kubeadm/), I will use the version **v1.32**.
---
## Prepare the Nodes
I will perform the following steps on all 6 VMs (masters and workers).
### Hostname
Each VM has a unique **hostname** and all nodes must **resolve** each other.
The hostname is set upon the VM creation with cloud-init. But for demonstration purpose, I'll set it manually:
```bash
sudo hostnamectl set-hostname <hostname>
```
On my infrastructure, the nodes resolve each other using my DNS server on the `lab.vezpi.me` domain. If you don't have a DNS server, you can hardcode the node IPs in each `/etc/hosts` file:
```bash
192.168.66.168 apex-worker
192.168.66.167 apex-master
192.168.66.166 zenith-master
192.168.66.170 vertex-worker
192.168.66.169 vertex-master
192.168.66.172 zenith-worker
```
### OS Updates
My VMs are running **Ubuntu 24.04.2 LTS**. Cloud-init handles the updates after provisioning in that case, but let's make sure everything is up to date and install the packages needed to add the Kubernetes repository:
```bash
sudo apt update && sudo apt upgrade -y
sudo apt install -y apt-transport-https ca-certificates curl gpg
```
### Swap
The default behavior of a `kubelet` is to fail to start if **swap memory** is detected on a node. This means that swap should either be disabled or tolerated by `kubelet`.
My VMs are not using swap, but here how to disable it:
```bash
sudo swapoff -a
sudo sed -i '/ swap / s/^/#/' /etc/fstab
```
### Firewall
For this lab, I will just disable the local firewall (don't do that in production):
```bash
sudo systemctl disable --now ufw
```
For production, you want to allow the nodes to talk to each other on these ports:
#### Control plane
|Protocol|Direction|Port Range|Purpose|Used By|
|---|---|---|---|---|
|TCP|Inbound|6443|Kubernetes API server|All|
|TCP|Inbound|2379-2380|etcd server client API|kube-apiserver, etcd|
|TCP|Inbound|10250|Kubelet API|Self, Control plane|
|TCP|Inbound|10259|kube-scheduler|Self|
|TCP|Inbound|10257|kube-controller-manager|Self|
#### Worker
| Protocol | Direction | Port Range | Purpose | Used By |
| -------- | --------- | ----------- | ------------------ | -------------------- |
| TCP | Inbound | 10250 | Kubelet API | Self, Control plane |
| TCP | Inbound | 10256 | kube-proxy | Self, Load balancers |
| TCP | Inbound | 30000-32767 | NodePort Services† | All |
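If you would rather keep `ufw` enabled, the rules below are a minimal sketch matching the tables above (assuming `192.168.66.0/24` is the node subnet; adjust to your own network):
```bash
# Control plane nodes
sudo ufw allow proto tcp from 192.168.66.0/24 to any port 6443
sudo ufw allow proto tcp from 192.168.66.0/24 to any port 2379:2380
sudo ufw allow proto tcp from 192.168.66.0/24 to any port 10250
sudo ufw allow proto tcp from 192.168.66.0/24 to any port 10257
sudo ufw allow proto tcp from 192.168.66.0/24 to any port 10259

# Worker nodes
sudo ufw allow proto tcp from 192.168.66.0/24 to any port 10250
sudo ufw allow proto tcp from 192.168.66.0/24 to any port 10256
sudo ufw allow proto tcp from 192.168.66.0/24 to any port 30000:32767
sudo ufw enable
```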
### Kernel Modules and Settings
Kubernetes needs 2 kernel modules:
- **overlay**: for facilitating the layering of one filesystem on top of another
- **br_netfilter**: for enabling bridge network connections
Let's enable them:
```bash
cat <<EOF | sudo tee /etc/modules-load.d/k8s.conf
overlay
br_netfilter
EOF
sudo modprobe overlay
sudo modprobe br_netfilter
```
Some kernel settings related to network are also needed:
```bash
cat <<EOF | sudo tee /etc/sysctl.d/k8s.conf
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.ipv4.ip_forward = 1
EOF
sudo sysctl --system
```
### Container Runtime
You need to install a **container runtime** into each node in the cluster so that Pods can run there. I will use `containerd`:
```bash
sudo apt install -y containerd
```
Create the default configuration:
```bash
sudo mkdir -p /etc/containerd
containerd config default | sudo tee /etc/containerd/config.toml > /dev/null
```
Enable `systemd` *cgroup* driver:
```bash
sudo sed -i 's/^\(\s*SystemdCgroup\s*=\s*\)false/\1true/' /etc/containerd/config.toml
```
Restart and enable the `containerd` service
```bash
sudo systemctl restart containerd
sudo systemctl enable containerd
```
### Kubernetes Packages
Last step: install the Kubernetes packages. I start with adding the repository and its signing key.
Add the key:
```bash
curl -fsSL https://pkgs.k8s.io/core:/stable:/v1.32/deb/Release.key | sudo gpg --dearmor -o /etc/apt/keyrings/kubernetes-apt-keyring.gpg
```
Add the repository:
```bash
echo 'deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://pkgs.k8s.io/core:/stable:/v1.32/deb/ /' | sudo tee /etc/apt/sources.list.d/kubernetes.list
```
Finally I can install the needed packages:
- `kubeadm`: the command to bootstrap the cluster.
- `kubelet`: the component that runs on all of the machines in your cluster and does things like starting pods and containers.
- `kubectl`: the command line util to talk to your cluster.
On the nodes, update the `apt` package index, install `kubelet` and `kubeadm`, and pin their version:
```bash
sudo apt-get update
sudo apt-get install -y kubelet kubeadm
sudo apt-mark hold kubelet kubeadm
```
I will not manage the cluster from my nodes; I install `kubectl` on my LXC controller instead:
```bash
sudo apt-get update
sudo apt-get install -y kubectl
sudo apt-mark hold kubectl
```
---
## Initialize the Cluster
Once all nodes are prepared, it's time to initialize the Kubernetes control plane on the **first master node**.
### Bootstrap the Cluster
Run the following command to bootstrap the cluster:
```bash
sudo kubeadm init \
--control-plane-endpoint "k8s-lab.lab.vezpi.me:6443" \
--upload-certs \
--pod-network-cidr=10.10.0.0/16
```
**Explanation**:
- `--control-plane-endpoint`: DNS name for your control plane.
- `--upload-certs`: Upload the certificates that should be shared across all masters of the cluster.
- `--pod-network-cidr`: Subnet for the CNI.
This step will:
- Initialize the `etcd` database and control plane components.
- Set up RBAC and bootstrap tokens.
- Output two important `kubeadm join` commands: one for **workers**, and one for **additional control-plane nodes**.
The DNS name `k8s-lab.lab.vezpi.me` is handled in my homelab by **Unbound DNS**; it resolves to my **OPNsense** interface, where a **HAProxy** service listens on port 6443 and load balances between the 3 control plane nodes.
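For reference, a minimal HAProxy TCP backend for this role could look like the following sketch (not the exact configuration generated by OPNsense; the IPs are this lab's control plane nodes):
```plaintext
frontend k8s-api
    bind :6443
    mode tcp
    default_backend k8s-masters

backend k8s-masters
    mode tcp
    balance roundrobin
    option tcp-check
    server apex-master   192.168.66.167:6443 check
    server vertex-master 192.168.66.169:6443 check
    server zenith-master 192.168.66.166:6443 check
```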
You'll also see a message instructing you to set up your `kubectl` access.
```plaintext
I0718 07:18:29.306814 14724 version.go:261] remote version is much newer: v1.33.3; falling back to: stable-1.32
[init] Using Kubernetes version: v1.32.7
[preflight] Running pre-flight checks
[preflight] Pulling images required for setting up a Kubernetes cluster
[preflight] This might take a minute or two, depending on the speed of your internet connection
[preflight] You can also perform this action beforehand using 'kubeadm config images pull'
W0718 07:18:29.736833 14724 checks.go:846] detected that the sandbox image "registry.k8s.io/pause:3.8" of the container runtime is inconsistent with that used by kubeadm.It is recommended to use "registry.k8s.io/pause:3.10" as the CRI sandbox image.
[certs] Using certificateDir folder "/etc/kubernetes/pki"
[certs] Generating "ca" certificate and key
[certs] Generating "apiserver" certificate and key
[certs] apiserver serving cert is signed for DNS names [apex-master k8s-lab.lab.vezpi.me kubernetes kubernetes.default kubernetes.default.svc kubernetes.default.svc.cluster.local] and IPs [10.96.0.1 192.168.66.167]
[certs] Generating "apiserver-kubelet-client" certificate and key
[certs] Generating "front-proxy-ca" certificate and key
[certs] Generating "front-proxy-client" certificate and key
[certs] Generating "etcd/ca" certificate and key
[certs] Generating "etcd/server" certificate and key
[certs] etcd/server serving cert is signed for DNS names [apex-master localhost] and IPs [192.168.66.167 127.0.0.1 ::1]
[certs] Generating "etcd/peer" certificate and key
[certs] etcd/peer serving cert is signed for DNS names [apex-master localhost] and IPs [192.168.66.167 127.0.0.1 ::1]
[certs] Generating "etcd/healthcheck-client" certificate and key
[certs] Generating "apiserver-etcd-client" certificate and key
[certs] Generating "sa" key and public key
[kubeconfig] Using kubeconfig folder "/etc/kubernetes"
[kubeconfig] Writing "admin.conf" kubeconfig file
[kubeconfig] Writing "super-admin.conf" kubeconfig file
[kubeconfig] Writing "kubelet.conf" kubeconfig file
[kubeconfig] Writing "controller-manager.conf" kubeconfig file
[kubeconfig] Writing "scheduler.conf" kubeconfig file
[etcd] Creating static Pod manifest for local etcd in "/etc/kubernetes/manifests"
[control-plane] Using manifest folder "/etc/kubernetes/manifests"
[control-plane] Creating static Pod manifest for "kube-apiserver"
[control-plane] Creating static Pod manifest for "kube-controller-manager"
[control-plane] Creating static Pod manifest for "kube-scheduler"
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
[kubelet-start] Starting the kubelet
[wait-control-plane] Waiting for the kubelet to boot up the control plane as static Pods from directory "/etc/kubernetes/manifests"
[kubelet-check] Waiting for a healthy kubelet at http://127.0.0.1:10248/healthz. This can take up to 4m0s
[kubelet-check] The kubelet is healthy after 501.894876ms
[api-check] Waiting for a healthy API server. This can take up to 4m0s
[api-check] The API server is healthy after 9.030595455s
[upload-config] Storing the configuration used in ConfigMap "kubeadm-config" in the "kube-system" Namespace
[kubelet] Creating a ConfigMap "kubelet-config" in namespace kube-system with the configuration for the kubelets in the cluster
[upload-certs] Storing the certificates in Secret "kubeadm-certs" in the "kube-system" Namespace
[upload-certs] Using certificate key:
70614009469f9fc7a97c392253492c509f1884281f59ccd7725b3200e3271794
[mark-control-plane] Marking the node apex-master as control-plane by adding the labels: [node-role.kubernetes.io/control-plane node.kubernetes.io/exclude-from-external-load-balancers]
[mark-control-plane] Marking the node apex-master as control-plane by adding the taints [node-role.kubernetes.io/control-plane:NoSchedule]
[bootstrap-token] Using token: 8etamd.g8whseg60kg09nu1
[bootstrap-token] Configuring bootstrap tokens, cluster-info ConfigMap, RBAC Roles
[bootstrap-token] Configured RBAC rules to allow Node Bootstrap tokens to get nodes
[bootstrap-token] Configured RBAC rules to allow Node Bootstrap tokens to post CSRs in order for nodes to get long term certificate credentials
[bootstrap-token] Configured RBAC rules to allow the csrapprover controller automatically approve CSRs from a Node Bootstrap Token
[bootstrap-token] Configured RBAC rules to allow certificate rotation for all node client certificates in the cluster
[bootstrap-token] Creating the "cluster-info" ConfigMap in the "kube-public" namespace
[kubelet-finalize] Updating "/etc/kubernetes/kubelet.conf" to point to a rotatable kubelet client certificate and key
[addons] Applied essential addon: CoreDNS
[addons] Applied essential addon: kube-proxy
Your Kubernetes control-plane has initialized successfully!
To start using your cluster, you need to run the following as a regular user:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
Alternatively, if you are the root user, you can run:
export KUBECONFIG=/etc/kubernetes/admin.conf
You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
https://kubernetes.io/docs/concepts/cluster-administration/addons/
You can now join any number of control-plane nodes running the following command on each as root:
kubeadm join k8s-lab.lab.vezpi.me:6443 --token 8etamd.g8whseg60kg09nu1 \
--discovery-token-ca-cert-hash sha256:65c4da3121f57d2e67ea6c1c1349544c9e295d78790b199b5c3be908ffe5ed6c \
--control-plane --certificate-key 70614009469f9fc7a97c392253492c509f1884281f59ccd7725b3200e3271794
Please note that the certificate-key gives access to cluster sensitive data, keep it secret!
As a safeguard, uploaded-certs will be deleted in two hours; If necessary, you can use
"kubeadm init phase upload-certs --upload-certs" to reload certs afterward.
Then you can join any number of worker nodes by running the following on each as root:
kubeadm join k8s-lab.lab.vezpi.me:6443 --token 8etamd.g8whseg60kg09nu1 \
--discovery-token-ca-cert-hash sha256:65c4da3121f57d2e67ea6c1c1349544c9e295d78790b199b5c3be908ffe5ed6c
```
### Configure `kubectl`
If you want to manage your cluster from your master node, you can simply copy paste from the output of the `kubeadm init` command:
```bash
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
```
If you prefer to control the cluster from elsewhere, in my case from my LXC bastion:
```bash
mkdir -p $HOME/.kube
rsync --rsync-path="sudo rsync" <master-node>:/etc/kubernetes/admin.conf $HOME/.kube/config
```
Verify your access:
```bash
kubectl get nodes
```
You should see only the first master listed (in `NotReady` state until the CNI is deployed).
### Install the CNI Plugin Cilium
From the [Cilium documentation](https://docs.cilium.io/en/stable/gettingstarted/k8s-install-default/), there are 2 common ways for installing the CNI: using the **Cilium CLI** or **Helm**, for that lab I will use the CLI tool.
#### Install the Cilium CLI
The Cilium CLI can be used to install Cilium, inspect the state of a Cilium installation, and enable/disable various features (e.g. `clustermesh`, `Hubble`). Install it on your controller where `kubectl` is installed:
```bash
CILIUM_CLI_VERSION=$(curl -s https://raw.githubusercontent.com/cilium/cilium-cli/main/stable.txt)
curl -L --fail --remote-name-all https://github.com/cilium/cilium-cli/releases/download/${CILIUM_CLI_VERSION}/cilium-linux-amd64.tar.gz{,.sha256sum}
sha256sum --check cilium-linux-amd64.tar.gz.sha256sum
sudo tar xzvfC cilium-linux-amd64.tar.gz /usr/local/bin
rm cilium-linux-amd64.tar.gz{,.sha256sum}
```
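To confirm the CLI is available, a quick client-side version check should be enough (Cilium itself is not deployed yet, so only the client version matters here):
```bash
# Print the cilium-cli client version; the cluster-side check would fail at this point
cilium version --client
```
If your cilium-cli build doesn't support the `--client` flag, plain `cilium version` works too; it will simply warn that it can't reach a running Cilium.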
#### Install Cilium
Install Cilium into the Kubernetes cluster pointed to by your current `kubectl` context:
```bash
cilium install
```
```plaintext
__ Using Cilium version 1.17.5
__ Auto-detected cluster name: kubernetes
__ Auto-detected kube-proxy has been installed
```
#### Validate the Installation
To validate that Cilium has been properly installed:
```bash
cilium status --wait
```
```plaintext
/__\
/__\__/__\ Cilium: OK
\__/__\__/ Operator: OK
/__\__/__\ Envoy DaemonSet: OK
\__/__\__/ Hubble Relay: disabled
\__/ ClusterMesh: disabled
DaemonSet cilium Desired: 1, Ready: 1/1, Available: 1/1
DaemonSet cilium-envoy Desired: 1, Ready: 1/1, Available: 1/1
Deployment cilium-operator Desired: 1, Ready: 1/1, Available: 1/1
Containers: cilium Running: 1
cilium-envoy Running: 1
cilium-operator Running: 1
clustermesh-apiserver
hubble-relay
Cluster Pods: 0/2 managed by Cilium
Helm chart version: 1.17.5
Image versions cilium quay.io/cilium/cilium:v1.17.5@sha256:baf8541723ee0b72d6c489c741c81a6fdc5228940d66cb76ef5ea2ce3c639ea6: 1
cilium-envoy quay.io/cilium/cilium-envoy:v1.32.6-1749271279-0864395884b263913eac200ee2048fd985f8e626@sha256:9f69e290a7ea3d4edf9192acd81694089af048ae0d8a67fb63bd62dc1d72203e: 1
cilium-operator quay.io/cilium/operator-generic:v1.17.5@sha256:f954c97eeb1b47ed67d08cc8fb4108fb829f869373cbb3e698a7f8ef1085b09e: 1
```
Once installed, the master node should transition to `Ready` status:
```plaintext
NAME STATUS ROLES AGE VERSION
apex-master Ready control-plane 99m v1.32.7
```
---
## Join Additional Nodes
After initializing the first control plane node, you can now join the remaining nodes to the cluster.
There are two types of join commands:
- One for joining **control-plane (master) nodes**
- One for joining **worker nodes**
These commands were displayed at the end of the `kubeadm init` output. If you didn't copy them, you can regenerate them.
⚠️ The certificates and the decryption key expire after two hours.
### Additional Masters
You can now join any number of control-plane nodes by running the command provided in the `kubeadm init` output:
```bash
sudo kubeadm join <control-plane-endpoint> --token <token> --discovery-token-ca-cert-hash <discovery-token-ca-cert-hash> --control-plane --certificate-key <certificate-key>
```
```plaintext
[preflight] Running pre-flight checks
[preflight] Reading configuration from the "kubeadm-config" ConfigMap in namespace "kube-system"...
[preflight] Use 'kubeadm init phase upload-config --config your-config.yaml' to re-upload it.
[preflight] Running pre-flight checks before initializing the new control plane instance
[preflight] Pulling images required for setting up a Kubernetes cluster
[preflight] This might take a minute or two, depending on the speed of your internet connection
[preflight] You can also perform this action beforehand using 'kubeadm config images pull'
W0718 09:27:32.248290 12043 checks.go:846] detected that the sandbox image "registry.k8s.io/pause:3.8" of the container runtime is inconsistent with that used by kubeadm.It is recommended to use "registry.k8s.io/pause:3.10" as the CRI sandbox image.
[download-certs] Downloading the certificates in Secret "kubeadm-certs" in the "kube-system" Namespace
[download-certs] Saving the certificates to the folder: "/etc/kubernetes/pki"
[certs] Using certificateDir folder "/etc/kubernetes/pki"
[certs] Generating "etcd/server" certificate and key
[certs] etcd/server serving cert is signed for DNS names [localhost vertex-master] and IPs [192.168.66.169 127.0.0.1 ::1]
[certs] Generating "etcd/peer" certificate and key
[certs] etcd/peer serving cert is signed for DNS names [localhost vertex-master] and IPs [192.168.66.169 127.0.0.1 ::1]
[certs] Generating "apiserver-etcd-client" certificate and key
[certs] Generating "etcd/healthcheck-client" certificate and key
[certs] Generating "apiserver" certificate and key
[certs] apiserver serving cert is signed for DNS names [k8s-lab.lab.vezpi.me kubernetes kubernetes.default kubernetes.default.svc kubernetes.default.svc.cluster.local vertex-master] and IPs [10.96.0.1 192.168.66.169]
[certs] Generating "apiserver-kubelet-client" certificate and key
[certs] Generating "front-proxy-client" certificate and key
[certs] Valid certificates and keys now exist in "/etc/kubernetes/pki"
[certs] Using the existing "sa" key
[kubeconfig] Generating kubeconfig files
[kubeconfig] Using kubeconfig folder "/etc/kubernetes"
[kubeconfig] Writing "admin.conf" kubeconfig file
[kubeconfig] Writing "controller-manager.conf" kubeconfig file
[kubeconfig] Writing "scheduler.conf" kubeconfig file
[control-plane] Using manifest folder "/etc/kubernetes/manifests"
[control-plane] Creating static Pod manifest for "kube-apiserver"
[control-plane] Creating static Pod manifest for "kube-controller-manager"
[control-plane] Creating static Pod manifest for "kube-scheduler"
[check-etcd] Checking that the etcd cluster is healthy
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
[kubelet-start] Starting the kubelet
[kubelet-check] Waiting for a healthy kubelet at http://127.0.0.1:10248/healthz. This can take up to 4m0s
[kubelet-check] The kubelet is healthy after 501.761616ms
[kubelet-start] Waiting for the kubelet to perform the TLS Bootstrap
[etcd] Announced new etcd member joining to the existing etcd cluster
[etcd] Creating static Pod manifest for "etcd"
{"level":"warn","ts":"2025-07-18T09:27:36.040077Z","logger":"etcd-client","caller":"v3@v3.5.16/retry_interceptor.go:63","msg":"retrying of unary invoker failed","target":"etcd-endpoints://0xc00037ab40/192.168.66.167:2379","attempt":0,"error":"rpc error: code = FailedPrecondition desc = etcdserver: can only promote a learner member which is in sync with leader"}
[...]
{"level":"warn","ts":"2025-07-18T09:27:44.976805Z","logger":"etcd-client","caller":"v3@v3.5.16/retry_interceptor.go:63","msg":"retrying of unary invoker failed","target":"etcd-endpoints://0xc00037ab40/192.168.66.167:2379","attempt":0,"error":"rpc error: code = FailedPrecondition desc = etcdserver: can only promote a learner member which is in sync with leader"}
[etcd] Waiting for the new etcd member to join the cluster. This can take up to 40s
[mark-control-plane] Marking the node vertex-master as control-plane by adding the labels: [node-role.kubernetes.io/control-plane node.kubernetes.io/exclude-from-external-load-balancers]
[mark-control-plane] Marking the node vertex-master as control-plane by adding the taints [node-role.kubernetes.io/control-plane:NoSchedule]
This node has joined the cluster and a new control plane instance was created:
* Certificate signing request was sent to apiserver and approval was received.
* The Kubelet was informed of the new secure connection details.
* Control plane label and taint were applied to the new node.
* The Kubernetes control plane instances scaled up.
* A new etcd member was added to the local/stacked etcd cluster.
To start administering your cluster from this node, you need to run the following as a regular user:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
Run 'kubectl get nodes' to see this node join the cluster.
```
#### Regenerate Certificates
If the certificates have expired, the `kubeadm join` command will fail with a message like this:
```plaintext
[download-certs] Downloading the certificates in Secret "kubeadm-certs" in the "kube-system" Namespace
error execution phase control-plane-prepare/download-certs: error downloading certs: error downloading the secret: Secret "kubeadm-certs" was not found in the "kube-system" Namespace. This Secret might have expired. Please, run `kubeadm init phase upload-certs --upload-certs` on a control plane to generate a new one
```
If so, re-upload the certificates and generate a new decryption key by running the following command on a control-plane node that has already joined the cluster:
```bash
sudo kubeadm init phase upload-certs --upload-certs
```
```plaintext
I0718 09:26:12.448472 18624 version.go:261] remote version is much newer: v1.33.3; falling back to: stable-1.32
[upload-certs] Storing the certificates in Secret "kubeadm-certs" in the "kube-system" Namespace
[upload-certs] Using certificate key:
7531149107ebc3caf4990f94d19824aecf39d93b84ee1b9c86aee84c04e76656
```
#### Generate Token
Paired with the certificate key, you'll also need a new token. The following command prints the complete control-plane join command:
```bash
sudo kubeadm token create --print-join-command --certificate-key <certificate-key>
```
Run the printed command on the node you want to join as an additional master.
### Join Workers
You can join any number of worker nodes by running the following command as root:
```bash
sudo kubeadm join k8s-lab.lab.vezpi.me:6443 --token 8etamd.g8whseg60kg09nu1 \
--discovery-token-ca-cert-hash sha256:65c4da3121f57d2e67ea6c1c1349544c9e295d78790b199b5c3be908ffe5ed6c
```
```plaintext
[preflight] Running pre-flight checks
[preflight] Reading configuration from the "kubeadm-config" ConfigMap in namespace "kube-system"...
[preflight] Use 'kubeadm init phase upload-config --config your-config.yaml' to re-upload it.
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
[kubelet-start] Starting the kubelet
[kubelet-check] Waiting for a healthy kubelet at http://127.0.0.1:10248/healthz. This can take up to 4m0s
[kubelet-check] The kubelet is healthy after 506.731798ms
[kubelet-start] Waiting for the kubelet to perform the TLS Bootstrap
This node has joined the cluster:
* Certificate signing request was sent to apiserver and a response was received.
* The Kubelet was informed of the new secure connection details.
Run 'kubectl get nodes' on the control-plane to see this node join the cluster.
```
Here again, if you missed the `kubeadm init` output, you can generate a new token along with the full `join` command:
```bash
sudo kubeadm token create --print-join-command
```
Run the printed command on the node you want to join as a worker.
### Verify Cluster
From your controller, verify that all the nodes have joined the cluster and are in the `Ready` status:
```bash
kubectl get node
```
```plaintext
NAME STATUS ROLES AGE VERSION
apex-master Ready control-plane 154m v1.32.7
apex-worker Ready <none> 5m14s v1.32.7
vertex-master Ready control-plane 26m v1.32.7
vertex-worker Ready <none> 3m39s v1.32.7
zenith-master Ready control-plane 23m v1.32.7
zenith-worker Ready <none> 3m26s v1.32.7
```
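Optionally, you can label the worker nodes so the `ROLES` column shows `worker` instead of `<none>`; this is purely cosmetic at this point:
```bash
# Cosmetic role label for the worker nodes
kubectl label node apex-worker node-role.kubernetes.io/worker=""
kubectl label node vertex-worker node-role.kubernetes.io/worker=""
kubectl label node zenith-worker node-role.kubernetes.io/worker=""
```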
To validate that your cluster has proper network connectivity:
```bash
cilium connectivity test
```
```plaintext
__ Monitor aggregation detected, will skip some flow validation steps
[kubernetes] Creating namespace cilium-test-1 for connectivity check...
__ [kubernetes] Deploying echo-same-node service...
__ [kubernetes] Deploying DNS test server configmap...
__ [kubernetes] Deploying same-node deployment...
__ [kubernetes] Deploying client deployment...
__ [kubernetes] Deploying client2 deployment...
__ [kubernetes] Deploying client3 deployment...
__ [kubernetes] Deploying echo-other-node service...
__ [kubernetes] Deploying other-node deployment...
__ [host-netns] Deploying kubernetes daemonset...
__ [host-netns-non-cilium] Deploying kubernetes daemonset...
__ Skipping tests that require a node Without Cilium
[kubernetes] Waiting for deployment cilium-test-1/client to become ready...
__ [kubernetes] Waiting for deployment cilium-test-1/client2 to become ready...
__ [kubernetes] Waiting for deployment cilium-test-1/echo-same-node to become ready...
__ [kubernetes] Waiting for deployment cilium-test-1/client3 to become ready...
__ [kubernetes] Waiting for deployment cilium-test-1/echo-other-node to become ready...
__ [kubernetes] Waiting for pod cilium-test-1/client2-66475877c6-gpdkz to reach DNS server on cilium-test-1/echo-same-node-6c98489c8d-547mc pod...
__ [kubernetes] Waiting for pod cilium-test-1/client3-795488bf5-xrlbp to reach DNS server on cilium-test-1/echo-same-node-6c98489c8d-547mc pod...
__ [kubernetes] Waiting for pod cilium-test-1/client-645b68dcf7-ps276 to reach DNS server on cilium-test-1/echo-same-node-6c98489c8d-547mc pod...
__ [kubernetes] Waiting for pod cilium-test-1/client2-66475877c6-gpdkz to reach DNS server on cilium-test-1/echo-other-node-6d774d44c4-gzkmd pod...
__ [kubernetes] Waiting for pod cilium-test-1/client3-795488bf5-xrlbp to reach DNS server on cilium-test-1/echo-other-node-6d774d44c4-gzkmd pod...
__ [kubernetes] Waiting for pod cilium-test-1/client-645b68dcf7-ps276 to reach DNS server on cilium-test-1/echo-other-node-6d774d44c4-gzkmd pod...
__ [kubernetes] Waiting for pod cilium-test-1/client2-66475877c6-gpdkz to reach default/kubernetes service...
__ [kubernetes] Waiting for pod cilium-test-1/client3-795488bf5-xrlbp to reach default/kubernetes service...
__ [kubernetes] Waiting for pod cilium-test-1/client-645b68dcf7-ps276 to reach default/kubernetes service...
__ [kubernetes] Waiting for Service cilium-test-1/echo-other-node to become ready...
__ [kubernetes] Waiting for Service cilium-test-1/echo-other-node to be synchronized by Cilium pod kube-system/cilium-6824w
__ [kubernetes] Waiting for Service cilium-test-1/echo-other-node to be synchronized by Cilium pod kube-system/cilium-jc4fx
__ [kubernetes] Waiting for Service cilium-test-1/echo-same-node to become ready...
__ [kubernetes] Waiting for Service cilium-test-1/echo-same-node to be synchronized by Cilium pod kube-system/cilium-6824w
__ [kubernetes] Waiting for Service cilium-test-1/echo-same-node to be synchronized by Cilium pod kube-system/cilium-jc4fx
__ [kubernetes] Waiting for NodePort 192.168.66.166:32391 (cilium-test-1/echo-other-node) to become ready...
__ [kubernetes] Waiting for NodePort 192.168.66.166:32055 (cilium-test-1/echo-same-node) to become ready...
__ [kubernetes] Waiting for NodePort 192.168.66.172:32391 (cilium-test-1/echo-other-node) to become ready...
__ [kubernetes] Waiting for NodePort 192.168.66.172:32055 (cilium-test-1/echo-same-node) to become ready...
__ [kubernetes] Waiting for NodePort 192.168.66.167:32391 (cilium-test-1/echo-other-node) to become ready...
__ [kubernetes] Waiting for NodePort 192.168.66.167:32055 (cilium-test-1/echo-same-node) to become ready...
__ [kubernetes] Waiting for NodePort 192.168.66.168:32391 (cilium-test-1/echo-other-node) to become ready...
__ [kubernetes] Waiting for NodePort 192.168.66.168:32055 (cilium-test-1/echo-same-node) to become ready...
__ [kubernetes] Waiting for NodePort 192.168.66.169:32391 (cilium-test-1/echo-other-node) to become ready...
__ [kubernetes] Waiting for NodePort 192.168.66.169:32055 (cilium-test-1/echo-same-node) to become ready...
__ [kubernetes] Waiting for NodePort 192.168.66.170:32391 (cilium-test-1/echo-other-node) to become ready...
__ [kubernetes] Waiting for NodePort 192.168.66.170:32055 (cilium-test-1/echo-same-node) to become ready...
__ [kubernetes] Waiting for DaemonSet cilium-test-1/host-netns-non-cilium to become ready...
__ [kubernetes] Waiting for DaemonSet cilium-test-1/host-netns to become ready...
__ Skipping IPCache check
Enabling Hubble telescope...
__ Unable to contact Hubble Relay, disabling Hubble telescope and flow validation: rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing: dial tcp [::1]:4245: connect: connection refused"
Expose Relay locally with:
cilium hubble enable
cilium hubble port-forward&
__ Cilium version: 1.17.5
[cilium-test-1] Running 123 tests ...
[=] [cilium-test-1] Test [no-policies] [1/123]
[...]
[=] [cilium-test-1] Test [check-log-errors] [123/123]
.................................................
__ [cilium-test-1] All 73 tests (739 actions) successful, 50 tests skipped, 1 scenarios skipped.
```
⌛ This connectivity test could take up to 30 minutes.
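Once the test has finished, you can clean up the workloads it left behind; assuming the default test namespace shown in the output above:
```bash
# Remove the connectivity test resources
kubectl delete namespace cilium-test-1
```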
---
## Conclusion
🚀 Our highly available Kubernetes cluster is ready!
In this post, we walked through the **manual creation of a Kubernetes cluster** in my homelab using `kubeadm`, on top of 6 Ubuntu VMs (3 masters and 3 workers) previously provisioned with Terraform on Proxmox.
We went step by step:
- Preparing the nodes with the required tools, kernel modules, and container runtime
- Installing the Kubernetes packages
- Bootstrapping the cluster from the first master node
- Joining additional control-plane and worker nodes
- Verifying that the cluster is healthy and ready
This manual approach helps to demystify how Kubernetes clusters are built behind the scenes. It's a solid foundation before automating the process in future posts using tools like Ansible.
Stay tuned, next time we'll look into automating all of this!

View File

@@ -1,634 +0,0 @@
---
slug: expose-kubernetes-pods-externally-ingress-tls
title: Exposer des Pods Kubernetes en externe avec Ingress et TLS
description: Découvrez comment exposer des pods Kubernetes en externe avec Services, Ingress et TLS grâce à BGP, NGINX et Cert-Manager dans un homelab.
date: 2025-08-19
draft: false
tags:
- kubernetes
- helm
- bgp
- opnsense
- cilium
- nginx-ingress-controller
- cert-manager
categories:
- homelab
---
## Intro
Après avoir construit mon propre cluster Kubernetes dans mon homelab avec `kubeadm` dans [cet article]({{< ref "post/8-create-manual-kubernetes-cluster-kubeadm" >}}), mon prochain défi est dexposer un pod simple à lextérieur, accessible via une URL et sécurisé avec un certificat TLS validé par Lets Encrypt.
Pour y parvenir, jai besoin de configurer plusieurs composants :
- **Service** : Expose le pod à lintérieur du cluster et fournit un point daccès.
- **Ingress** : Définit des règles de routage pour exposer des services HTTP(S) à lextérieur.
- **Ingress Controller** : Surveille les ressources Ingress et gère réellement le routage du trafic.
- **Certificats TLS** : Sécurisent le trafic en HTTPS grâce à des certificats délivrés par Lets Encrypt.
Cet article vous guide pas à pas pour comprendre comment fonctionne laccès externe dans Kubernetes dans un environnement homelab.
C'est parti.
---
## Helm
Jutilise **Helm**, le gestionnaire de paquets de facto pour Kubernetes, afin dinstaller des composants externes comme lIngress Controller ou cert-manager.
### Pourquoi Helm
Helm simplifie le déploiement et la gestion des applications Kubernetes. Au lieu décrire et de maintenir de longs manifestes YAML, Helm permet dinstaller des applications en une seule commande, en sappuyant sur des charts versionnés et configurables.
### Installer Helm
Jinstalle Helm sur mon hôte bastion LXC, qui dispose déjà dun accès au cluster Kubernetes :
```bash
curl https://baltocdn.com/helm/signing.asc | gpg --dearmor | sudo tee /usr/share/keyrings/helm.gpg > /dev/null
echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/helm.gpg] https://baltocdn.com/helm/stable/debian/ all main" | sudo tee /etc/apt/sources.list.d/helm-stable-debian.list
sudo apt update
sudo apt install helm
```
---
## Services Kubernetes
Avant de pouvoir exposer un pod à lextérieur, il faut dabord le rendre accessible à lintérieur du cluster. Cest là quinterviennent les **Services Kubernetes**.
Les Services agissent comme un pont entre les pods et le réseau, garantissant que les applications restent accessibles même si les pods sont réordonnés ou redéployés.
Il existe plusieurs types de Services Kubernetes, chacun avec un objectif différent :
- **ClusterIP** expose le Service sur une IP interne au cluster, uniquement accessible depuis lintérieur.
- **NodePort** expose le Service sur un port statique de lIP de chaque nœud, accessible depuis lextérieur du cluster.
- **LoadBalancer** expose le Service sur une IP externe, généralement via une intégration cloud (ou via BGP dans un homelab).
---
## Exposer un Service `LoadBalancer` avec BGP
Au départ, jai envisagé dutiliser **MetalLB** pour exposer les adresses IP des services sur mon réseau local. Cest ce que jutilisais auparavant quand je dépendais de la box de mon FAI comme routeur principal. Mais après avoir lu cet article, [Use Cilium BGP integration with OPNsense](https://devopstales.github.io/kubernetes/cilium-opnsense-bgp/), je réalise que je peux obtenir le même résultat (voire mieux) en utilisant **BGP** avec mon routeur **OPNsense** et **Cilium**, mon CNI.
### Quest-ce que BGP ?
BGP (_Border Gateway Protocol_) est un protocole de routage utilisé pour échanger des routes entre systèmes. Dans un homelab Kubernetes, BGP permet à tes nœuds Kubernetes dannoncer directement leurs IPs à ton routeur ou firewall. Ton routeur sait alors exactement comment atteindre les adresses IP gérées par ton cluster.
Au lieu que MetalLB gère lallocation dIP et les réponses ARP, tes nœuds disent directement à ton routeur : « Hé, cest moi qui possède ladresse 192.168.1.240 ».
### Lapproche MetalLB classique
Sans BGP, MetalLB en mode Layer 2 fonctionne comme ceci :
- Il assigne une adresse IP `LoadBalancer` (par exemple `192.168.1.240`) depuis un pool.
- Un nœud répond aux requêtes ARP pour cette IP sur ton LAN.
Oui, MetalLB peut aussi fonctionner avec BGP, mais pourquoi lutiliser si mon CNI (Cilium) le gère déjà nativement ?
### BGP avec Cilium
Avec Cilium + BGP, tu obtiens :
- Lagent Cilium du nœud annonce les IPs `LoadBalancer` via BGP.
- Ton routeur apprend ces routes et les envoie au bon nœud.
- Plus besoin de MetalLB.
### Configuration BGP
BGP est désactivé par défaut, aussi bien sur OPNsense que sur Cilium. Activons-le des deux côtés.
#### Sur OPNsense
Daprès la [documentation officielle OPNsense](https://docs.opnsense.org/manual/dynamic_routing.html#bgp-section), lactivation de BGP nécessite dinstaller un plugin.
Va dans `System` > `Firmware` > `Plugins` et installe le plugin **os-frr** :
![ ](img/opnsense-add-os-frr-plugin.png)
Installer le plugin `os-frr` dans OPNsense
Une fois installé, active le plugin dans `Routing` > `General` :
![ ](img/opnsense-enable-routing-frr-plugin.png)
Activer le routage dans OPNsense
Ensuite, rends-toi dans la section **BGP**. Dans longlet **General** :
- Coche la case pour activer BGP.
- Défini ton **ASN BGP**. Jai choisi `64512`, le premier ASN privé de la plage réservée (voir [ASN table](https://en.wikipedia.org/wiki/Autonomous_system_\(Internet\)#ASN_Table)) :
![ ](img/opnsense-enable-bgp.png)
Ajoute ensuite tes voisins BGP. Je ne fais le peering quavec mes **nœuds workers** (puisque seuls eux hébergent des workloads). Pour chaque voisin :
- Mets lIP du nœud dans `Peer-IP`.
- Utilise `64513` comme **Remote AS** (celui de Cilium).
- Configure `Update-Source Interface` sur `Lab`.
- Coche `Next-Hop-Self`.
![ ](img/opnsense-bgp-create-neighbor.png)
Voici la liste de mes voisins une fois configurés :
![ ](img/opnsense-bgp-neighbor-list.png)
Liste des voisins BGP
Noublie pas la règle firewall pour autoriser BGP (port `179/TCP`) depuis le VLAN **Lab** vers le firewall :
![ ](img/opnsense-create-firewall-rule-bgp-peering.png)
Autoriser TCP/179 de Lab vers OPNsense
#### Dans Cilium
Jai déjà Cilium installé et je nai pas trouvé comment activer BGP avec la CLI, donc je lai simplement réinstallé avec loption BGP :
```bash
cilium uninstall
cilium install --set bgpControlPlane.enabled=true
```
Je configure uniquement les **nœuds workers** pour établir le peering BGP en les labellisant avec un `nodeSelector` :
```bash
kubectl label node apex-worker node-role.kubernetes.io/worker=""
kubectl label node vertex-worker node-role.kubernetes.io/worker=""
kubectl label node zenith-worker node-role.kubernetes.io/worker=""
```
```plaintext
NAME STATUS ROLES AGE VERSION
apex-master Ready control-plane 5d4h v1.32.7
apex-worker Ready worker 5d1h v1.32.7
vertex-master Ready control-plane 5d1h v1.32.7
vertex-worker Ready worker 5d1h v1.32.7
zenith-master Ready control-plane 5d1h v1.32.7
zenith-worker Ready worker 5d1h v1.32.7
```
Pour la configuration BGP complète, jai besoin de :
- **CiliumBGPClusterConfig** : paramètres BGP pour le cluster Cilium, incluant son ASN local et son pair.
- **CiliumBGPPeerConfig** : définit les timers, le redémarrage gracieux et les routes annoncées.
- **CiliumBGPAdvertisement** : indique quels services Kubernetes annoncer via BGP.
- **CiliumLoadBalancerIPPool** : définit la plage dIPs attribuées aux services `LoadBalancer`.
```yaml
---
apiVersion: cilium.io/v2alpha1
kind: CiliumBGPClusterConfig
metadata:
name: bgp-cluster
spec:
nodeSelector:
matchLabels:
node-role.kubernetes.io/worker: "" # Only for worker nodes
bgpInstances:
- name: "cilium-bgp-cluster"
localASN: 64513 # Cilium ASN
peers:
- name: "pfSense-peer"
peerASN: 64512 # OPNsense ASN
peerAddress: 192.168.66.1 # OPNsense IP
peerConfigRef:
name: "bgp-peer"
---
apiVersion: cilium.io/v2alpha1
kind: CiliumBGPPeerConfig
metadata:
name: bgp-peer
spec:
timers:
holdTimeSeconds: 9
keepAliveTimeSeconds: 3
gracefulRestart:
enabled: true
restartTimeSeconds: 15
families:
- afi: ipv4
safi: unicast
advertisements:
matchLabels:
advertise: "bgp"
---
apiVersion: cilium.io/v2alpha1
kind: CiliumBGPAdvertisement
metadata:
name: bgp-advertisement
labels:
advertise: bgp
spec:
advertisements:
- advertisementType: "Service"
service:
addresses:
- LoadBalancerIP
selector:
matchExpressions:
- { key: somekey, operator: NotIn, values: [ never-used-value ] }
---
apiVersion: "cilium.io/v2alpha1"
kind: CiliumLoadBalancerIPPool
metadata:
name: "dmz"
spec:
blocks:
- start: "192.168.55.20" # LB Range Start IP
stop: "192.168.55.250" # LB Range End IP
```
Applique la configuration :
```bash
kubectl apply -f bgp.yaml
ciliumbgpclusterconfig.cilium.io/bgp-cluster created
ciliumbgppeerconfig.cilium.io/bgp-peer created
ciliumbgpadvertisement.cilium.io/bgp-advertisement created
ciliumloadbalancerippool.cilium.io/dmz created
```
Si tout fonctionne, tu devrais voir les sessions BGP **établies** avec tes workers :
```bash
cilium bgp peers
Node Local AS Peer AS Peer Address Session State Uptime Family Received Advertised
apex-worker 64513 64512 192.168.66.1 established 6m30s ipv4/unicast 1 2
vertex-worker 64513 64512 192.168.66.1 established 7m9s ipv4/unicast 1 2
zenith-worker 64513 64512 192.168.66.1 established 6m13s ipv4/unicast 1 2
```
### Déployer un Service `LoadBalancer` avec BGP
Validons rapidement que la configuration fonctionne en déployant un `Deployment` de test et un `Service` de type `LoadBalancer` :
```yaml
---
apiVersion: v1
kind: Service
metadata:
name: test-lb
spec:
type: LoadBalancer
ports:
- port: 80
targetPort: 80
protocol: TCP
name: http
selector:
svc: test-lb
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: nginx
spec:
selector:
matchLabels:
svc: test-lb
template:
metadata:
labels:
svc: test-lb
spec:
containers:
- name: web
image: nginx
imagePullPolicy: IfNotPresent
ports:
- containerPort: 80
readinessProbe:
httpGet:
path: /
port: 80
```
Vérifions si le service obtient une IP externe :
```bash
kubectl get services test-lb
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
test-lb LoadBalancer 10.100.167.198 192.168.55.20 80:31350/TCP 169m
```
Le service a récupéré la première IP du pool défini : `192.168.55.20`.
Depuis nimporte quel appareil du LAN, on peut tester laccès sur le port 80 :
![Test LoadBalancer service with BGP](img/k8s-test-loadbalancer-service-with-bgp.png)
✅ Notre pod est joignable via une IP `LoadBalancer` routée en BGP. Première étape réussie !
---
## Kubernetes Ingress
Nous avons réussi à exposer un pod en externe en utilisant un service `LoadBalancer` et une adresse IP attribuée via BGP. Cette approche fonctionne très bien pour les tests, mais elle ne fonctionne pas à léchelle.
Imagine avoir 10, 20 ou 50 services différents. Est-ce que je voudrais vraiment allouer 50 adresses IP et encombrer mon firewall ainsi que mes tables de routage avec 50 entrées BGP ? Certainement pas.
Cest là quintervient **Ingress**.
### Quest-ce quun Kubernetes Ingress ?
Un Kubernetes **Ingress** est un objet API qui gère **laccès externe aux services** dun cluster, généralement en HTTP et HTTPS, le tout via un point dentrée unique.
Au lieu dattribuer une IP par service, on définit des règles de routage basées sur :
- **Des noms dhôtes** (`app1.vezpi.me`, `blog.vezpi.me`, etc.)
- **Des chemins** (`/grafana`, `/metrics`, etc.)
Avec Ingress, je peux exposer plusieurs services via la même IP et le même port (souvent 443 pour HTTPS), et Kubernetes saura comment router la requête vers le bon service backend.
Voici un exemple simple d`Ingress`, qui route le trafic de `test.vezpi.me` vers le service `test-lb` sur le port 80 :
```yaml
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: test-ingress
spec:
rules:
- host: test.vezpi.me
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: test-lb
port:
number: 80
```
### Ingress Controller
Un Ingress, en soi, nest quun ensemble de règles de routage. Il ne traite pas réellement le trafic. Pour le rendre fonctionnel, il faut un **Ingress Controller**, qui va :
- Surveiller lAPI Kubernetes pour détecter les ressources `Ingress`.
- Ouvrir les ports HTTP(S) via un service `LoadBalancer` ou `NodePort`.
- Router le trafic vers le bon `Service` selon les règles de lIngress.
Parmi les contrôleurs populaires, on retrouve NGINX, Traefik, HAProxy, et dautres encore. Comme je cherchais quelque chose de simple, stable et largement adopté, jai choisi le **NGINX Ingress Controller**.
### Installer NGINX Ingress Controller
Jutilise Helm pour installer le contrôleur, et je définis `controller.ingressClassResource.default=true` pour que tous mes futurs ingress lutilisent par défaut :
```bash
helm install ingress-nginx \
--repo=https://kubernetes.github.io/ingress-nginx \
--namespace=ingress-nginx \
--create-namespace ingress-nginx \
--set controller.ingressClassResource.default=true \
--set controller.config.strict-validate-path-type=false
```
Le contrôleur est déployé et expose un service `LoadBalancer`. Dans mon cas, il récupère la deuxième adresse IP disponible dans la plage BGP :
```bash
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE SELECTOR
ingress-nginx-controller LoadBalancer 10.106.236.13 192.168.55.21 80:31195/TCP,443:30974/TCP 75s app.kubernetes.io/component=controller,app.kubernetes.io/instance=ingress-nginx,app.kubernetes.io/name=ingress-nginx
```
### Réserver une IP statique pour le contrôleur
Je veux massurer que lIngress Controller reçoive toujours la même adresse IP. Pour cela, jai créé deux pools dIP Cilium distincts :
- Un réservé pour lIngress Controller avec une seule IP.
- Un pour tout le reste.
```yaml
---
# Pool for Ingress Controller
apiVersion: cilium.io/v2alpha1
kind: CiliumLoadBalancerIPPool
metadata:
name: ingress-nginx
spec:
blocks:
- cidr: 192.168.55.55/32
serviceSelector:
matchLabels:
app.kubernetes.io/name: ingress-nginx
app.kubernetes.io/component: controller
---
# Default pool for other services
apiVersion: cilium.io/v2alpha1
kind: CiliumLoadBalancerIPPool
metadata:
name: default
spec:
blocks:
- start: 192.168.55.100
stop: 192.168.55.250
serviceSelector:
matchExpressions:
- key: app.kubernetes.io/name
operator: NotIn
values:
- ingress-nginx
```
Après avoir remplacé le pool partagé par ces deux pools, lIngress Controller reçoit bien lIP dédiée `192.168.55.55`, et le service `test-lb` obtient `192.168.55.100` comme prévu :
```bash
NAMESPACE NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
default test-lb LoadBalancer 10.100.167.198 192.168.55.100 80:31350/TCP 6h34m
ingress-nginx ingress-nginx-controller LoadBalancer 10.106.236.13 192.168.55.55 80:31195/TCP,443:30974/TCP 24m
```
### Associer un Service à un Ingress
Maintenant, connectons un service à ce contrôleur.
Je commence par mettre à jour le service `LoadBalancer` dorigine pour le convertir en `ClusterIP` (puisque cest désormais lIngress Controller qui lexposera en externe) :
```yaml
---
apiVersion: v1
kind: Service
metadata:
name: test-lb
spec:
ports:
- port: 80
targetPort: 80
protocol: TCP
name: http
selector:
svc: test-lb
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: test-ingress
spec:
rules:
- host: test.vezpi.me
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: test-lb
port:
number: 80
```
Ensuite, japplique le manifeste `Ingress` pour exposer le service en HTTP.
Comme jutilise le plugin **Caddy** dans OPNsense, jai encore besoin dun routage local de type Layer 4 pour rediriger le trafic de `test.vezpi.me` vers ladresse IP de lIngress Controller (`192.168.55.55`). Je crée donc une nouvelle règle dans le plugin Caddy.
![Create Layer4 router in Caddy plugin for OPNsense](img/opnsense-caddy-create-layer4-route-http.png)
Puis je teste laccès dans le navigateur :
![ ](img/ingress-controller-nginx-test-simple-webserver.png)
Test dun Ingress en HTTP
✅ Mon pod est désormais accessible via son URL HTTP en utilisant un Ingress. Deuxième étape complétée !
---
## Connexion sécurisée avec TLS
Exposer des services en HTTP simple est suffisant pour des tests, mais en pratique nous voulons presque toujours utiliser **HTTPS**. Les certificats TLS chiffrent le trafic et garantissent lauthenticité ainsi que la confiance pour les utilisateurs.
### Cert-Manager
Pour automatiser la gestion des certificats dans Kubernetes, nous utilisons **Cert-Manager**. Il peut demander, renouveler et gérer les certificats TLS sans intervention manuelle.
#### Installer Cert-Manager
Nous le déployons avec Helm dans le cluster :
```bash
helm repo add jetstack https://charts.jetstack.io
helm repo update
helm install cert-manager jetstack/cert-manager \
--namespace cert-manager \
--create-namespace \
--set crds.enabled=true
```
#### Configurer Cert-Manager
Ensuite, nous configurons un **ClusterIssuer** pour Lets Encrypt. Cette ressource indique à Cert-Manager comment demander des certificats :
```yaml
---
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
name: letsencrypt-staging
spec:
acme:
server: https://acme-staging-v02.api.letsencrypt.org/directory
email: <email>
privateKeySecretRef:
name: letsencrypt-staging-key
solvers:
- http01:
ingress:
ingressClassName: nginx
```
Ici, je définis le serveur **staging** de Lets Encrypt ACME pour les tests. Les certificats de staging ne sont pas reconnus par les navigateurs, mais ils évitent datteindre les limites strictes de Lets Encrypt lors du développement.
Appliquez-le :
```bash
kubectl apply -f clusterissuer.yaml
```
Vérifiez si votre `ClusterIssuer` est `Ready` :
```bash
kubectl get clusterissuers.cert-manager.io
NAME READY AGE
letsencrypt-staging True 14m
```
Sil ne devient pas `Ready`, utilisez `kubectl describe` sur la ressource pour le diagnostiquer.
### Ajouter TLS dans un Ingress
Nous pouvons maintenant sécuriser notre service avec TLS en ajoutant une section `tls` dans la spécification `Ingress` et en référençant le `ClusterIssuer` :
```yaml
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: test-ingress-https
annotations:
nginx.ingress.kubernetes.io/rewrite-target: /
cert-manager.io/cluster-issuer: letsencrypt-staging
spec:
tls:
- hosts:
- test.vezpi.me
secretName: test-vezpi-me-tls
rules:
- host: test.vezpi.me
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: test-lb
port:
number: 80
```
En arrière-plan, Cert-Manager suit ce flux pour émettre le certificat :
- Détecte l`Ingress` avec `tls` et le `ClusterIssuer`.
- Crée un CRD **Certificate** décrivant le certificat souhaité + lemplacement du Secret.
- Crée un CRD **Order** pour représenter une tentative démission avec Lets Encrypt.
- Crée un CRD **Challenge** (par ex. validation HTTP-01).
- Met en place un Ingress/Pod temporaire pour résoudre le challenge.
- Crée un CRD **CertificateRequest** et envoie le CSR à Lets Encrypt.
- Reçoit le certificat signé et le stocke dans un Secret Kubernetes.
- LIngress utilise automatiquement ce Secret pour servir en HTTPS.
✅ Une fois ce processus terminé, votre Ingress est sécurisé avec un certificat TLS.
![Certificat TLS validé avec le serveur de staging de Lets Encrypt](img/k8s-test-deploy-service-tls-certificate-staging-lets-encrypt.png)
### Passer aux certificats de production
Une fois que le staging fonctionne, nous pouvons passer au serveur **production** ACME pour obtenir un certificat Lets Encrypt reconnu :
```yaml
---
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
name: letsencrypt
spec:
acme:
server: https://acme-v02.api.letsencrypt.org/directory
email: <email>
privateKeySecretRef:
name: letsencrypt-key
solvers:
- http01:
ingress:
ingressClassName: nginx
```
Mettez à jour l`Ingress` pour pointer vers le nouveau `ClusterIssuer` :
```yaml
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: test-ingress-https
annotations:
cert-manager.io/cluster-issuer: letsencrypt
spec:
tls:
- hosts:
- test.vezpi.me
secretName: test-vezpi-me-tls
rules:
- host: test.vezpi.me
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: test-lb
port:
number: 80
```
Comme le certificat de staging est encore stocké dans le Secret, je le supprime pour forcer une nouvelle demande en production :
```bash
kubectl delete secret test-vezpi-me-tls
```
🎉 Mon `Ingress` est désormais sécurisé avec un certificat TLS valide délivré par Lets Encrypt. Les requêtes vers `https://test.vezpi.me` sont chiffrées de bout en bout et routées par le NGINX Ingress Controller jusquà mon pod `nginx` :
![Ingress HTTPS avec certificat validé par Lets Encrypt](img/k8s-deploy-test-service-tls-certificate-lets-encrypt.png)
---
## Conclusion
Dans ce parcours, je suis parti des bases, en exposant un simple pod avec un service `LoadBalancer`, puis jai construit étape par étape une configuration prête pour la production :
- Compréhension des **Services Kubernetes** et de leurs différents types.
- Utilisation du **BGP avec Cilium** et OPNsense pour attribuer des IP externes directement depuis mon réseau.
- Introduction des **Ingress** pour mieux passer à léchelle, en exposant plusieurs services via un point dentrée unique.
- Installation du **NGINX Ingress Controller** pour gérer le routage.
- Automatisation de la gestion des certificats avec **Cert-Manager**, afin de sécuriser mes services avec des certificats TLS Lets Encrypt.
🚀 Résultat : mon pod est maintenant accessible via une véritable URL, sécurisé en HTTPS, comme nimporte quelle application web moderne.
Cest une étape importante dans mon aventure Kubernetes en homelab. Dans le prochain article, je souhaite explorer le stockage persistant et connecter mon cluster Kubernetes à mon setup **Ceph** sous **Proxmox**.
A la prochaine !

View File

@@ -1,630 +0,0 @@
---
slug: expose-kubernetes-pods-externally-ingress-tls
title: Exposing Kubernetes Pods externally with Ingress and TLS
description: Learn how to expose Kubernetes pods externally with Services, Ingress, and TLS using BGP, NGINX, and Cert-Manager in a homelab setup.
date: 2025-08-19
draft: false
tags:
- kubernetes
- helm
- bgp
- opnsense
- cilium
- nginx-ingress-controller
- cert-manager
categories:
- homelab
---
## Intro
After building my own Kubernetes cluster in my homelab using `kubeadm` in [that post]({{< ref "post/8-create-manual-kubernetes-cluster-kubeadm" >}}), my next challenge is to expose a simple pod externally, reachable at a URL and secured with a TLS certificate verified by Let's Encrypt.
To achieve this, I needed to configure several components:
- **Service**: Expose the pod inside the cluster and provide an access point.
- **Ingress**: Define routing rules to expose HTTP(S) services externally.
- **Ingress Controller**: Listen for Ingress resources and handle the actual traffic routing.
- **TLS Certificates**: Secure traffic with HTTPS using certificates from Let's Encrypt.
This post guides you through each step to understand how external access works in Kubernetes in a homelab environment.
Let's dive in.
---
## Helm
I use **Helm**, the de facto package manager for Kubernetes, to install external components like the Ingress controller or cert-manager.
### Why Helm
Helm simplifies the deployment and management of Kubernetes applications. Instead of writing and maintaining large YAML manifests, Helm lets you install applications with a single command, using versioned and configurable charts.
### Install Helm
I install Helm on my LXC bastion host, which already has access to the Kubernetes cluster:
```bash
curl https://baltocdn.com/helm/signing.asc | gpg --dearmor | sudo tee /usr/share/keyrings/helm.gpg > /dev/null
echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/helm.gpg] https://baltocdn.com/helm/stable/debian/ all main" | sudo tee /etc/apt/sources.list.d/helm-stable-debian.list
sudo apt update
sudo apt install helm
```
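A quick check that the client is installed correctly:
```bash
helm version
```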
---
## Kubernetes Services
Before we can expose a pod externally, we need a way to make it reachable inside the cluster. That's where Kubernetes Services come in.
Services act as the bridge between pods and the network, making sure applications remain reachable even as pods are rescheduled.
There are several types of Kubernetes Services, each serving a different purpose:
- **ClusterIP** exposes the Service on a cluster-internal IP, only accessible inside the cluster.
- **NodePort** exposes the Service on a static port on each node's IP, accessible from outside the cluster.
- **LoadBalancer** exposes the Service on an external IP, typically using cloud integrations (or BGP in a homelab).
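As a minimal sketch (name, selector and ports are arbitrary), the manifest is almost identical for the three types; only the `type` field changes:
```yaml
apiVersion: v1
kind: Service
metadata:
  name: example
spec:
  type: ClusterIP      # or NodePort / LoadBalancer
  selector:
    app: example
  ports:
    - port: 80         # port exposed by the Service
      targetPort: 8080 # port the pod listens on
```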
---
## Expose a `LoadBalancer` Service with BGP
Initially, I considered using **MetalLB** to expose service IPs to my home network. That's what I used in the past when relying on my ISP box as the main router. But after reading this post, [Use Cilium BGP integration with OPNsense](https://devopstales.github.io/kubernetes/cilium-opnsense-bgp/), I realize I can achieve the same result (or even better) using BGP with my **OPNsense** router and **Cilium**, my CNI.
### What Is BGP?
BGP (Border Gateway Protocol) is a routing protocol used to exchange network routes between systems. In the Kubernetes homelab context, BGP allows your Kubernetes nodes to advertise IPs directly to your network router or firewall. Your router then knows how to reach the IPs managed by your cluster.
So instead of MetalLB managing IP allocation and ARP replies, your nodes directly tell your router: "Hey, I own 192.168.1.240".
### Legacy MetalLB Approach
Without BGP, MetalLB in Layer 2 mode works like this:
- Assigns a `LoadBalancer` IP (e.g., `192.168.1.240`) from a pool.
- One node responds to ARP for that IP on your LAN.
Yes, MetalLB can also work with BGP, but why use it when my CNI (Cilium) can handle it out of the box?
### BGP with Cilium
With Cilium + BGP, you get:
- Cilium's agent on the node advertises LoadBalancer IPs over BGP.
- Your router learns that IP and routes to the correct node.
- No need for MetalLB.
### BGP Setup
BGP is disabled by default on both OPNsense and Cilium. Let's enable it on both ends.
#### On OPNsense
According to the [official OPNsense documentation](https://docs.opnsense.org/manual/dynamic_routing.html#bgp-section), enabling BGP requires installing a plugin.
Head to `System` > `Firmware` > `Plugins` and install the `os-frr` plugin:
![ ](img/opnsense-add-os-frr-plugin.png)
Install `os-frr` plugin in OPNsense
Once installed, enable the plugin under `Routing` > `General`:
![ ](img/opnsense-enable-routing-frr-plugin.png)
Enable routing in OPNsense
Then navigate to the `BGP` section. In the **General** tab:
- Tick the box to enable BGP.
- Set your **BGP ASN**. I used `64512`, the first private ASN from the reserved range (see [ASN table](https://en.wikipedia.org/wiki/Autonomous_system_\(Internet\)#ASN_Table)):
![ ](img/opnsense-enable-bgp.png)
General BGP configuration in OPNsense
Now create your BGP neighbors. I'm only peering with my **worker nodes** (since only they run workloads). For each neighbor:
- Set the node's IP in `Peer-IP`
- Use `64513` as the **Remote AS** (Cilium's ASN)
- Set `Update-Source Interface` to `Lab`
- Tick `Next-Hop-Self`:
![ ](img/opnsense-bgp-create-neighbor.png)
BGP neighbor configuration in OPNsense
Here's how my neighbor list looks once complete:
![ ](img/opnsense-bgp-neighbor-list.png)
BGP neighbor list
Don't forget to create a firewall rule allowing BGP (port `179/TCP`) from the **Lab** VLAN to the firewall:
![ ](img/opnsense-create-firewall-rule-bgp-peering.png)
Allow TCP/179 from Lab to OPNsense
#### In Cilium
I already have Cilium installed and couldn't find a way to enable BGP with the CLI, so I simply reinstall it with the BGP option:
```bash
cilium uninstall
cilium install --set bgpControlPlane.enabled=true
```
I configure only the worker nodes to establish BGP peering by labeling them so the `nodeSelector` can match them (the new `worker` role then shows up in `kubectl get nodes`):
```bash
kubectl label node apex-worker node-role.kubernetes.io/worker=""
kubectl label node vertex-worker node-role.kubernetes.io/worker=""
kubectl label node zenith-worker node-role.kubernetes.io/worker=""
```
```plaintext
NAME STATUS ROLES AGE VERSION
apex-master Ready control-plane 5d4h v1.32.7
apex-worker Ready worker 5d1h v1.32.7
vertex-master Ready control-plane 5d1h v1.32.7
vertex-worker Ready worker 5d1h v1.32.7
zenith-master Ready control-plane 5d1h v1.32.7
zenith-worker Ready worker 5d1h v1.32.7
```
For the entire BGP configuration, I need:
- **CiliumBGPClusterConfig**: BGP settings for the Cilium cluster, including its local ASN and its peer
- **CiliumBGPPeerConfig**: Sets BGP timers, graceful restart, and route advertisement settings.
- **CiliumBGPAdvertisement**: Defines which Kubernetes services should be advertised via BGP.
- **CiliumLoadBalancerIPPool**: Configures the range of IPs assigned to Kubernetes LoadBalancer services.
```yaml
---
apiVersion: cilium.io/v2alpha1
kind: CiliumBGPClusterConfig
metadata:
name: bgp-cluster
spec:
nodeSelector:
matchLabels:
node-role.kubernetes.io/worker: "" # Only for worker nodes
bgpInstances:
- name: "cilium-bgp-cluster"
localASN: 64513 # Cilium ASN
peers:
- name: "pfSense-peer"
peerASN: 64512 # OPNsense ASN
peerAddress: 192.168.66.1 # OPNsense IP
peerConfigRef:
name: "bgp-peer"
---
apiVersion: cilium.io/v2alpha1
kind: CiliumBGPPeerConfig
metadata:
name: bgp-peer
spec:
timers:
holdTimeSeconds: 9
keepAliveTimeSeconds: 3
gracefulRestart:
enabled: true
restartTimeSeconds: 15
families:
- afi: ipv4
safi: unicast
advertisements:
matchLabels:
advertise: "bgp"
---
apiVersion: cilium.io/v2alpha1
kind: CiliumBGPAdvertisement
metadata:
name: bgp-advertisement
labels:
advertise: bgp
spec:
advertisements:
- advertisementType: "Service"
service:
addresses:
- LoadBalancerIP
selector:
matchExpressions:
- { key: somekey, operator: NotIn, values: [ never-used-value ] }
---
apiVersion: "cilium.io/v2alpha1"
kind: CiliumLoadBalancerIPPool
metadata:
name: "dmz"
spec:
blocks:
- start: "192.168.55.20" # LB Range Start IP
stop: "192.168.55.250" # LB Range End IP
```
Apply it:
```bash
kubectl apply -f bgp.yaml
ciliumbgpclusterconfig.cilium.io/bgp-cluster created
ciliumbgppeerconfig.cilium.io/bgp-peer created
ciliumbgpadvertisement.cilium.io/bgp-advertisement created
ciliumloadbalancerippool.cilium.io/dmz created
```
If everything works, you should see the BGP sessions **established** with your workers:
```bash
cilium bgp peers
Node Local AS Peer AS Peer Address Session State Uptime Family Received Advertised
apex-worker 64513 64512 192.168.66.1 established 6m30s ipv4/unicast 1 2
vertex-worker 64513 64512 192.168.66.1 established 7m9s ipv4/unicast 1 2
zenith-worker 64513 64512 192.168.66.1 established 6m13s ipv4/unicast 1 2
```
### Deploying a `LoadBalancer` Service with BGP
Let's quickly validate that the setup works by deploying a test `Deployment` and `LoadBalancer` `Service`:
```yaml
---
apiVersion: v1
kind: Service
metadata:
name: test-lb
spec:
type: LoadBalancer
ports:
- port: 80
targetPort: 80
protocol: TCP
name: http
selector:
svc: test-lb
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: nginx
spec:
selector:
matchLabels:
svc: test-lb
template:
metadata:
labels:
svc: test-lb
spec:
containers:
- name: web
image: nginx
imagePullPolicy: IfNotPresent
ports:
- containerPort: 80
readinessProbe:
httpGet:
path: /
port: 80
```
Check if it gets an external IP:
```bash
kubectl get services test-lb
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
test-lb LoadBalancer 10.100.167.198 192.168.55.20 80:31350/TCP 169m
```
The service got the first IP from our defined pool: `192.168.55.20`.
Now from any device on the LAN, try to reach that IP on port 80:
![Test LoadBalancer service with BGP](img/k8s-test-loadbalancer-service-with-bgp.png)
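From a shell on the LAN, the same check can be scripted with curl (the IP comes from the pool above):
```bash
# Expect an HTTP/1.1 200 OK from the default NGINX welcome page
curl -I http://192.168.55.20
```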
✅ Our pod is reachable through BGP-routed `LoadBalancer` IP, first step successful!
---
## Kubernetes Ingress
We managed to expose a pod externally using a `LoadBalancer` service and a BGP-assigned IP address. This approach works great for testing, but it doesn't scale well.
Imagine having 10, 20, or 50 different services: would I really want to allocate 50 IP addresses and clutter my firewall and routing tables with 50 BGP entries? Definitely not.
That's where **Ingress** kicks in.
### What Is a Kubernetes Ingress?
A Kubernetes **Ingress** is an API object that manages **external access to services** in a cluster, typically HTTP and HTTPS, all through a single entry point.
Instead of assigning one IP per service, you define routing rules based on:
- **Hostnames** (`app1.vezpi.me`, `blog.vezpi.me`, etc.)
- **Paths** (`/grafana`, `/metrics`, etc.)
With Ingress, I can expose multiple services over the same IP and port (usually 443 for HTTPS), and Kubernetes will know how to route the request to the right backend service.
Here is an example of a simple `Ingress`, routing traffic of `test.vezpi.me` to the `test-lb` service on port 80:
```yaml
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: test-ingress
spec:
rules:
- host: test.vezpi.me
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: test-lb
port:
number: 80
```
### Ingress Controller
On its own, an Ingress is just a set of routing rules. It doesn't actually handle traffic. To bring it to life, I need an **Ingress Controller**, which:
- Watches the Kubernetes API for `Ingress` resources.
- Opens HTTP(S) ports on a `LoadBalancer` or `NodePort` service.
- Routes traffic to the correct `Service` based on the `Ingress` rules.
Popular controllers include NGINX, Traefik, HAProxy, and more. Since I was looking for something simple, stable, and widely adopted, I picked the **NGINX Ingress Controller**.
### Install NGINX Ingress Controller
I use Helm to install the controller, and I set `controller.ingressClassResource.default=true` so that all my future ingresses use it by default:
```bash
helm install ingress-nginx \
--repo=https://kubernetes.github.io/ingress-nginx \
--namespace=ingress-nginx \
--create-namespace ingress-nginx \
--set controller.ingressClassResource.default=true \
--set controller.config.strict-validate-path-type=false
```
The controller is deployed and exposes a `LoadBalancer` service. In my setup, it picks the second available IP in the BGP range:
```bash
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE SELECTOR
ingress-nginx-controller LoadBalancer 10.106.236.13 192.168.55.21 80:31195/TCP,443:30974/TCP 75s app.kubernetes.io/component=controller,app.kubernetes.io/instance=ingress-nginx,app.kubernetes.io/name=ingress-nginx
```
### Reserving a Static IP for the Controller
I want to make sure the Ingress Controller always receives the same IP address. To do this, I created two separate Cilium IP pools:
- One dedicated for the Ingress Controller with a single IP.
- One for everything else.
```yaml
---
# Pool for Ingress Controller
apiVersion: cilium.io/v2alpha1
kind: CiliumLoadBalancerIPPool
metadata:
name: ingress-nginx
spec:
blocks:
- cidr: 192.168.55.55/32
serviceSelector:
matchLabels:
app.kubernetes.io/name: ingress-nginx
app.kubernetes.io/component: controller
---
# Default pool for other services
apiVersion: cilium.io/v2alpha1
kind: CiliumLoadBalancerIPPool
metadata:
name: default
spec:
blocks:
- start: 192.168.55.100
stop: 192.168.55.250
serviceSelector:
matchExpressions:
- key: app.kubernetes.io/name
operator: NotIn
values:
- ingress-nginx
```
After replacing the previous shared pool with these two, the Ingress Controller gets the desired IP `192.168.55.55`, and the `test-lb` service picks `192.168.55.100` as expected:
```bash
NAMESPACE NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
default test-lb LoadBalancer 10.100.167.198 192.168.55.100 80:31350/TCP 6h34m
ingress-nginx ingress-nginx-controller LoadBalancer 10.106.236.13 192.168.55.55 80:31195/TCP,443:30974/TCP 24m
```
### Associate a Service to an Ingress
Now let's wire up a service to this controller.
First, I update the original `LoadBalancer` service and convert it into a `ClusterIP` (since the Ingress Controller will now expose it externally):
```yaml
---
apiVersion: v1
kind: Service
metadata:
name: test-lb
spec:
ports:
- port: 80
targetPort: 80
protocol: TCP
name: http
selector:
svc: test-lb
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: test-ingress
spec:
rules:
- host: test.vezpi.me
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: test-lb
port:
number: 80
```
Then I apply the `Ingress` manifest as shown earlier to expose the service over HTTP.
Since I'm using the Caddy plugin on OPNsense, I still need a local Layer 4 route to forward traffic for `test.vezpi.me` to the NGINX Ingress Controller IP (`192.168.55.55`). I simply create a new rule in the Caddy plugin.
![Create Layer4 router in Caddy plugin for OPNsense](img/opnsense-caddy-create-layer4-route-http.png)
Now let's test it in the browser:
![ ](img/ingress-controller-nginx-test-simple-webserver.png)
Test Ingress on HTTP
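If DNS for `test.vezpi.me` is not in place yet, you can still exercise the Ingress rule from the command line by targeting the controller IP and setting the Host header yourself:
```bash
# The Host header selects the Ingress rule; the request lands on the controller IP
curl -I -H "Host: test.vezpi.me" http://192.168.55.55
```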
✅ Our pod is now reachable on its HTTP URL using an Ingress. Second step complete!
---
## Secure Connection with TLS
Exposing services over plain HTTP is fine for testing, but in practice we almost always want **HTTPS**. TLS certificates encrypt traffic and provide authenticity and trust to users.
### Cert-Manager
To automate certificate management in Kubernetes, we use **Cert-Manager**. It can request, renew, and manage TLS certificates without manual intervention.
#### Install Cert-Manager
We deploy it with Helm on the cluster:
```bash
helm repo add jetstack https://charts.jetstack.io
helm repo update
helm install cert-manager jetstack/cert-manager \
--namespace cert-manager \
--create-namespace \
--set crds.enabled=true
```
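Before configuring issuers, it's worth confirming that the cert-manager components (controller, cainjector and webhook) are running:
```bash
kubectl get pods -n cert-manager
```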
#### Setup Cert-Manager
Next, we configure a **ClusterIssuer** for Let's Encrypt. This resource tells Cert-Manager how to request certificates:
```yaml
---
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
name: letsencrypt-staging
spec:
acme:
server: https://acme-staging-v02.api.letsencrypt.org/directory
email: <email>
privateKeySecretRef:
name: letsencrypt-staging-key
solvers:
- http01:
ingress:
ingressClassName: nginx
```
Here I define the **staging** Let's Encrypt ACME server for testing purposes. Staging certificates are not trusted by browsers, but they prevent hitting Let's Encrypt's strict rate limits during development.
Apply it:
```bash
kubectl apply -f clusterissuer.yaml
```
Verify if your `ClusterIssuer` is `Ready`:
```bash
kubectl get clusterissuers.cert-manager.io
NAME READY AGE
letsencrypt-staging True 14m
```
If it doesn't become `Ready`, use `kubectl describe` on the resource to troubleshoot.
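For example, with the issuer defined above:
```bash
# Shows the ACME account registration status and any error events
kubectl describe clusterissuer letsencrypt-staging
```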
### Add TLS in an Ingress
Now we can secure our service with TLS by adding a `tls` section in the `Ingress` spec and referencing the `ClusterIssuer`:
```yaml
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: test-ingress-https
annotations:
nginx.ingress.kubernetes.io/rewrite-target: /
cert-manager.io/cluster-issuer: letsencrypt-staging
spec:
tls:
- hosts:
- test.vezpi.me
secretName: test-vezpi-me-tls
rules:
- host: test.vezpi.me
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: test-lb
port:
number: 80
```
Behind the scenes, Cert-Manager goes through this workflow to issue the certificate:
- Detects the `Ingress` with `tls` and the `ClusterIssuer`.
- Creates a Certificate CRD that describes the desired cert + Secret storage.
- Creates an Order CRD to represent one issuance attempt with Let's Encrypt.
- Creates a Challenge CRD (e.g., HTTP-01 validation).
- Provisions a temporary solver Ingress/Pod to solve the challenge.
- Creates a CertificateRequest CRD and sends the CSR to Let's Encrypt.
- Receives the signed certificate and stores it in a Kubernetes Secret.
- The Ingress automatically uses the Secret to serve HTTPS.
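While the challenge is being solved, you can follow this chain of resources; their names are generated by cert-manager, so listing them is easier than guessing:
```bash
# Issuance pipeline, from the Certificate down to the ACME Challenge
kubectl get certificate,certificaterequest,order,challenge -A
```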
✅ Once this process completes, your Ingress is secured with a TLS certificate.
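While this is happening, the intermediate resources can be watched to follow the issuance; a sketch of the commands, assuming the Ingress lives in the default namespace and that the `Certificate` is named after the `secretName`:
```bash
# Follow the resources Cert-Manager creates during issuance
kubectl get certificate,certificaterequest,order,challenge
# Inspect the Certificate if it stays in a not-ready state
kubectl describe certificate test-vezpi-me-tls
```
Once the `Certificate` reports `Ready=True`, the Secret `test-vezpi-me-tls` holds the key and certificate.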
![TLS certificate validated with the Let's Encrypt staging server](img/k8s-test-deploy-service-tls-certificate-staging-lets-encrypt.png)
### Switch to Production Certificates
Once staging works, we can safely switch to the **production** ACME server to get a trusted certificate from Let's Encrypt:
```yaml
---
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
name: letsencrypt
spec:
acme:
server: https://acme-v02.api.letsencrypt.org/directory
email: <email>
privateKeySecretRef:
name: letsencrypt-key
solvers:
- http01:
ingress:
ingressClassName: nginx
```
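I apply it the same way as the staging issuer (sketch, assuming the manifest is saved as `clusterissuer-prod.yaml`):
```bash
kubectl apply -f clusterissuer-prod.yaml
```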
Update the `Ingress` to reference the new `ClusterIssuer`:
```yaml
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: test-ingress-https
annotations:
cert-manager.io/cluster-issuer: letsencrypt
spec:
tls:
- hosts:
- test.vezpi.me
secretName: test-vezpi-me-tls
rules:
- host: test.vezpi.me
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: test-lb
port:
number: 80
```
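Then I re-apply the manifest (assuming it is saved as `test-ingress-https.yaml`):
```bash
kubectl apply -f test-ingress-https.yaml
```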
Since the staging certificate is still stored in the Secret, I delete it to trigger a fresh request against production:
```bash
kubectl delete secret test-vezpi-me-tls
```
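To confirm the switch, a couple of checks (sketch):
```bash
# The Certificate should be re-issued and become Ready again
kubectl get certificate test-vezpi-me-tls
# The served certificate issuer should now be Let's Encrypt instead of the staging CA
echo | openssl s_client -connect test.vezpi.me:443 -servername test.vezpi.me 2>/dev/null \
  | openssl x509 -noout -issuer -dates
```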
🎉 My `Ingress` is now secured with a valid TLS certificate from Let's Encrypt. Requests to `https://test.vezpi.me` are encrypted with TLS, terminated by the NGINX Ingress Controller, and routed to my `nginx` pod:
![HTTPS Ingress with a certificate validated by Let's Encrypt](img/k8s-deploy-test-service-tls-certificate-lets-encrypt.png)
---
## Conclusion
In this journey, I started from the basics, exposing a single pod with a `LoadBalancer` service, and step by step built a production-ready setup:
- Learned about **Kubernetes Services** and their different types.
- Used **BGP with Cilium** and OPNsense to assign external IPs directly from my network.
- Introduced **Ingress** to scale better, exposing multiple services through a single entry point.
- Installed the **NGINX Ingress Controller** to handle routing.
- Automated certificate management with **Cert-Manager**, securing my services with Let's Encrypt TLS certificates.
🚀 The result: my pod is now reachable at a real URL, secured with HTTPS, just like any modern web application.
This is a huge milestone in my homelab Kubernetes journey. In the next article, I want to explore persistent storage and connect my Kubernetes cluster to my **Ceph** setup on **Proxmox**.

View File

@@ -1,19 +1,15 @@
---
slug: blog-deployment-obisidan-hugo-gitea-actions
title: Blog Deployment featuring Obsidian, Hugo and Gitea Actions
description: How I automated my self-hosted blog using Obsidian, Gitea Actions, and Hugo to publish posts directly from my personal notes.
date: 2025-05-02
draft: false
draft: true
tags:
- obsidian
- hugo
- gitea
- gitea-actions
- docker
categories:
- blog
- homelab
---
## 💡 Intro
## 💡 Introduction
I always wanted to share my own experiences to give others ideas or help them on their projects.
@@ -25,7 +21,7 @@ I wanted the entire process to be automated, self-hosted, and integrated into th
## 🔧 Tools
### Obsidian
Before I was using [Notion](https://www.notion.com), but some months ago I switched to [Obsidian](https://obsidian.md/). It's a markdown-based note-taking app that stores everything locally, which gives me more flexibility and control.
Before I was using Notion, but some months ago I switched to [Obsidian](https://obsidian.md/). It's a markdown-based note-taking app that stores everything locally, which gives me more flexibility and control.
To sync my notes between devices, I use the [Obsidian Git plugin](https://github.com/denolehov/obsidian-git), which commits changes to a Git repository hosted on my self-hosted Gitea instance.
@@ -41,9 +37,7 @@ To run those workflows, I installed a [Gitea runner](https://gitea.com/gitea/act
### Hugo
[Hugo](https://gohugo.io/) is a fast and flexible static site generator written in Go. Its perfect for generating content from Markdown files. Hugo is highly customizable, supports themes, and can generate a complete website in seconds.
Its ideal for a blog based on Obsidian notes, and it works beautifully in CI/CD pipelines due to its speed and simplicity.
[Hugo](https://gohugo.io/) is a fast and flexible static site generator written in Go. Its perfect for generating content from Markdown files. Hugo is highly customizable, supports themes, and can generate a complete website in seconds. Its ideal for a blog based on Obsidian notes, and it works beautifully in CI/CD pipelines due to its speed and simplicity.
---
## 🔁 Workflow
@@ -52,15 +46,13 @@ The idea is simple:
1. I write blog content in my Obsidian vault, under a specific `Blog` folder.
2. When I'm done editing the file, the Obisdian Git plugin automatically commits and push updates to the Gitea repository
3. When Gitea receives that push, a first Gitea Action is triggered.
4. The first action syncs the updated blog content to another separate [Git repository](https://git.vezpi.com/Vezpi/blog) which hosts my blog content.
4. The first action syncs the updated blog content to another separate [Git repository](https://git.vezpi.me/Vezpi/blog) which hosts my blog content.
5. In that blog repository, another Gitea Action is triggered.
6. The second Gitea Action generates the static web pages while upgrading Hugo if needed
7. The blog is now updated (the one you are reading).
This way, I never need to manually copy files or trigger deployments. Everything flows from writing markdown in Obsidian to having a fully deployed website.
![Workflow depuis l'écriture de notes sur Obsidian au Blog publié](img/obsidian-blog-gitea-actions-workflow.png)
---
## ⚙️ Implementation
@@ -77,7 +69,7 @@ The Obsidian vault is a private Git repository self-hosted in Gitea. I use docke
container_name: gitea_runner
restart: on-failure
environment:
- GITEA_INSTANCE_URL=https://git.vezpi.com
- GITEA_INSTANCE_URL=https://git.vezpi.me
- GITEA_RUNNER_REGISTRATION_TOKEN=${GITEA_RUNNER_REGISTRATION_TOKEN}$
- GITEA_RUNNER_NAME=self-hosted
- GITEA_RUNNER_LABELS=ubuntu:docker://node:lts,alpine:docker://node:lts-alpine
@@ -100,27 +92,26 @@ container:
```
The runner appears in the `Administration Area`, under `Actions`>`Runners`. To obtain the registration token, click on the `Create new Runner` button
![New runner visible in Gitea](img/gitea-runners-management.png)
![Pasted_image_20250502230954.png](Images/Pasted_image_20250502230954.png)
### Step 3: Set up Gitea Actions for Obsidian Repository
First I enabled the Gitea Actions, this is disabled by default, tick the box `Enable Repository Actions` in the settings for that repository
I created a new PAT (Personal Access Token) with RW permission on the repositories
![New personal access token creation in Gitea](img/gitea-new-pat.png)
![Pasted_image_20250501235521.png](Images/Pasted_image_20250501235521.png)
I added this token as secret `REPO_TOKEN` in the repository
![Add secret window for repository in Gitea](img/gitea-add-repo-secret.png)
![Pasted_image_20250501235427.png](Images/Pasted_image_20250501235427.png)
I needed to create the workflow that will spin-up a container and do the following:
1. When I push new/updated files in the `Blog` folder
2. Checkout the current repository (Obsidian vault)
3. Clone the blog repository
4. Transfer blog content from Obsidian
5. Commit the change to the blog repository
- When I push new/updated files in the `Blog` folder
- Checkout the current repository (Obsidian vault)
- Clone the blog repository
- Transfer blog content from Obsidian
- Commit the change to the blog repository
**sync_blog.yml**
`.gitea/workflows/sync_blog.yml`
```yaml
name: Synchronize content with the blog repo
on:
@@ -139,24 +130,23 @@ jobs:
uses: actions/checkout@v4
- name: Clone the blog repository
run: git clone https://${{ secrets.REPO_TOKEN }}@git.vezpi.com/Vezpi/blog.git
run: git clone https://${{ secrets.REPO_TOKEN }}@git.vezpi.me/Vezpi/blog.git
- name: Transfer blog content from Obsidian
run: |
echo "Copy Markdown files"
rsync -av --delete Blog/ blog/content
# Gather all used images from markdown files
used_images=$(grep -rhoE '^!\[\[.*\]\]' blog/content | sed -E 's/!\[\[(.*)\]\]/\1/' | sort -u)
# Create the target image folder
mkdir -p blog/static/img
used_images=$(grep -rhoE '!\[\[.*\]\]' blog/content | sed -E 's/!\[\[(.*)\]\]/\1/' | sort -u)
mkdir -p blog/assets/Images
# Loop over each used image"
while IFS= read -r image; do
# Loop through all .md files and replace image links
grep -rl "$image" blog/content/* | while IFS= read -r md_file; do
sed -i "s|\!\[\[$image\]\]|\!\[${image// /_}\](img/${image// /_})|g" "$md_file"
sed -i "s|\!\[\[$image\]\]|\!\[${image// /_}\](Images/${image// /_})|g" "$md_file"
done
echo "Copy the image ${image// /_} to the static folder"
cp "Images/$image" "blog/static/img/${image// /_}"
cp "Images/$image" "blog/assets/Images/${image// /_}"
done <<< "$used_images"
- name: Commit the change to the blog repository
@@ -168,12 +158,14 @@ jobs:
git add .
git commit -m "Auto-update blog content from Obsidian: $(date '+%F %T')" || echo "Nothing to commit"
git push -u origin main
```
Obsidian uses wiki-style links for images, like `![[image name.png]]`, which isn't compatible with Hugo out of the box. Here's how I automated a workaround in a Gitea Actions workflow:
- I find all used image references in `.md` files.
- For each referenced image, I update the link in relevant `.md` files like `![image name](img/image_name.png)`.
- I then copy those used images to the blog's static directory while replacing white-spaces by underscores.
- For each referenced image, I update the link in relevant `.md` files like `![image name](Images/image_name.png)`.
- I then copy those used images to the blog's assets directory while replacing white-spaces by underscores.
### Step 4: Gitea Actions for Blog Repository
@@ -181,10 +173,10 @@ The blog repository contains the full Hugo site, including the synced content an
Its workflow:
- Checkout the blog repository
- Check if the Hugo version is up-to-date. If not, it downloads the latest release.
- Check if the Hugo version is up-to-date. If not, it downloads the latest release and replaces the old binary.
- Build the static website using Hugo.
**deploy_blog.yml**
`.gitea/workflows/deploy_blog.yml`
```yaml
name: Deploy
on: [push]
@@ -197,45 +189,38 @@ jobs:
volumes:
- /appli/data/blog:/blog
steps:
- name: Install prerequisites
run: apt update && apt install -y jq
- name: Check out repository
run: |
cd ${BLOG_FOLDER}
git config --global user.name "Gitea Actions"
git config --global user.email "actions@local"
git config --global --add safe.directory ${BLOG_FOLDER}
git submodule update --init --recursive
git fetch origin
git reset --hard origin/main
git pull
- name: Get current Hugo version
run: |
current_version=$(${BLOG_FOLDER}/hugo version | grep -oE 'v[0-9]+\.[0-9]+\.[0-9]+')
echo "current_version=$current_version" | tee -a $GITEA_ENV
run: echo "current_version=$(${BLOG_FOLDER}/bin/hugo version | grep -oE 'v[0-9]+\.[0-9]+\.[0-9]+')" | tee -a $GITEA_ENV
- name: Verify latest Hugo version
run: |
latest_version=$(curl -s https://api.github.com/repos/gohugoio/hugo/releases/latest | grep -oP '"tag_name": "\K[^"]+')
echo "latest_version=$latest_version" | tee -a $GITEA_ENV
run: echo "latest_version=$(curl -s https://api.github.com/repos/gohugoio/hugo/releases/latest | jq -r .tag_name)" | tee -a $GITEA_ENV
- name: Download latest Hugo version
if: env.current_version != env.latest_version
run: |
rm -f ${BLOG_FOLDER}/{LICENSE,README.md,hugo}
curl -L https://github.com/gohugoio/hugo/releases/download/$latest_version/hugo_extended_${latest_version#v}_Linux-64bit.tar.gz -o hugo.tar.gz
tar -xzvf hugo.tar.gz -C ${BLOG_FOLDER}/
tar -xzvf hugo.tar.gz -C ${BLOG_FOLDER}/bin/
- name: Generate the static files with Hugo
run: |
rm -f ${BLOG_FOLDER}/content/posts/template.md
rm -rf ${BLOG_FOLDER}/private/* ${BLOG_FOLDER}/public/*
${BLOG_FOLDER}/hugo -D -b https://blog-dev.vezpi.me -s ${BLOG_FOLDER} -d ${BLOG_FOLDER}/private
${BLOG_FOLDER}/hugo -s ${BLOG_FOLDER} -d ${BLOG_FOLDER}/public
chown 1000:1000 -R ${BLOG_FOLDER}
${BLOG_FOLDER}/bin/hugo -D -b https://blog-dev.vezpi.me -s ${BLOG_FOLDER} -d ${BLOG_FOLDER}/private
${BLOG_FOLDER}/bin/hugo -s ${BLOG_FOLDER} -d ${BLOG_FOLDER}/public
```
---
## 🚀 Results
This workflow allows me to focus on what matters most: writing and refining my content. By automating the publishing pipeline, from syncing my Obsidian notes to building the blog with Hugo, I no longer need to worry about manually managing content in a CMS.
Every note I draft can evolve naturally into a clear, structured article, and the technical workflow fades into the background. Its a simple yet powerful way to turn personal knowledge into shareable documentation.
This workflow allows me to focus on what matters most: writing and refining my content. By automating the publishing pipeline from syncing my Obsidian notes to building the blog with Hugo I no longer need to worry about manually managing content in a CMS. Every note I draft can evolve naturally into a clear, structured article, and the technical workflow fades into the background. Its a simple yet powerful way to turn personal knowledge into shareable documentation.

View File

@@ -0,0 +1,135 @@
---
title: Welcome to My Homelab Blog
date: 2025-04-25
draft: false
tags:
- homelab
- infrastructure
- docker
---
# 🏡 Welcome to My Homelab
This is a placeholder post to test the features of my new blog powered by **Hugo** and the `PaperMod` theme.
I like :
- Chicken
- Sausage
- Oranges
- Potatoes
- Pasta
- Beer
- Squid
- Doritos
- Banana
- Kiwi
- Pizza
- Automation
- Cats
- Tomatoes
- Fish
- Girls
---
## 🧱 Markdown Elements
### Headers
```markdown
# H1
## H2
### H3
```
### Lists
#### Unordered
- Docker
- Proxmox
- Kubernetes
#### Ordered
1. Build homelab
2. Break homelab 😅
3. Fix it and learn
---
### ✅ Checkboxes
- [x] Installed Hugo
- [x] Added PaperMod theme
- [x] Created this test post
- [ ] Sync with Obsidian vault
---
### 🔗 Links
Check out [PaperMod on GitHub](https://github.com/adityatelange/hugo-PaperMod)
---
### 💬 Blockquote
> “Simplicity is the ultimate sophistication.” — Leonardo da Vinci
---
### 💡 Inline Code
Use `docker compose up -d` to start your stack.
---
### 🧑‍💻 Code Blocks
```bash
# A bash snippet
sudo apt update
sudo apt install hugo
```
```yaml
# A YAML snippet
version: '3'
services:
blog:
image: nginx:alpine
ports:
- "80:80"
```
```go
// A Go snippet (because Hugo is written in Go)
func main() {
fmt.Println("Hello, Hugo!")
}
```
---
### 📸 Image
![Example Image](https://via.placeholder.com/800x400.png?text=My+Blog)
---
### 📁 Directory Tree
```plaintext
myblog/
├── config.toml
├── content/
│ └── posts/
│ └── hello-world.md
└── themes/
└── PaperMod/
```
---
Thanks for stopping by! 🎉

View File

@@ -1,22 +1,20 @@
---
slug: proxmox-cloud-init-vm-template
title: Proxmox - Create a Cloud-Init VM Template
description: Learn how to create a reusable Ubuntu VM template with cloud-init in Proxmox to speed up and simplify virtual machine deployment.
date: 2025-03-31
draft: false
date: 2025-01-31
draft: true
tags:
- proxmox
- cloud-init
categories:
- homelab
---
## Intro
## Introduction
Creating a VM template in Proxmox using cloud-init can streamline VM deployments significantly. This post covers the step-by-step process to set up a cloud-init-enabled VM template using Ubuntu for Proxmox.
Creating a reusable VM template in Proxmox using Cloud-init can streamline VM deployments significantly. This post covers the step-by-step process to set up a cloud-init-enabled VM template using Ubuntu for Proxmox.
Proxmox supports cloud-init, a tool that allows the automatic configuration of virtual machines right after they are provisioned. This includes setting up networking, SSH keys, and other initial settings.
Proxmox supports Cloud-init, a tool that allows the automatic configuration of virtual machines right after they are provisioned. This includes setting up networking, SSH keys, and other initial settings.
In this guide, we'll create a VM template with cloud-init enabled, allowing for rapid deployment of pre-configured VMs.
In this guide, we'll create a VM template with Cloud-init enabled, allowing for rapid deployment of pre-configured VMs.
---
## Why Cloud-init?
@@ -27,16 +25,15 @@ Cloud-init is a widely used tool for automating the initial configuration of clo
## Download the OS Image
First, we need to download an image with cloud-init support. Although Rocky Linux was initially considered, the `.img` format was not available, and the `.qcow2` format caused issues. Instead, we will proceed with the Ubuntu cloud image.
First, we need to download an image with Cloud-init support. Although Rocky Linux was initially considered, the `.img` format was not available, and the `.qcow2` format caused issues. Instead, we will proceed with the Ubuntu cloud image.
Find cloud-ready images from the [OpenStack Image Guide](https://docs.openstack.org/image-guide/obtain-images.html).
In Proxmox, navigate to **Storage > ISO Images > Upload** to upload the downloaded image.
![Download window for ISO images in Proxmox](img/proxmox-download-iso-img.png)
![Pasted_image_20250131144754.png](Images/Pasted_image_20250131144754.png)
## Create the VM
Next, we create the VM using the command line interface (CLI) from the Proxmox node with the following command:
Next, we create the VM using the command line interface (CLI) with the following command:
```bash
qm create 900 \
@@ -62,7 +59,7 @@ qm set 900 --scsi0 ceph-workload:0,import-from=/var/lib/vz/template/iso/noble-se
### Configure Cloud-init
Add a cloud-init CD drive to the VM:
Add a Cloud-init CD drive to the VM:
```bash
qm set 900 --scsi1 ceph-workload:cloudinit
@@ -86,6 +83,6 @@ After configuring the VM, right-click on the VM in the Proxmox WebUI and select
## Conclusion
This method allows for rapid deployment using Proxmox of pre-configured VMs and cloud-init.
This method allows for rapid deployment of pre-configured VMs using Proxmox and Cloud-init.
The template can now be used to spawn new instances with custom configurations by providing the necessary cloud-init parameters. This is particularly useful for deploying multiple instances with consistent baseline configurations quickly.
The template can now be used to spawn new instances with custom configurations by providing the necessary Cloud-init parameters. This is particularly useful for deploying multiple instances with consistent baseline configurations quickly.

View File

@@ -1,18 +0,0 @@
---
title: Playground
description:
date: 2025-06-25
draft: true
tags:
categories:
---
Hi there, how are you ?
I'm ==testing==
## Emoji
🚀💡🔧🔁⚙️📝📌✅⚠️🍒❌ℹ️⌛🚨🎉
[post]({{< ref "post/0-template" >}})

View File

@@ -1,7 +1,5 @@
---
slug:
title: Template
description:
date:
draft: true
tags:

View File

@@ -1,28 +0,0 @@
FROM nginx:stable
ARG HUGO_VERSION
ENV HUGO_VERSION=${HUGO_VERSION}
ENV HUGO_DEST=/usr/share/nginx/html
# Install dependencies
RUN apt-get update && apt-get install -y \
curl \
git \
ca-certificates \
&& rm -rf /var/lib/apt/lists/*
# Download Hugo
RUN curl -sSL https://github.com/gohugoio/hugo/releases/download/v${HUGO_VERSION}/hugo_extended_${HUGO_VERSION}_Linux-64bit.tar.gz \
| tar -xz -C /usr/local/bin hugo
# Add entrypoint script
COPY entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh
# Copy custom nginx config
COPY nginx.conf /etc/nginx/conf.d/default.conf
# Nginx serves on port 80
EXPOSE 80
# Set default entrypoint
ENTRYPOINT ["/entrypoint.sh"]

View File

@@ -1,30 +0,0 @@
#!/bin/sh
set -e
# Configuration
REPO_URL="${REPO_URL:-https://git.vezpi.com/Vezpi/blog.git}"
URL="${URL:-blog.vezpi.com}"
BRANCH="${BRANCH:-preview}"
CLONE_DIR="${CLONE_DIR:-/blog}"
DRAFTS=""
# Add drafts for preview
if [ "$BRANCH" = "preview" ]; then
echo "- Adding draft pages to be generated"
DRAFTS="--buildDrafts --buildFuture"
fi
# Clean blog dir
rm -rf "$CLONE_DIR"
# Clone repo
echo "- Cloning $REPO_URL (branch: $BRANCH)..."
git clone --recurse-submodules --branch "$BRANCH" "$REPO_URL" "$CLONE_DIR"
# Generate static files with hugo
echo "- Building site with Hugo v$HUGO_VERSION in $HUGO_DEST..."
hugo --source "$CLONE_DIR" --destination "$HUGO_DEST" --baseURL="https://${URL}" ${DRAFTS} --logLevel info --cleanDestinationDir --gc --panicOnWarning --printI18nWarnings
# Start nginx
echo "- Starting Nginx..."
exec nginx -g 'daemon off;'

View File

@@ -1,26 +0,0 @@
map $http_accept_language $lang {
default en;
~fr fr;
}
server {
listen 80;
server_name _;
root /usr/share/nginx/html;
index index.html;
# Redirect users to their language home page
rewrite ^/$ /$lang/ redirect;
location / {
try_files $uri $uri/ =404;
}
# Custom 404 page
error_page 404 /$lang/404.html;
location = /$lang/404.html {
internal;
}
}

159
hugo.yaml
View File

@@ -1,114 +1,17 @@
baseURL: "https://blog.vezpi.com/"
baseURL: "https://blog.vezpi.me/"
languageCode: "en-us"
title: "Vezpi Lab"
theme: "stack"
languageCode: "en-us"
enableGitInfo: true
DefaultContentLanguage: "en"
defaultContentLanguageInSubdir: true
languages:
en:
languageName: English
weight: 1
menu:
main:
- identifier: categories
name: Categories
weight: 40
url: /en/categories
params:
icon: categories
- identifier: tags
name: Tags
weight: 50
url: /en/tags
params:
icon: tag
social:
- identifier: "mail"
name: Mail
url: "mailto:etienne.girault@gmail.com"
weight: 10
params:
icon: "mail"
- identifier: "github"
name: "GitHub"
url: "https://github.com/Vezpi"
weight: 20
params:
icon: "brand-github"
- identifier: "linkedin"
name: "LinkedIn"
url: "https://www.linkedin.com/in/etiennegirault/"
weight: 40
params:
icon: "brand-linkedin"
params:
sidebar:
subtitle: "Homelab & Automation"
dateFormat:
published: "Jan 2, 2006"
lastUpdated: "Jan 2, 2006"
fr:
languageName: Français
weight: 2
menu:
main:
- identifier: categories
name: Catégories
weight: 40
url: /fr/categories
params:
icon: categories
- identifier: tags
name: Mots Clés
weight: 50
url: /fr/tags
params:
icon: tag
social:
- identifier: "mail"
name: Mail
url: "mailto:etienne.girault@gmail.com"
weight: 10
params:
icon: "mail"
- identifier: "github"
name: "GitHub"
url: "https://github.com/Vezpi"
weight: 20
params:
icon: "brand-github"
- identifier: "linkedin"
name: "LinkedIn"
url: "https://www.linkedin.com/in/etiennegirault/"
weight: 30
params:
icon: "brand-linkedin"
params:
sidebar:
subtitle: "Homelab & Automatisation"
dateFormat:
published: "2 Jan 2006"
lastUpdated: "2 Jan 2006"
frontmatter:
format: "yaml"
params:
mainSections: ["post"]
description: "A blog documenting my homelab journey, covering automation, self-hosted services, and hands-on experiments with open source technologies."
favicon: "/favicon-32x32.png"
sitemapFilter: ["/tags/", "/categories/", "/search/"]
sidebar:
subtitle: "Homelab & Automation"
avatar:
enabled: true
src: avatar.jpg
@@ -118,6 +21,9 @@ params:
since: 2025
customText: ""
dateFormat:
published: "Jan 2, 2006"
lastUpdated: "Jan 2, 2006"
toc:
enable: true
@@ -133,19 +39,54 @@ params:
widgets:
homepage:
- type: search
- type: categories
- type: tag-cloud
- type: "search"
- type: "categories"
- type: "tag-cloud"
page:
- type: search
- type: toc
- type: "toc"
colorScheme:
toggle: true
default: "dark"
menu:
main: []
main:
- name: Categories
url: /categories/
weight: 50
params:
icon: "categories"
- name: Tags
url: /tags/
weight: 60
params:
icon: "tag"
social:
- identifier: "mail"
name: Mail
url: "mailto:etienne.girault@gmail.com"
weight: 10
params:
icon: "mail"
- identifier: "github"
name: "GitHub"
url: "https://github.com/Vezpi"
weight: 20
params:
icon: "brand-github"
- identifier: "gitea"
name: "Gitea"
url: "https://git.vezpi.me/Vezpi/blog"
weight: 30
params:
icon: "brand-git"
- identifier: "linkedin"
name: "LinkedIn"
url: "https://www.linkedin.com/in/etiennegirault/"
weight: 40
params:
icon: "brand-linkedin"
taxonomies:
category: "categories"
@@ -161,10 +102,6 @@ related:
- name: categories
weight: 20
sitemap:
ChangeFreq: "weekly"
Priority: 0.6
outputs:
home:
- HTML

View File

@@ -1,78 +0,0 @@
menu_categories:
other: Catgories
menu_tags:
other: Tags
toggleMenu:
other: Toggle Menu
darkMode:
other: Theme
list:
page:
one: "{{ .Count }} page"
other: "{{ .Count }} pages"
section:
other: Section
subsection:
one: Subsection
other: Subsections
article:
back:
other: Back
tableOfContents:
other: Table of contents
relatedContent:
other: Related content
lastUpdatedOn:
other: Last updated on
readingTime:
one: "{{ .Count }} minute read"
other: "{{ .Count }} minute read"
notFound:
title:
other: Oops!
subtitle:
other: This page does not exist
widget:
archives:
title:
other: Archives
more:
other: More
tagCloud:
title:
other: Tags
categoriesCloud:
title:
other: Categories
search:
title:
other: Search
placeholder:
other: Type something...
resultTitle:
other: "#PAGES_COUNT pages (#TIME_SECONDS seconds)"
footer:
builtWith:
other: " "
designedBy:
other: " "

View File

@@ -1,77 +0,0 @@
menu_categories:
other: Catégories
menu_tags:
other: Mots Clés
toggleMenu:
other: Afficher le menu
darkMode:
other: Thème
list:
page:
one: "{{ .Count }} page"
other: "{{ .Count }} pages"
section:
other: Section
subsection:
one: Sous-section
other: Sous-sections
article:
back:
other: Retour
tableOfContents:
other: Table des matières
relatedContent:
other: Contenus liés
lastUpdatedOn:
other: Dernière mise à jour le
readingTime:
one: "{{ .Count }} minute de lecture"
other: "{{ .Count }} minutes de lecture"
notFound:
title:
other: Oups !
subtitle:
other: Cette page n'existe pas.
widget:
archives:
title:
other: Archives
more:
other: Autres
tagCloud:
title:
other: Mots clés
categoriesCloud:
title:
other: Catégories
search:
title:
other: Rechercher
placeholder:
other: Tapez quelque chose...
resultTitle:
other: "#PAGES_COUNT pages (#TIME_SECONDS secondes)"
footer:
builtWith:
other: " "
designedBy:
other: " "

View File

@@ -1,20 +0,0 @@
{{ printf "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"yes\"?>" | safeHTML }}
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xhtml="http://www.w3.org/1999/xhtml">
{{ range $page := .Data.Pages }}
{{- $includeURL := true -}}
{{- range $val := $.Site.Params.sitemapFilter -}}
{{- if (in $page.Permalink $val) -}}
{{- $includeURL = false -}}
{{- end -}}
{{- end -}}
{{- if and $page.Permalink $includeURL -}}
<url>
<loc>{{ $page.Permalink }}</loc>{{ if not $page.Lastmod.IsZero }}
<lastmod>{{ $page.Lastmod.Format "2006-01-02T15:04:05-07:00" | safeHTML }}</lastmod>{{ end }}{{ with $page.Sitemap.ChangeFreq }}
<changefreq>{{ . }}</changefreq>{{ end }}{{ if ge $page.Sitemap.Priority 0.0 }}
<priority>{{ $page.Sitemap.Priority }}</priority>{{ end }}{{ if $page.IsTranslated }}{{ range $page.Translations }}
<xhtml:link rel="alternate" hreflang="{{ .Language.Lang }}" href="{{ .Permalink }}"/>{{ end }}{{ end }}
</url>
{{- end -}}
{{ end }}
</urlset>

View File

@@ -1,30 +0,0 @@
{{ define "main" }}
<header class="homepage-header">
{{ with .Title }}
<h1 class="article-title">{{ . }}</h1>
{{ end }}
{{ with .Content }}
<div class="article-subtitle">
{{ . }}
</div>
{{ end }}
</header>
{{ $pages := where .Site.RegularPages "Type" "in" .Site.Params.mainSections }}
{{ $notHidden := where .Site.RegularPages "Params.hidden" "!=" true }}
{{ $filtered := ($pages | intersect $notHidden) }}
{{ $pag := .Paginate ($filtered) }}
<section class="article-list">
{{ range $index, $element := $pag.Pages }}
{{ partial "article-list/default" . }}
{{ end }}
</section>
{{- partial "pagination.html" . -}}
{{- partial "footer/footer" . -}}
{{ end }}
{{ define "right-sidebar" }}
{{ partial "sidebar/right.html" (dict "Context" . "Scope" "homepage") }}
{{ end }}

View File

@@ -1,11 +0,0 @@
{{- $.Scratch.Add "index" slice -}}
{{- range .Site.RegularPages -}}
{{- $.Scratch.Add "index" (dict
"title" .Title
"permalink" .Permalink
"summary" .Summary
"tags" .Params.tags
"date" (.Date.Format "2006-01-02")
) -}}
{{- end -}}
{{- $.Scratch.Get "index" | jsonify -}}

View File

@@ -1,60 +0,0 @@
<div class="article-details">
{{ if .Params.categories }}
<header class="article-category">
{{ range (.GetTerms "tags") }}
<a href="{{ .RelPermalink }}" {{ with .Params.style }}style="background-color: {{ .background }}; color: {{ .color }};"{{ end }}>
{{ .LinkTitle }}
</a>
{{ end }}
</header>
{{ end }}
<div class="article-title-wrapper">
<h2 class="article-title">
<a href="{{ .RelPermalink }}">
{{- .Title -}}
</a>
</h2>
{{ with .Params.description }}
<h3 class="article-subtitle">
{{ . }}
</h3>
{{ end }}
</div>
{{ $showReadingTime := .Params.readingTime | default (.Site.Params.article.readingTime) }}
{{ $showDate := not .Date.IsZero }}
{{ $showFooter := or $showDate $showReadingTime }}
{{ if $showFooter }}
<footer class="article-time">
{{ if $showDate }}
<div>
{{ partial "helper/icon" "date" }}
<time class="article-time--published">
{{- .Date | time.Format (or .Site.Params.dateFormat.published "Jan 02, 2006") -}}
</time>
</div>
{{ end }}
{{ if $showReadingTime }}
<div>
{{ partial "helper/icon" "stopwatch" }}
<time class="article-time--reading">
{{ T "article.readingTime" .ReadingTime }}
</time>
</div>
{{ end }}
{{- $date := .Date.Format "20060102" | int -}}
{{- $lastmod := .Lastmod.Format "20060102" | int -}}
{{- if gt $lastmod $date -}}
<div class="article-lastmod">
{{ partial "helper/icon" "refresh" }}
<time>
{{ T "article.lastUpdatedOn" }} {{ .Lastmod | time.Format ( or .Site.Params.dateFormat.lastUpdated "Jan 02, 2006 15:04 MST" ) }}
</time>
</div>
{{- end -}}
</footer>
{{ end }}
</div>

View File

@@ -1,10 +0,0 @@
<script defer src="https://analytics.vezpi.com/script.js" data-website-id="e50e5843-1039-4bc8-a3f6-80f60e25ea38"></script>
{{ if or (eq .Kind "taxonomy") (eq .Kind "term") }}
<meta name="robots" content="noindex,follow">
{{ end }}
{{- if .Params.keywords }}
<meta name="keywords" content="{{ delimit .Params.keywords ", " }}">
{{- else if .Params.tags }}
<meta name="keywords" content="{{ delimit .Params.tags ", " }}">
{{- end }}

View File

@@ -1,99 +0,0 @@
<aside class="sidebar left-sidebar sticky {{ if .Site.Params.sidebar.compact }}compact{{ end }}">
<button class="hamburger hamburger--spin" type="button" id="toggle-menu" aria-label="{{ T `toggleMenu` }}">
<span class="hamburger-box">
<span class="hamburger-inner"></span>
</span>
</button>
<header>
{{ with .Site.Params.sidebar.avatar }}
{{ if (default true .enabled) }}
<figure class="site-avatar" style="margin-left: auto; margin-right: auto;">
<a href="{{ .Site.BaseURL | relLangURL }}">
{{ if not .local }}
<img src="{{ .src }}" width="300" height="300" class="site-logo" loading="lazy" alt="Avatar">
{{ else }}
{{ $avatar := resources.Get (.src) }}
{{ if $avatar }}
{{ $avatarResized := $avatar.Resize "300x" }}
<img src="{{ $avatarResized.RelPermalink }}" width="{{ $avatarResized.Width }}"
height="{{ $avatarResized.Height }}" class="site-logo" loading="lazy" alt="Avatar">
{{ else }}
{{ errorf "Failed loading avatar from %q" . }}
{{ end }}
{{ end }}
</a>
{{ with $.Site.Params.sidebar.emoji }}
<span class="emoji">{{ . }}</span>
{{ end }}
</figure>
{{ end }}
{{ end }}
<div class="site-meta" style="text-align: center;">
<h1 class="site-name"><a href="{{ .Site.BaseURL | relLangURL }}">{{ .Site.Title }}</a></h1>
<h2 class="site-description" style="font-size: 1.3rem;">{{ .Site.Params.sidebar.subtitle }}</h2>
</div>
</header>
{{- $page := . -}}
{{- with .Site.Menus.social -}}
<ol class="menu-social">
{{ range . }}
<li>
<a
href='{{ .URL }}'
{{ if eq (default true .Params.newTab) true }}target="_blank"{{ end }}
{{ with .Name }}title="{{ . }}"{{ end }}
rel="me"
>
{{ $icon := default "link" .Params.Icon }}
{{ with $icon }}
{{ partial "helper/icon" . }}
{{ end }}
</a>
</li>
{{ end }}
{{- $currentLang := $page.Language.Lang -}}
{{- range $page.AllTranslations }}
{{- if ne .Language.Lang $currentLang }}
<li class="lang-toggle-icon">
<a href="{{ .Permalink }}" title="Switch to {{ .Language.Lang }}">
{{ partial "helper/icon" (printf "toggle_to_%s" .Language.Lang) }}
</a>
</li>
{{- end }}
{{- end }}
</ol>
{{- end -}}
<ol class="menu" id="main-menu">
{{ $currentPage := . }}
{{ range .Site.Menus.main }}
{{ $active := or (eq $currentPage.Title .Name) (or ($currentPage.HasMenuCurrent "main" .) ($currentPage.IsMenuCurrent "main" .)) }}
<li {{ if $active }} class='current' {{ end }}>
<a href='{{ .URL }}' {{ if eq .Params.newTab true }}target="_blank"{{ end }}>
{{ $icon := default .Pre .Params.Icon }}
{{ if .Pre }}
{{ warnf "Menu item [%s] is using [pre] field to set icon, please use [params.icon] instead.\nMore information: https://stack.jimmycai.com/config/menu" .URL }}
{{ end }}
{{ with $icon }}
{{ partial "helper/icon" . }}
{{ end }}
<span>{{- .Name -}}</span>
</a>
</li>
{{ end }}
<li class="menu-bottom-section">
<ol class="menu">
{{ if (default false .Site.Params.colorScheme.toggle) }}
<li id="dark-mode-toggle">
{{ partial "helper/icon" "moon" }}
<span>{{ T "darkMode" }}</span>
</li>
{{ end }}
</ol>
</li>
</ol>
</aside>

File diff suppressed because it is too large

View File

@@ -0,0 +1 @@
{"Target":"css/coder.css","MediaType":"text/css","Data":{}}

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1 @@
{"Target":"/scss/style.min.663803bebe609202d5b39d848f2d7c2dc8b598a2d879efa079fa88893d29c49c.css","MediaType":"text/css","Data":{"Integrity":"sha256-ZjgDvr5gkgLVs52Ejy18Lci1mKLYee+gefqIiT0pxJw="}}

Binary image files changed (diffs not shown).
Some files were not shown because too many files have changed in this diff.