Compare commits

7a5de897b0...preview (212 commits)

@@ -4,6 +4,8 @@ on:
   push:
     branches:
       - preview
+  schedule:
+    - cron: '0 3 * * 5'
 
 env:
   DOCKER_IMAGE: vezpi-blog
@@ -20,9 +22,10 @@ jobs:
       newer_version_available: ${{ steps.compare.outputs.version }}
       current_docker_image: ${{ steps.current_docker.outputs.image }}
       docker_folder_changed: ${{ steps.docker_folder.outputs.changed }}
+      dev_lock_present: ${{ steps.check_dev_lock.outputs.locked }}
     steps:
       - name: Checkout Repository
-        run: git clone --branch preview https://${{ secrets.REPO_TOKEN }}@git.vezpi.me/Vezpi/blog.git .
+        run: git clone --branch preview https://${{ secrets.REPO_TOKEN }}@git.vezpi.com/Vezpi/blog.git .
 
       - name: Check Latest Hugo Version
         id: get_latest
@@ -68,6 +71,15 @@ jobs:
           fi
           echo "changed=$docker_folder_changed" | tee -a $GITEA_OUTPUT
 
+      - name: Check for .dev-lock file
+        id: check_dev_lock
+        run: |
+          if [ -f .dev-lock ]; then
+            echo "locked=true" | tee -a $GITEA_OUTPUT
+          else
+            echo "locked=false" | tee -a $GITEA_OUTPUT
+          fi
+
   Build:
     needs: Check-Rebuild
     if: needs.Check-Rebuild.outputs.newer_version_available == 'true' || needs.Check-Rebuild.outputs.docker_folder_changed == 'true'
@@ -77,7 +89,7 @@ jobs:
         shell: sh
     steps:
       - name: Checkout Repository
-        run: git clone --branch preview https://${{ secrets.REPO_TOKEN }}@git.vezpi.me/Vezpi/blog.git .
+        run: git clone --branch preview https://${{ secrets.REPO_TOKEN }}@git.vezpi.com/Vezpi/blog.git .
 
       - name: Build Docker Image
         run: |
@@ -89,9 +101,7 @@ jobs:
          docker tag ${DOCKER_IMAGE}:${{ needs.Check-Rebuild.outputs.latest_hugo_version }} ${DOCKER_IMAGE}:latest
 
   Deploy-Staging:
-    needs:
-      - Check-Rebuild
-      - Build
+    needs: [Check-Rebuild, Build]
     if: always() && needs.Check-Rebuild.result == 'success' && (needs.Build.result == 'skipped' || needs.Build.result == 'success')
     runs-on: docker
     container:
@@ -131,7 +141,8 @@ jobs:
           fi
 
   Merge:
-    needs: Test-Staging
+    needs: [Check-Rebuild, Test-Staging]
+    if: needs.Test-Staging.result == 'success' && needs.Check-Rebuild.outputs.dev_lock_present == 'false'
     runs-on: ubuntu
     steps:
       - name: Checkout Repository
@@ -185,10 +196,7 @@ jobs:
           fi
 
   Clean:
-    needs:
-      - Check-Rebuild
-      - Build
-      - Test-Production
+    needs: [Check-Rebuild, Build, Test-Production]
     runs-on: docker
     defaults:
       run:
@@ -198,3 +206,40 @@ jobs:
         run: |
          docker image rm ${{ needs.Check-Rebuild.outputs.current_docker_image }} --force
+
+  Notify:
+    needs: [Check-Rebuild, Build, Deploy-Staging, Test-Staging, Merge, Deploy-Production, Test-Production, Clean]
+    runs-on: ubuntu
+    if: always() && needs.Check-Rebuild.outputs.dev_lock_present == 'false'
+    env:
+      NTFY_URL: https://ntfy.vezpi.com
+      NTFY_TOPIC: blog
+      NTFY_TOKEN: ${{ secrets.NTFY_CREDENTIALS }}
+    steps:
+      - name: Notify Workflow Result
+        run: |
+          if [[
+            "${{ needs.Check-Rebuild.result }}" == "success" &&
+            ("${{ needs.Build.result }}" == "success" || "${{ needs.Build.result }}" == "skipped") &&
+            "${{ needs.Deploy-Staging.result }}" == "success" &&
+            "${{ needs.Test-Staging.result }}" == "success" &&
+            "${{ needs.Merge.result }}" == "success" &&
+            "${{ needs.Deploy-Production.result }}" == "success" &&
+            "${{ needs.Test-Production.result }}" == "success" &&
+            ("${{ needs.Clean.result }}" == "success" || "${{ needs.Clean.result }}" == "skipped")
+          ]]; then
+            curl -H "Priority: min" \
+              -H "Tags: white_check_mark" \
+              -d "Blog workflow completed successfully." \
+              -u ${NTFY_TOKEN} \
+              ${NTFY_URL}/${NTFY_TOPIC}
+          else
+            curl -H "Priority: high" \
+              -H "Tags: x" \
+              -H "Actions: view, View Run, ${{ gitea.server_url }}/${{ gitea.repository }}/actions/runs/${{ gitea.run_number }}, clear=true; \
+                view, Verify Blog, https://blog.vezpi.com, clear=true" \
+              -d "Blog workflow failed!" \
+              -u ${NTFY_TOKEN} \
+              ${NTFY_URL}/${NTFY_TOPIC}
+          fi
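
With the new `dev_lock_present` output, the Merge and Notify jobs only run when no `.dev-lock` file exists on the `preview` branch, so pausing automatic merges to production is just a matter of committing that file (illustrative usage):

```bash
# Pause automatic merges while work is in progress on preview
touch .dev-lock
git add .dev-lock
git commit -m "Lock preview: work in progress"
git push origin preview

# Resume the normal pipeline
git rm .dev-lock
git commit -m "Unlock preview"
git push origin preview
```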

Deleted file (SVG icon, 732 B):
@@ -1 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="icon icon-tabler icons-tabler-outline icon-tabler-brand-git"><path stroke="none" d="M0 0h24v24H0z" fill="none"/><path d="M16 12m-1 0a1 1 0 1 0 2 0a1 1 0 1 0 -2 0" /><path d="M12 8m-1 0a1 1 0 1 0 2 0a1 1 0 1 0 -2 0" /><path d="M12 16m-1 0a1 1 0 1 0 2 0a1 1 0 1 0 -2 0" /><path d="M12 15v-6" /><path d="M15 11l-2 -2" /><path d="M11 7l-1.9 -1.9" /><path d="M13.446 2.6l7.955 7.954a2.045 2.045 0 0 1 0 2.892l-7.955 7.955a2.045 2.045 0 0 1 -2.892 0l-7.955 -7.955a2.045 2.045 0 0 1 0 -2.892l7.955 -7.955a2.045 2.045 0 0 1 2.892 0z" /></svg>

Deleted file (SVG icon, 1.2 KiB):
@@ -1 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="currentColor" class="icon icon-tabler icons-tabler-filled icon-tabler-brightness-up"><path stroke="none" d="M0 0h24v24H0z" fill="none"/><path d="M12 8a4 4 0 1 1 -3.995 4.2l-.005 -.2l.005 -.2a4 4 0 0 1 3.995 -3.8z" /><path d="M12 2a1 1 0 0 1 .993 .883l.007 .117v2a1 1 0 0 1 -1.993 .117l-.007 -.117v-2a1 1 0 0 1 1 -1z" /><path d="M17.693 4.893a1 1 0 0 1 1.497 1.32l-.083 .094l-1.4 1.4a1 1 0 0 1 -1.497 -1.32l.083 -.094l1.4 -1.4z" /><path d="M21 11a1 1 0 0 1 .117 1.993l-.117 .007h-2a1 1 0 0 1 -.117 -1.993l.117 -.007h2z" /><path d="M16.293 16.293a1 1 0 0 1 1.32 -.083l.094 .083l1.4 1.4a1 1 0 0 1 -1.32 1.497l-.094 -.083l-1.4 -1.4a1 1 0 0 1 0 -1.414z" /><path d="M12 18a1 1 0 0 1 .993 .883l.007 .117v2a1 1 0 0 1 -1.993 .117l-.007 -.117v-2a1 1 0 0 1 1 -1z" /><path d="M6.293 16.293a1 1 0 0 1 1.497 1.32l-.083 .094l-1.4 1.4a1 1 0 0 1 -1.497 -1.32l.083 -.094l1.4 -1.4z" /><path d="M6 11a1 1 0 0 1 .117 1.993l-.117 .007h-2a1 1 0 0 1 -.117 -1.993l.117 -.007h2z" /><path d="M4.893 4.893a1 1 0 0 1 1.32 -.083l.094 .083l1.4 1.4a1 1 0 0 1 -1.32 1.497l-.094 -.083l-1.4 -1.4a1 1 0 0 1 0 -1.414z" /></svg>

assets/icons/message-language.svg (new file, 462 B)
@@ -0,0 +1 @@
<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="icon icon-tabler icons-tabler-outline icon-tabler-message-language"><path stroke="none" d="M0 0h24v24H0z" fill="none"/><path d="M4 21v-13a3 3 0 0 1 3 -3h10a3 3 0 0 1 3 3v6a3 3 0 0 1 -3 3h-9l-4 4" /><path d="M10 14v-4a2 2 0 1 1 4 0v4" /><path d="M14 12h-4" /></svg>

assets/icons/toggle_to_en.svg (new file, 90 KiB)

assets/icons/toggle_to_fr.svg (new file, 90 KiB)

assets/scss/custom.scss (new file, 11 lines)
@@ -0,0 +1,11 @@
.homepage-header {
  text-align: center;
}

.lang-toggle-icon {
  margin-left: auto;
  svg {
    width: 64px;
    height: 24px;
  }
}

content/_index.fr.md (new file, 6 lines)
@@ -0,0 +1,6 @@
---
title: Bienvenue sur Vezpi Lab
description: Ici les derniers articles
---
Ce blog partage mes projets et expériences dans mon homelab.
Vous trouverez ci-dessous les derniers articles.

content/_index.md (new file, 6 lines)
@@ -0,0 +1,6 @@
---
title: Welcome to Vezpi Lab
description: Here are the latest articles
---
This blog shares projects and experiments from my homelab.
Below you'll find the latest articles.

@@ -9,12 +9,12 @@ menu:
 params:
   icon: user
 ---
-Salut ! Moi c'est Etienne, j'adore l'**automatisation** et je suis un amateur de projets **homelab**. Je suis un expert Linux et travaille comme **Senior Cloud DevOps Engineer** chez Capgemini.
+Salut ! Moi c'est Etienne, j'adore l'**automatisation** et je suis un amateur de projets **homelab**. Je suis un expert Linux et je travaille comme **Senior Cloud DevOps Engineer** chez Capgemini.
 
-Motivé par la passion, j'aime explorer de nouvelles technologie, comprendre comment elles fonctionnement et les expérimenter chez moi, juste pour le plaisir. Mon lab est passé d'un simple espace de bidouille à un véritable terrain de jeu pour expérimenter la virtualisation, l'orchestration de conteneurs, le réseau, et bien plus encore.
+Motivé par la passion, j'aime explorer de nouvelles technologies, comprendre comment elles fonctionnent et les expérimenter chez moi, juste pour le plaisir. Mon lab est passé d'un simple espace de bidouille à un véritable terrain de jeu pour expérimenter la virtualisation, l'orchestration de conteneurs, le réseau, et bien plus encore.
 
 Ce blog est ma façon de documenter ce que je construis, casse (volontairement !), répare et surtout, ce que j'**apprends**. C'est une référence personnelle, mais aussi un moyen de partager avec la communauté, de m'open source, au cas où quelqu'un d'autre suivrait le même chemin et trouverait mon expérience utile.
 
-Même si je suis français, J'écris mes notes personnelles en anglais pour perfectionner l'utilisation de cette langue, mais j'essaye tout de même de les traduire dans ma langue maternelle.
+Même si je suis français, j'écris mes notes personnelles en anglais pour perfectionner l'utilisation de cette langue, mais j'essaie tout de même de les traduire dans ma langue maternelle.
 
 Si vous travaillez sur un projet intéressant, si vous avez des questions ou si vous souhaitez proposer de nouvelles idées, **n'hésitez pas à me contacter** !

content/post/10-opnsense-crash-disk-panic.fr.md (new file, 224 lines)
@@ -0,0 +1,224 @@
---
slug: opnsense-crash-disk-panic
title: Mon Routeur OPNsense Crash, de la Panique à la Renaissance
description: L'histoire de comment j'ai survécu à un crash OPNsense causé par un disque défaillant, et pourquoi un fichier XML a tout changé.
date: 2025-08-24
draft: false
tags:
  - opnsense
categories:
  - homelab
---
## Intro

Cette semaine, j'ai vécu mon premier vrai problème dans mon homelab, qui a fait tomber tout mon réseau à la maison.

Mon routeur OPNsense a crashé et, après plusieurs tentatives de récupération ratées, j'ai finalement dû le réinstaller from scratch. Heureusement, presque toute la configuration est revenue grâce à un simple fichier XML. Dans cette histoire, je vais raconter ce qui s'est passé, ce que j'ai fait pour m'en sortir, et aussi ce que je n'aurais pas dû faire.

Ce genre d'exercice est la pire chose que vous souhaitez voir arriver, parce que ce n'est jamais amusant de voir tout exploser. Mais c'est de loin la meilleure façon d'apprendre.

## Le Calme Avant la Tempête

Ma box OPNsense tournait parfaitement depuis des mois. Routeur, pare-feu, DNS, DHCP, VLANs, VPN, reverse proxy et même contrôleur UniFi : toutes les pièces de mon homelab passent par elle. Mais pas seulement, elle fournit aussi Internet à la maison.



Cette box est le cœur de mon réseau : sans elle, je ne peux quasiment rien faire. J'ai détaillé son fonctionnement dans ma section [Homelab]({{< ref "page/homelab" >}}). Tout « fonctionnait juste », et je ne m'en inquiétais pas. J'étais confiant, sa sauvegarde vivait uniquement à l'intérieur de la machine…

Peut-être trop confiant.

## Le Redémarrage Inattendu

Sans prévenir, la box a redémarré toute seule, juste avant minuit. Par chance, je passais à côté de mon rack en allant me coucher. J'ai su qu'elle avait redémarré car j'ai entendu son petit bip de démarrage.

Je me suis demandé pourquoi le routeur avait redémarré sans mon accord. Dans mon lit, j'ai rapidement vérifié si Internet fonctionnait : oui. Mais aucun de mes services n'était disponible, ni la domotique, ni ce blog. J'étais fatigué, je réglerais ça le lendemain…

Au matin, en regardant les logs, j'ai trouvé le coupable :
```
panic: double fault
```

Un kernel panic. Mon routeur avait littéralement planté au niveau matériel.

## Premières Tentatives de Dépannage

Au début, l'impact semblait mineur. Un seul service ne redémarrait pas : Caddy, mon reverse proxy. Ce qui expliquait pourquoi mes services n'étaient pas accessibles.

En fouillant dans les logs, j'ai trouvé l'erreur :
```
caching certificate: decoding certificate metadata: unexpected end of JSON input
```

Un des certificats mis en cache avait été corrompu pendant le crash. En supprimant son dossier de cache, Caddy est reparti et, d'un coup, tous mes services HTTPS étaient de retour.

Je pensais avoir esquivé la balle. Je n'ai pas cherché plus loin la cause réelle : les logs du kernel étaient pollués par une interface qui « flappait », j'ai cru à un simple bug. À la place, je me suis lancé dans une mise à jour, ma première erreur.

Mon instance OPNsense était en version 25.1, et la 25.7 venait de sortir. Allons-y gaiement !

La mise à jour s'est déroulée correctement, mais quelque chose clochait. En cherchant de nouvelles updates, j'ai vu une corruption dans `pkg`, la base de données du gestionnaire de paquets :
```
pkg: sqlite error while executing iterator in file pkgdb_iterator.c:1110: database disk image is malformed
```

🚨 Mon alarme interne s'est déclenchée. J'ai pensé aux sauvegardes et j'ai immédiatement téléchargé la dernière :


En cliquant sur le bouton `Download configuration`, j'ai récupéré le `config.xml` en cours d'utilisation. Je pensais que ça suffirait.

## Corruption du Système de Fichiers

J'ai tenté de réparer la base `pkg` de la pire façon possible : j'ai sauvegardé le dossier `/var/db/pkg` puis essayé de refaire un `bootstrap` :
```bash
cp -a /var/db/pkg /var/db/pkg.bak
pkg bootstrap -f
```
```
The package management tool is not yet installed on your system.
Do you want to fetch and install it now? [y/N]: y
Bootstrapping pkg from https://pkg.opnsense.org/FreeBSD:14:amd64/25.7/latest, please wait...
[...]
pkg-static: Fail to extract /usr/local/lib/libpkg.a from package: Write error
Failed to install the following 1 package(s): /tmp//pkg.pkg.scQnQs
[...]
A pre-built version of pkg could not be found for your system.
```

J'ai vu un `Write error`. Je soupçonnais un problème disque. J'ai lancé `fsck` et reçu un flot d'incohérences :
```bash
fsck -n
```
```
[...]
INCORRECT BLOCK COUNT I=13221121 (208384 should be 208192)
INCORRECT BLOCK COUNT I=20112491 (8 should be 0)
INCORRECT BLOCK COUNT I=20352874 (570432 should be 569856)
[...]
FREE BLK COUNT(S) WRONG IN SUPERBLK
[...]
SUMMARY INFORMATION BAD
[...]
BLK(S) MISSING IN BIT MAPS
[...]
***** FILE SYSTEM IS LEFT MARKED AS DIRTY *****
```

Le système de fichiers root était en mauvais état.

N'ayant que SSH et pas de console, j'ai forcé un `fsck` au prochain redémarrage :
```bash
sysrc fsck_y_enable="YES"
sysrc background_fsck="NO"
reboot
```

Au redémarrage, le système a été réparé suffisamment pour relancer `pkg bootstrap`. Mais la moitié des paquets système avaient disparu. Ma mise à jour précédente sur un disque corrompu m'avait laissé avec un système bancal, à moitié installé, à moitié manquant.

## Quand ça empire

J'ai découvert l'utilitaire `opnsense-bootstrap`, censé remettre le système à plat :
- Suppression de tous les paquets installés
- Téléchargement et installation d'un nouveau noyau/base 25.7
- Réinstallation des paquets standards

Parfait !
```
opnsense-bootstrap
```
```
This utility will attempt to turn this installation into the latest OPNsense 25.7 release. All packages will be deleted, the base system and kernel will be replaced, and if all went well the system will automatically reboot. Proceed with this action? [y/N]:
```

J'ai dit `y`. Ça avait bien commencé, puis… plus rien. Plus de signal. Plus d'Internet. Je croyais que ce bootstrap allait me sauver. En fait, il m'a enterré.

🙈 Oups.

Après un moment, j'ai tenté de le redémarrer, mais impossible de me reconnecter en SSH. Pas le choix, j'ai dû sortir le routeur du rack, le poser sur mon bureau, brancher écran et clavier et voir ce qui se passait.

## Repartir de zéro

C'était mauvais signe :
```
Fatal error: Uncaught Error: Class "OPNsense\Core\Config" not found
in /usr/local/etc/inc/config.inc:143
```

Et les logs du bootstrap étaient pires :
```
bad dir ino … mangled entry
Input/output error
```

Le disque n'était pas en forme. Je ne pouvais plus rien sauver. Il était temps de repartir de zéro. Heureusement, j'avais une sauvegarde… non ?

J'ai téléchargé l'ISO OPNsense 25.7, créé une clé USB bootable, et réinstallé par-dessus, en laissant les paramètres par défaut.

## Le sauveur : `config.xml`

OPNsense garde toute sa configuration dans un seul fichier : `/conf/config.xml`. Ce fichier a été ma bouée de sauvetage.

J'ai copié le `config.xml` sauvegardé plus tôt sur ma clé USB. Une fois celle-ci connectée sur la machine fraîchement installée, j'ai remplacé le fichier :
```bash
mount -t msdosfs /dev/da0s1 /mnt
cp /mnt/config.xml /conf/config.xml
```

J'ai remis le routeur dans le rack, croisé les doigts… *bip !* 🎉

Le DHCP m'a donné une adresse, bon signe. Je pouvais accéder à l'interface web, super. Ma configuration était là, à peu près tout sauf les plugins, comme prévu. Je ne pouvais pas les installer immédiatement, car ils nécessitaient une autre mise à jour. Mettons à jour !

Ce fichier XML à lui seul m'a permis de reconstruire mon routeur sans perdre la raison.

Sans DNS (AdGuard non installé), j'ai temporairement pointé le DNS du système vers `1.1.1.1`.

## Le Dernier Souffle

Lors de la mise à jour suivante, rebelote : erreurs, reboot, crash. La machine de nouveau plus accessible...

Je pouvais officiellement déclarer mon disque NVMe mort.

🪦 Repose en paix, merci pour tes loyaux services.

Par chance, j'avais un NVMe Kingston 512 Go encore neuf, livré avec cette machine. Je ne l'avais jamais utilisé car j'avais préféré réutiliser celui à l'intérieur de mon serveur *Vertex*.

J'ai refait l'installation d'OPNsense dessus, et cette fois tout a fonctionné : passage en 25.7.1 et réinstallation des plugins officiels que j'utilisais.

Pour les plugins custom (AdGuard Home et UniFi), il a fallu ajouter le repo tiers dans `/usr/local/etc/pkg/repos/mimugmail.conf` (documentation [ici](https://www.routerperformance.net/opnsense-repo/)) :
```json
mimugmail: {
  url: "https://opn-repo.routerperformance.net/repo/${ABI}",
  priority: 5,
  enabled: yes
}
```

Après un dernier reboot, le routeur était presque prêt, mais je n'avais toujours pas de DNS. C'était à cause d'AdGuard Home qui n'était pas configuré.

⚠️ La configuration des plugins tiers n'est pas sauvegardée dans `config.xml`.

Reconfigurer AdGuard Home n'était pas bien compliqué ; finalement mon DNS fonctionnait et tout était revenu à la normale… sauf le contrôleur UniFi.

## Leçons Apprises à la Dure

- **Les sauvegardes comptent** : je me retrouve toujours à penser que les sauvegardes ne sont pas fondamentales... jusqu'à ce qu'on ait besoin de restaurer et qu'il soit trop tard.
- **Gardez les sauvegardes hors de la machine** : j'ai eu de la chance de récupérer le `config.xml` avant que mon disque me lâche. J'aurais vraiment passé un mauvais moment à tout restaurer entièrement.
- **Vérifier la santé après un crash** : ne pas ignorer un kernel panic.
- **Erreurs I/O = alerte rouge** : j'ai perdu des heures à batailler avec un disque condamné.
- **Les plugins non officiels ne sont pas sauvegardés** : la configuration d'OPNsense et de ses plugins officiels est sauvegardée, ce n'est pas le cas pour les autres.
- **Mon routeur est un SPOF** (*un point de défaillance unique*) : dans mon homelab, je voulais avoir le maximum d'éléments hautement disponibles, il me faut trouver une meilleure solution.

## Aller de l'Avant

Je dois sérieusement repenser ma stratégie de sauvegarde. J'ai toujours repoussé, jusqu'à ce qu'il soit trop tard. Ça faisait longtemps que je n'avais pas subi une panne matérielle. Quand ça arrive, ça pique.
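
En attendant, une piste simple consiste à tirer régulièrement le `config.xml` depuis une autre machine. Esquisse indicative : l'adresse, l'utilisateur et les chemins sont des exemples à adapter, et elle suppose un accès SSH par clé au routeur :
```bash
#!/bin/sh
# Exemple hypothétique : copie nocturne du config.xml d'OPNsense vers un hôte de sauvegarde
set -eu
DEST=/srv/backups/opnsense
mkdir -p "$DEST"
scp root@192.168.1.1:/conf/config.xml "$DEST/config-$(date +%F).xml"
# Ne conserver que les 30 copies les plus récentes
ls -1t "$DEST"/config-*.xml | tail -n +31 | xargs -r rm -f
```
À lancer par exemple via cron sur l'hôte de sauvegarde : `0 2 * * * /usr/local/bin/backup-opnsense.sh`.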

Au départ, je pensais qu'un routeur sur son propre hardware était plus sûr. J'avais tort. Je vais réfléchir à une virtualisation sous Proxmox pour l'avoir en haute dispo. Un beau projet en perspective !

## Conclusion

Mon routeur OPNsense est passé d'un simple redémarrage aléatoire à un disque mort, avec un vrai rollercoaster de dépannage. Au final, je suis presque content que ça soit arrivé : j'ai appris bien plus qu'avec une mise à jour sans accroc.

Si vous utilisez OPNsense (ou n'importe quel routeur), retenez ça :
**Gardez une sauvegarde hors de la machine.**

Parce que quand ça casse, et ça finira par casser, c'est ce petit fichier XML qui peut sauver tout votre homelab.

Restez safe, faites des sauvegardes.

content/post/10-opnsense-crash-disk-panic.md (new file, 225 lines)
@@ -0,0 +1,225 @@
---
slug: opnsense-crash-disk-panic
title: My OPNsense Router Crash, from Panic to Reborn
description: The story of how I survived an OPNsense crash with a failing disk and why one backup XML made all the difference.
date: 2025-08-24
draft: false
tags:
  - opnsense
categories:
  - homelab
---
## Intro

This week, I experienced my first real problem in my homelab, one that took my whole home network down.

My OPNsense router crashed and, after several failed recovery attempts, I finally had to reinstall it from scratch. Luckily, almost all of the configuration came back thanks to a single XML file. In this story, I will tell you what happened, what I did to recover, and what I shouldn't have done.

This kind of exercise is the worst thing you want to happen, because it's never fun to watch everything go boom, but it is by far the best way to learn.

## The Calm Before the Storm

My OPNsense box had been running smoothly for months. Router, firewall, DNS, DHCP, VLANs, VPN, reverse proxy and even UniFi controller: all the pieces of my homelab run through it. And not only that, it also serves internet for the whole home.



This box is the heart of my network; without it, I can hardly do anything. I have detailed how it works in my [Homelab]({{< ref "page/homelab" >}}) section. It was "just working," and I wasn't worried about it. I felt confident, even though its backup lived only inside the machine...

Maybe too confident.

## The Unexpected Reboot

Out of nowhere, the box rebooted by itself just before midnight. By chance, I was passing by my rack on my way to bed, and I knew it had rebooted because I heard its little startup beep.

I wondered why the router had restarted without my consent. From my bed, I quickly checked whether internet was working, and it was. But none of my services were available, not my home automation, not even this blog. I was tired, I would fix that the next day...

In the morning, looking at the logs, I found the culprit:
```
panic: double fault
```

A kernel panic. My router had literally crashed at the hardware level.

## First Troubleshooting Attempts

At first, the impact seemed minor. Only one service wasn't coming back up: Caddy, my reverse proxy. That explained why my services weren't available.

Digging into the logs, I found the error:
```
caching certificate: decoding certificate metadata: unexpected end of JSON input
```

It turned out that one of the cached certificates had been corrupted during the crash. Deleting its cache folder fixed Caddy, and suddenly all my HTTPS services were back online.

I thought I had dodged the bullet. I didn't investigate the root cause much: the kernel logs were polluted by one of the interfaces flapping, so I assumed it was just a bug. Instead, I went ahead and checked for updates, my first mistake.

My OPNsense instance was on version 25.1, and the newer 25.7 was available. Let's upgrade it, yay!

The upgrade rolled out successfully, but something was wrong. When I checked for further updates, I saw corruption in `pkg`, the package manager database:
```
pkg: sqlite error while executing iterator in file pkgdb_iterator.c:1110: database disk image is malformed
```

🚨 My internal alarm went off. I thought about backups and immediately downloaded the latest one:


Clicking the `Download configuration` button, I downloaded the `config.xml` currently in use by the instance. I thought it would be enough.

## Filesystem Corruption

I decided to recover the `pkg` database the worst possible way: I backed up the `/var/db/pkg` folder and tried to `bootstrap` it:
```bash
cp -a /var/db/pkg /var/db/pkg.bak
pkg bootstrap -f
```
```
The package management tool is not yet installed on your system.
Do you want to fetch and install it now? [y/N]: y
Bootstrapping pkg from https://pkg.opnsense.org/FreeBSD:14:amd64/25.7/latest, please wait...
[...]
pkg-static: Fail to extract /usr/local/lib/libpkg.a from package: Write error
Failed to install the following 1 package(s): /tmp//pkg.pkg.scQnQs
[...]
A pre-built version of pkg could not be found for your system.
```

I saw a `Write error` and suspected a filesystem problem. I ran `fsck`, and the output was a flood of inconsistencies:
```bash
fsck -n
```
```
[...]
INCORRECT BLOCK COUNT I=13221121 (208384 should be 208192)
INCORRECT BLOCK COUNT I=20112491 (8 should be 0)
INCORRECT BLOCK COUNT I=20352874 (570432 should be 569856)
[...]
FREE BLK COUNT(S) WRONG IN SUPERBLK
[...]
SUMMARY INFORMATION BAD
[...]
BLK(S) MISSING IN BIT MAPS
[...]
***** FILE SYSTEM IS LEFT MARKED AS DIRTY *****
```

The root filesystem was in bad shape.

Since I only had SSH at this point and no console access, I set up a forced `fsck` for the next reboot:
```bash
sysrc fsck_y_enable="YES"
sysrc background_fsck="NO"
reboot
```

On the next boot, the filesystem was repaired enough to let me bootstrap `pkg` again, but most of the system packages were gone. My earlier upgrade on a dirty disk had left me with a half-installed, half-missing system.

## When Things Got Worse

I discovered the `opnsense-bootstrap` utility, which promises to reinstall all packages and reset the system to a clean release, exactly what I was looking for:
- Remove all installed packages.
- Fresh 25.7 base system and kernel will be downloaded and installed.
- All standard OPNsense packages will be reinstalled.

Wonderful!
```
opnsense-bootstrap
```
```
This utility will attempt to turn this installation into the latest OPNsense 25.7 release. All packages will be deleted, the base system and kernel will be replaced, and if all went well the system will automatically reboot. Proceed with this action? [y/N]:
```

I pressed `y`. It started well, but then... no more signal, no more internet. I thought this bootstrap would save me. Instead, it buried me.

🙈 Oops.

After a while, I tried to reboot, but it was impossible to connect back via SSH. With no other option, I had to pull the router out of the rack, put it on my desk, and plug in a screen and a keyboard to see what was going on.

## Starting Over the Hard Way

This was bad:
```
Fatal error: Uncaught Error: Class "OPNsense\Core\Config" not found
in /usr/local/etc/inc/config.inc:143
```

Checking the bootstrap logs, it was even worse:
```
bad dir ino … mangled entry
Input/output error
```

The disk was in bad shape; at this point I couldn't save the install anymore. Time to start from scratch. Luckily, I had a backup… right?

I downloaded the latest OPNsense ISO (v25.7) and wrote it to a USB stick, then reinstalled OPNsense over the existing installation, keeping all the defaults.

## The Lifesaver: `config.xml`

OPNsense keeps the whole configuration in a single file: `/conf/config.xml`. That file was my lifeline.

I copied the `config.xml` file saved earlier onto the USB stick. Once it was plugged into the fresh OPNsense box, I overwrote the file:
```bash
mount -t msdosfs /dev/da0s1 /mnt
cp /mnt/config.xml /conf/config.xml
```

I placed the router back in the rack, powered it on and crossed my fingers... *beep!* 🎉

The DHCP gave me an address, a good start. I could reach the web UI, awesome. My configuration was there, almost everything except the plugins, as expected. I couldn't install them right away because they required another update, so let's update!

This single XML file is the reason I could rebuild my router without losing my sanity.

DNS was down because the AdGuard Home plugin wasn't installed, so I temporarily set the system DNS to `1.1.1.1`.

## The Last Breath

During that upgrade, the system threw errors again… and then rebooted itself. Another crash, and this time it wouldn't come back up...

I could officially declare my NVMe drive dead.

🪦 Rest in peace, thank you for your loyal service.

Luckily, I had a spare 512GB Kingston NVMe that came with that box. I had never used it because I preferred to reuse the one inside my *Vertex* server.

I redid the same steps to reinstall OPNsense on that disk, and this time everything worked: I could finally update OPNsense to 25.7.1 and reinstall all the official plugins I was using.

To install the custom plugins (AdGuard Home and UniFi), I had to add the third-party repository in `/usr/local/etc/pkg/repos/mimugmail.conf` (documentation [here](https://www.routerperformance.net/opnsense-repo/)):
```json
mimugmail: {
  url: "https://opn-repo.routerperformance.net/repo/${ABI}",
  priority: 5,
  enabled: yes
}
```

After a final reboot, the router was almost ready, but I still didn't have DNS. This was because AdGuard Home was not configured.

⚠️ Custom plugin configuration is not saved within the `config.xml` backup.

Reconfiguring AdGuard Home was pretty straightforward; finally my DNS was working and everything was back to normal... except the UniFi controller.

## Lessons Learned the Hard Way

- **Backups matter**: I always catch myself thinking backups are not that important... until I need to restore and it's too late.
- **Keep backups off the box**: I was lucky to grab the `config.xml` before my disk died; otherwise I would have had a really hard time fully recovering.
- **Health check after a crash**: do not ignore a kernel panic.
- **I/O errors = red flag**: I should have stopped trying to repair. I lost hours fighting a dead disk.
- **Custom plugin configs aren't included**: the OPNsense configuration and its official plugins are saved in the backup; this is not the case for the others.
- **My router is a SPOF** (*single point of failure*): in my homelab, I want most of my components highly available, so I need to find a better solution.

## Moving Forward

I really need to rethink my backup strategy. I'm too lazy and always put it off for later, until it is too late. It had been a long time since I was last hit by a hardware failure. When it strikes, it hurts.
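
In the meantime, a simple stopgap is to pull `config.xml` from another machine on a schedule. An illustrative sketch: the address, user and paths are placeholders to adapt, and it assumes SSH key access to the router:
```bash
#!/bin/sh
# Hypothetical example: nightly copy of the OPNsense config.xml to a backup host
set -eu
DEST=/srv/backups/opnsense
mkdir -p "$DEST"
scp root@192.168.1.1:/conf/config.xml "$DEST/config-$(date +%F).xml"
# Keep only the 30 most recent copies
ls -1t "$DEST"/config-*.xml | tail -n +31 | xargs -r rm -f
```
Run it from cron on the backup host, e.g. `0 2 * * * /usr/local/bin/backup-opnsense.sh`.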

Initially I wanted my router on its own hardware because I thought it was safer. I was damn wrong. I will look into virtualizing OPNsense on Proxmox to make it highly available, a great project ahead!

## Conclusion

My OPNsense router went from a random reboot to a dead disk, with a rollercoaster of troubleshooting in between. In the end, I'm almost happy it happened: it taught me more than any smooth upgrade ever could.

If you run OPNsense (or any router), remember this:
**Keep a backup off the box.**

Because when things go wrong, and eventually they will, that one little XML file can save your homelab.

Stay safe, make backups.

content/post/11-proxmox-cluster-networking-sdn.fr.md (new file, 148 lines)
@@ -0,0 +1,148 @@
---
slug: proxmox-cluster-networking-sdn
title: Simplifier la gestion des VLAN dans Proxmox VE avec le SDN
description: Découvrez comment centraliser la configuration des VLAN dans Proxmox VE grâce aux zones SDN et aux VNets, pour un réseau plus simple et cohérent.
date: 2025-09-12
draft: false
tags:
  - proxmox
categories:
  - homelab
---

## Intro

Quand j'ai construit mon cluster **Proxmox VE 8** pour la première fois, le réseau n'était pas ma priorité. Je voulais simplement remplacer rapidement un vieux serveur physique, alors j'ai donné la même configuration de base à chacun de mes trois nœuds, créé le cluster et commencé à créer des VM :


Cela a bien fonctionné pendant un moment. Mais comme je prévois de virtualiser mon routeur **OPNsense**, j'ai besoin de quelque chose de plus structuré et cohérent. C'est là que la fonctionnalité **S**oftware-**D**efined **N**etworking (SDN) de Proxmox entre en jeu.

---
## Mon Réseau Homelab

Par défaut, chaque nœud Proxmox dispose de sa propre zone locale, appelée `localnetwork`, qui contient le pont Linux par défaut (`vmbr0`) comme VNet :


C'est suffisant pour des configurations isolées, mais rien n'est coordonné au niveau du cluster.

Mon objectif est simple : déclarer les VLAN que j'utilise déjà dans mon réseau, afin de pouvoir y rattacher des VM facilement depuis n'importe quel nœud.

Voici la liste des VLAN que j'utilise actuellement :

| Nom       | ID   | Usage                          |
| --------- | ---- | ------------------------------ |
| Mgmt      | 1    | Administration                 |
| User      | 13   | Réseau domestique              |
| IoT       | 37   | IoT et équipements non fiables |
| DMZ       | 55   | Services exposés à Internet    |
| Lab       | 66   | Réseau de lab                  |
| Heartbeat | 77   | Heartbeat du cluster Proxmox   |
| Ceph      | 99   | Stockage Ceph                  |
| VPN       | 1337 | Réseau WireGuard               |

---
## Aperçu du SDN Proxmox

Le Software-Defined Networking de Proxmox permet de définir des zones et réseaux virtuels à l'échelle du cluster. Au lieu de répéter la configuration des VLAN sur chaque nœud, le SDN offre une vue centralisée et assure la cohérence.

En interne, Proxmox repose essentiellement sur les fonctionnalités réseau standard de Linux, ce qui évite d'ajouter des dépendances externes et garantit la stabilité.

Les configurations SDN sont stockées dans `/etc/pve/sdn` et répliquées sur l'ensemble du cluster. Les changements sont appliqués de manière atomique (on prépare les modifications puis on clique sur `Apply`), ce qui rend les déploiements plus sûrs.
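
À titre d'illustration, les fichiers générés pour une zone VLAN et un VNet ressemblent approximativement à ceci (extrait indicatif : `homelan` et `vlan55` reprennent les noms utilisés plus bas, et le format exact peut varier selon la version) :
```plaintext
# /etc/pve/sdn/zones.cfg
vlan: homelan
        bridge vmbr0

# /etc/pve/sdn/vnets.cfg
vnet: vlan55
        zone homelan
        tag 55
```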

### Zones

Une **Zone** définit un domaine réseau séparé. Les zones peuvent couvrir certains nœuds et contenir des **VNets**.

Proxmox prend en charge plusieurs types de zones :
- **Simple** : pont isolé (bridge) avec routage L3/NAT
- **VLAN** : segmentation classique via VLAN
- **QinQ** : empilement de VLAN (IEEE 802.1ad)
- **VXLAN** : réseau L2 via encapsulation UDP
- **EVPN** : VXLAN avec BGP pour du routage L3 dynamique

Comme mon réseau domestique utilise déjà des VLAN, j'ai créé une **zone VLAN** appelée `homelan`, en utilisant `vmbr0` comme pont et en l'appliquant à tout le cluster :


### VNets

Un **VNet** est un réseau virtuel à l'intérieur d'une zone. Dans une zone VLAN, chaque VNet correspond à un ID VLAN spécifique.

J'ai commencé par créer `vlan55` dans la zone `homelan` pour mon réseau DMZ :


Puis j'ai ajouté les VNets correspondant à la plupart de mes VLAN, puisque je prévois de les rattacher à une VM OPNsense :


Enfin, j'ai appliqué la configuration dans **Datacenter → SDN** :


---
## Test de la Configuration Réseau

Dans une vieille VM que je n'utilise plus, je remplace l'actuel `vmbr0` avec le VLAN tag 66 par mon nouveau VNet `vlan66` :


Après l'avoir démarrée, la VM obtient une IP du DHCP d'OPNsense sur ce VLAN, ce qui est super. J'essaie également de pinger une autre machine et ça fonctionne :


---
## Mise à jour de Cloud-Init et Terraform

Pour aller plus loin, j'ai mis à jour le pont réseau utilisé dans mon **template cloud-init**, dont j'avais détaillé la création dans [cet article]({{< ref "post/1-proxmox-cloud-init-vm-template" >}}).
Comme avec la VM précédente, j'ai remplacé `vmbr0` et le tag VLAN 66 par le nouveau VNet `vlan66`.

J'ai aussi adapté mon code **Terraform** pour refléter ce changement :


Ensuite, j'ai validé qu'aucune régression n'était introduite en déployant une VM de test :
```bash
terraform apply -var 'vm_name=vm-test-vnet'
```
```plaintext
data.proxmox_virtual_environment_vms.template: Reading...
data.proxmox_virtual_environment_vms.template: Read complete after 0s [id=23b17aea-d9f7-4f28-847f-41bb013262ea]
[...]
Plan: 2 to add, 0 to change, 0 to destroy.

Changes to Outputs:
  + vm_ip = (known after apply)

Do you want to perform these actions?
  Terraform will perform the actions described above.
  Only 'yes' will be accepted to approve.

  Enter a value: yes

proxmox_virtual_environment_file.cloud_config: Creating...
proxmox_virtual_environment_file.cloud_config: Creation complete after 1s [id=local:snippets/vm.cloud-config.yaml]
proxmox_virtual_environment_vm.vm: Creating...
proxmox_virtual_environment_vm.vm: Still creating... [10s elapsed]
[...]
proxmox_virtual_environment_vm.vm: Still creating... [3m0s elapsed]
proxmox_virtual_environment_vm.vm: Creation complete after 3m9s [id=119]

Apply complete! Resources: 2 added, 0 changed, 0 destroyed.

Outputs:

vm_ip = "192.168.66.181"
```

La création s'est déroulée sans problème, tout est bon :


---
## Conclusion

La mise en place du SDN Proxmox avec une **zone VLAN** est simple et très pratique. Au lieu de définir manuellement un tag VLAN sur chaque VM, je sélectionne désormais directement le bon VNet, et tout reste cohérent dans le cluster.

| Étape                | Avant SDN                      | Après SDN                           |
| -------------------- | ------------------------------ | ----------------------------------- |
| Rattacher une VM     | `vmbr0` + tag VLAN manuel      | Sélection du VNet approprié         |
| VLANs sur les nœuds  | Config répétée sur chaque nœud | Centralisé via le SDN du cluster    |
| Gestion des adresses | Manuel ou via DHCP uniquement  | IPAM optionnel via sous-réseaux SDN |

Mon cluster est maintenant prêt à héberger mon **routeur OPNsense**, et cette base ouvre la voie à d'autres expérimentations, comme les overlays VXLAN ou l'EVPN avec BGP.

À suivre pour la prochaine étape !
content/post/11-proxmox-cluster-networking-sdn.md
Normal file
@@ -0,0 +1,148 @@
|
|||||||
|
---
|
||||||
|
slug: proxmox-cluster-networking-sdn
|
||||||
|
title: Simplifying VLAN Management in Proxmox VE with SDN
|
||||||
|
description: Learn how to centralize VLAN configuration in Proxmox VE using SDN zones and VNets, making VM networking easier and more consistent.
|
||||||
|
date: 2025-09-12
|
||||||
|
draft: false
|
||||||
|
tags:
|
||||||
|
- proxmox
|
||||||
|
categories:
|
||||||
|
- homelab
|
||||||
|
---
|
||||||
|
|
||||||
|
## Intro
|
||||||
|
|
||||||
|
When I first built my **Proxmox VE 8** cluster, networking wasn’t my main concern. I just wanted to replace an old physical server quickly, so I gave each of my three nodes the same basic config, created the cluster, and started running VMs:
|
||||||
|

|
||||||
|
|
||||||
|
That worked fine for a while. But as I plan to virtualize my **OPNsense** router, I need something more structured and consistent. This is where Proxmox **S**oftware-**D**efined **N**etworking (SDN) feature comes in.
|
||||||
|
|
||||||
|
---
|
||||||
|
## My Homelab Network
|
||||||
|
|
||||||
|
By default, every Proxmox node comes with its own local zone, called `localnetwork`, which contains the default Linux bridge (`vmbr0`) as a VNet:
|
||||||
|

|
||||||
|
|
||||||
|
That’s fine for isolated setups, but at the cluster level nothing is coordinated.
|
||||||
|
|
||||||
|
What I want is simple: declare the VLANs I already use in my network, so I can attach VMs to them easily from any node.
|
||||||
|
|
||||||
|
Here’s the list of VLANs I use today:
|
||||||
|
|
||||||
|
| Name | ID | Purpose |
|
||||||
|
| --------- | ---- | ---------------------------- |
|
||||||
|
| Mgmt | 1 | Management |
|
||||||
|
| User | 13 | Home network |
|
||||||
|
| IoT | 37 | IoT and untrusted equipments |
|
||||||
|
| DMZ | 55 | Internet facing |
|
||||||
|
| Lab | 66 | Lab network |
|
||||||
|
| Heartbeat | 77 | Proxmox cluster heartbeat |
|
||||||
|
| Ceph | 99 | Ceph storage |
|
||||||
|
| VPN | 1337 | Wireguard network |
|
||||||
|
|
||||||
|
---
|
||||||
|
## Proxmox SDN Overview
|
||||||
|
|
||||||
|
Proxmox Software-Defined Networking makes it possible to define cluster-wide virtual zones and networks. Instead of repeating VLAN configs on every node, SDN gives you a central view and ensures consistency.
|
||||||
|
|
||||||
|
Under the hood, Proxmox mostly uses standard Linux networking, avoiding extra dependencies and keeping things stable.
|
||||||
|
|
||||||
|
SDN configurations are stored in `/etc/pve/sdn`, which is replicated across the cluster. Changes are applied atomically (you prepare them, then hit `Apply` once), making rollouts safer.
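
For illustration, the generated files for a VLAN zone and a VNet look roughly like this (indicative sketch: `homelan` and `vlan55` are the names used further down, and the exact format may vary between versions):
```plaintext
# /etc/pve/sdn/zones.cfg
vlan: homelan
        bridge vmbr0

# /etc/pve/sdn/vnets.cfg
vnet: vlan55
        zone homelan
        tag 55
```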

### Zones

A **Zone** defines a separate networking domain. Zones can span specific nodes and contain **VNets**.

Proxmox supports several zone types:
- **Simple**: Isolated Bridge. A simple layer 3 routing bridge (NAT)
- **VLAN**: Virtual LANs are the classic method of subdividing a LAN
- **QinQ**: Stacked VLAN (IEEE 802.1ad)
- **VXLAN**: Layer 2 VXLAN network via a UDP tunnel
- **EVPN**: VXLAN with BGP to establish Layer 3 routing

Since my home network already relies on VLANs, I created a **VLAN Zone** named `homelan`, using `vmbr0` as the bridge and applying it cluster-wide:


### VNets

A **VNet** is a virtual network inside a zone. In a VLAN zone, each VNet corresponds to a specific VLAN ID.

I started by creating `vlan55` in the `homelan` zone for my DMZ network:


Then I added VNets for most of my VLANs, since I plan to attach them to an OPNsense VM:


Finally, I applied the configuration in **Datacenter → SDN**:


---
## Test the Network Configuration

In an old VM that I don't use anymore, I replaced the current `vmbr0` with VLAN tag 66 with my new VNet `vlan66`:


After starting it, the VM got an IP from the OPNsense DHCP server on that VLAN, which looks good. I also tried to ping another machine and it worked:


---
## Update Cloud-Init Template and Terraform

To go further, I updated the bridge used in my **cloud-init** template, whose creation I detailed in this [post]({{< ref "post/1-proxmox-cloud-init-vm-template" >}}). Much like with the VM above, I replaced the current `vmbr0` with VLAN tag 66 with my new VNet `vlan66`.

I also updated the **Terraform** code to take this change into account:


I quickly checked for regressions and made sure I could still deploy a VM with Terraform:
```bash
terraform apply -var 'vm_name=vm-test-vnet'
```
```plaintext
data.proxmox_virtual_environment_vms.template: Reading...
data.proxmox_virtual_environment_vms.template: Read complete after 0s [id=23b17aea-d9f7-4f28-847f-41bb013262ea]
[...]
Plan: 2 to add, 0 to change, 0 to destroy.

Changes to Outputs:
  + vm_ip = (known after apply)

Do you want to perform these actions?
  Terraform will perform the actions described above.
  Only 'yes' will be accepted to approve.

  Enter a value: yes

proxmox_virtual_environment_file.cloud_config: Creating...
proxmox_virtual_environment_file.cloud_config: Creation complete after 1s [id=local:snippets/vm.cloud-config.yaml]
proxmox_virtual_environment_vm.vm: Creating...
proxmox_virtual_environment_vm.vm: Still creating... [10s elapsed]
[...]
proxmox_virtual_environment_vm.vm: Still creating... [3m0s elapsed]
proxmox_virtual_environment_vm.vm: Creation complete after 3m9s [id=119]

Apply complete! Resources: 2 added, 0 changed, 0 destroyed.

Outputs:

vm_ip = "192.168.66.181"
```

The VM deployed without any issue, everything is OK:


---
## Conclusion

Setting up Proxmox SDN with a **VLAN zone** turned out to be straightforward and very useful. Instead of tagging VLANs manually per VM, I now just pick the right VNet, and everything stays consistent across the cluster.

| Step              | Before SDN                      | After SDN                      |
| ----------------- | ------------------------------- | ------------------------------ |
| Attach VM to VLAN | `vmbr0` + set VLAN tag manually | Select the right VNet directly |
| VLANs on nodes    | Repeated config per node        | Centralized in cluster SDN     |
| IP management     | Manual or DHCP only             | Optional IPAM via SDN subnets  |

This prepares my cluster to host my **OPNsense router**, and it also sets the stage for future experiments, like trying out VXLAN overlays or EVPN with BGP.

See you next time for the next step!
281
content/post/12-opnsense-virtualization-highly-available.fr.md
Normal file
@@ -0,0 +1,281 @@
|
|||||||
|
---
|
||||||
|
slug: opnsense-virtualization-highly-available
|
||||||
|
title: Construire un Cluster OPNsense Hautement Disponible sur Proxmox VE
|
||||||
|
description: Une preuve de concept montrant comment virtualiser OPNsense sur Proxmox VE, configurer la haute disponibilité avec CARP et pfSync, et gérer une seule IP WAN.
|
||||||
|
date: 2025-09-29
|
||||||
|
draft: false
|
||||||
|
tags:
|
||||||
|
- opnsense
|
||||||
|
- proxmox
|
||||||
|
- high-availability
|
||||||
|
categories:
|
||||||
|
- homelab
|
||||||
|
---
|
||||||
|
## Intro
|
||||||
|
|
||||||
|
J’ai récemment rencontré mon premier vrai problème : ma box **OPNsense** physique a planté à cause d’un _kernel panic_. J’ai détaillé ce qui s'est passé dans [cet article]({{< ref "post/10-opnsense-crash-disk-panic" >}}).
|
||||||
|
|
||||||
|
Cette panne m’a fait repenser mon installation. Un seul pare-feu est un point de défaillance unique, donc pour améliorer la résilience j’ai décidé de prendre une nouvelle approche : **virtualiser OPNsense**.
|
||||||
|
|
||||||
|
Évidemment, faire tourner une seule VM ne suffirait pas. Pour obtenir une vraie redondance, il me faut deux instances OPNsense en **Haute Disponibilité**, l’une active et l’autre en attente.
|
||||||
|
|
||||||
|
Avant de déployer ça sur mon réseau, j’ai voulu valider l’idée dans mon homelab. Dans cet article, je vais détailler la preuve de concept : déployer deux VM OPNsense dans un cluster **Proxmox VE** et les configurer pour fournir un pare-feu hautement disponible.
|
||||||
|
|
||||||
|
---
|
||||||
|
## Infrastructure Actuelle
|
||||||
|
|
||||||
|
Au sommet de mon installation, mon modem FAI, une _Freebox_ en mode bridge, relié directement à l’interface `igc0` de ma box OPNsense, servant d’interface **WAN**. Sur `igc1`, le **LAN** est connecté à mon switch principal via un port trunk, avec le VLAN 1 comme VLAN natif pour mon réseau de management.
|
||||||
|
|
||||||
|
Ce switch relie également mes trois nœuds Proxmox, chacun sur un port trunk avec le même VLAN natif. Chaque nœud dispose de deux cartes réseau : une pour le trafic général, et l’autre dédiée au réseau de stockage Ceph, connecté à un switch séparé de 2,5 Gbps.
|
||||||
|
|
||||||
|
Depuis le crash d’OPNsense, j’ai simplifié l’architecture en supprimant le lien LACP, qui n’apportait pas de réelle valeur :
|
||||||
|

|
||||||
|
|
||||||
|
Jusqu’à récemment, le réseau Proxmox de mon cluster était très basique : chaque nœud était configuré individuellement sans véritable logique commune. Cela a changé après la découverte du SDN Proxmox, qui m’a permis de centraliser les définitions de VLAN sur l’ensemble du cluster. J’ai décrit cette étape dans [cet article]({{< ref "post/11-proxmox-cluster-networking-sdn" >}}).
|
||||||
|
|
||||||
|
---
|
||||||
|
## Preuve de Concept
|
||||||
|
|
||||||
|
Place au lab. Voici les étapes principales :
|
||||||
|
1. Ajouter quelques VLANs dans mon homelab
|
||||||
|
2. Créer un faux routeur FAI
|
||||||
|
3. Construire deux VMs OPNsense
|
||||||
|
4. Configurer la haute disponibilité
|
||||||
|
5. Tester la bascule
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
### Ajouter des VLANs dans mon homelab
|
||||||
|
|
||||||
|
Pour cette expérimentation, je crée trois nouveaux VLANs :
|
||||||
|
- **VLAN 101** : _POC WAN_
|
||||||
|
- **VLAN 102** : _POC LAN_
|
||||||
|
- **VLAN 103** : _POC pfSync_
|
||||||
|
|
||||||
|
Dans l’interface Proxmox, je vais dans `Datacenter` > `SDN` > `VNets` et je clique sur `Create` :
|
||||||
|

|
||||||
|
|
||||||
|
Une fois les trois VLANs créés, j’applique la configuration.
|
||||||
|
|
||||||
|
J’ajoute ensuite ces trois VLANs dans mon contrôleur UniFi. Ici, seul l’ID et le nom sont nécessaires, le contrôleur se charge de les propager via les trunks connectés à mes nœuds Proxmox VE.
|
||||||
|
|
||||||
|
### Créer une VM “Fausse Box FAI”
|
||||||
|
|
||||||
|
Pour simuler mon modem FAI actuel, j’ai créé une VM appelée `fake-freebox`. Cette VM route le trafic entre les réseaux _POC WAN_ et _Lab_, et fait tourner un serveur DHCP qui ne délivre qu’un seul bail, exactement comme ma vraie Freebox en mode bridge.
|
||||||
|
|
||||||
|
Cette VM dispose de 2 cartes réseau, que je configure avec Netplan :
|
||||||
|
- `eth0` (_POC WAN_ VLAN 101) : adresse IP statique `10.101.0.254/24`
|
||||||
|
- `enp6s19` (Lab VLAN 66) : adresse IP obtenue en DHCP depuis mon routeur OPNsense actuel, en amont
|
||||||
|
```yaml
|
||||||
|
network:
|
||||||
|
version: 2
|
||||||
|
ethernets:
|
||||||
|
eth0:
|
||||||
|
addresses:
|
||||||
|
- 10.101.0.254/24
|
||||||
|
enp6s19:
|
||||||
|
dhcp4: true
|
||||||
|
```
|
||||||
|
|
||||||
|
J’active ensuite le routage IP pour permettre à cette VM de router le trafic :
|
||||||
|
```bash
|
||||||
|
echo "net.ipv4.ip_forward=1" | sudo tee -a /etc/sysctl.conf
|
||||||
|
sudo sysctl -p
|
||||||
|
```
|
||||||
|
|
||||||
|
Puis je configure du masquage (NAT) afin que les paquets sortant via le réseau Lab ne soient pas rejetés par mon OPNsense actuel :
|
||||||
|
```bash
|
||||||
|
sudo iptables -t nat -A POSTROUTING -o enp6s19 -j MASQUERADE
|
||||||
|
sudo apt install iptables-persistent -y
|
||||||
|
sudo netfilter-persistent save
|
||||||
|
```
|
||||||
|
|
||||||
|
J’installe `dnsmasq` comme serveur DHCP léger :
|
||||||
|
```bash
|
||||||
|
sudo apt install dnsmasq -y
|
||||||
|
```
|
||||||
|
|
||||||
|
Dans `/etc/dnsmasq.conf`, je configure un bail unique (`10.101.0.150`) et je pointe le DNS vers mon OPNsense actuel, sur le VLAN _Lab_ :
|
||||||
|
```
|
||||||
|
interface=eth0
|
||||||
|
bind-interfaces
|
||||||
|
dhcp-range=10.101.0.150,10.101.0.150,255.255.255.0,12h
|
||||||
|
dhcp-option=3,10.101.0.254 # default gateway = this VM
|
||||||
|
dhcp-option=6,192.168.66.1 # DNS server
|
||||||
|
```
|
||||||
|
|
||||||
|
Je redémarre le service `dnsmasq` pour appliquer la configuration :
|
||||||
|
```bash
|
||||||
|
sudo systemctl restart dnsmasq
|
||||||
|
```
|
||||||
|
|
||||||
|
La VM `fake-freebox` est maintenant prête à fournir du DHCP sur le VLAN 101, avec un seul bail disponible.
|
||||||
|
|
||||||
|
### Construire les VMs OPNsense
|
||||||
|
|
||||||
|
Je commence par télécharger l’ISO d’OPNsense et je l’upload sur un de mes nœuds Proxmox :
|
||||||
|

|
||||||
|
|
||||||
|
#### Création de la VM
|
||||||
|
|
||||||
|
Je crée la première VM `poc-opnsense-1` avec les paramètres suivants :
|
||||||
|
- Type d’OS : Linux (même si OPNsense est basé sur FreeBSD)
|
||||||
|
- Type de machine : `q35`
|
||||||
|
- BIOS : `OVMF (UEFI)`, stockage EFI sur mon pool Ceph
|
||||||
|
- Disque : 20 Gio sur Ceph
|
||||||
|
- CPU/RAM : 2 vCPU, 2 Gio de RAM
|
||||||
|
- Cartes réseau :
|
||||||
|
1. VLAN 101 (_POC WAN_)
|
||||||
|
2. VLAN 102 (_POC LAN_)
|
||||||
|
3. VLAN 103 (_POC pfSync_)
|
||||||
|

|
||||||
|
|
||||||
|
ℹ️ Avant de la démarrer, je clone cette VM pour préparer la seconde : `poc-opnsense-2`
|
||||||
|
|
||||||
|
Au premier démarrage, je tombe sur une erreur “access denied”. Pour corriger, j’entre dans le BIOS, **Device Manager > Secure Boot Configuration**, je décoche _Attempt Secure Boot_ et je redémarre :
|
||||||
|

|
||||||
|
|
||||||
|
#### Installation d’OPNsense
|
||||||
|
|
||||||
|
La VM démarre sur l’ISO, je ne touche à rien jusqu’à l’écran de login :
|
||||||
|

|
||||||
|
|
||||||
|
Je me connecte avec `installer` / `opnsense` et je lance l’installateur. Je sélectionne le disque QEMU de 20 Go comme destination et je démarre l’installation :
|
||||||
|

|
||||||
|
|
||||||
|
Une fois terminé, je retire l’ISO du lecteur et je redémarre la machine.
|
||||||
|
|
||||||
|
#### Configuration de Base d’OPNsense
|
||||||
|
|
||||||
|
Au redémarrage, je me connecte avec `root` / `opnsense` et j’arrive au menu CLI :
|
||||||
|

|
||||||
|
|
||||||
|
Avec l’option 1, je réassigne les interfaces :
|
||||||
|

|
||||||
|
|
||||||
|
L’interface WAN récupère bien `10.101.0.150/24` depuis la `fake-freebox`. Je configure le LAN sur `10.102.0.2/24` et j’ajoute un pool DHCP de `10.102.0.10` à `10.102.0.99` :
|
||||||
|

|
||||||
|
|
||||||
|
✅ La première VM est prête, je reproduis l’opération pour la seconde OPNsense `poc-opnsense-2`, qui aura l’IP `10.102.0.3`.
|
||||||
|
|
||||||
|
### Configurer OPNsense en Haute Disponibilité
|
||||||
|
|
||||||
|
Avec les deux VMs OPNsense opérationnelles, il est temps de passer à la configuration via le WebGUI. Pour y accéder, j’ai connecté une VM Windows au VLAN _POC LAN_ et ouvert l’IP de l’OPNsense sur le port 443 :
|
||||||
|

|
||||||
|
|
||||||
|
#### Ajouter l’Interface pfSync
|
||||||
|
|
||||||
|
La troisième carte réseau (`vtnet2`) est assignée à l’interface _pfSync_. Ce réseau dédié permet aux deux firewalls de synchroniser leurs états via le VLAN _POC pfSync_ :
|
||||||
|

|
||||||
|
|
||||||
|
J’active l’interface sur chaque instance et je leur attribue une IP statique :
|
||||||
|
- **poc-opnsense-1** : `10.103.0.2/24`
|
||||||
|
- **poc-opnsense-2** : `10.103.0.3/24`
|
||||||
|
|
||||||
|
Puis, j’ajoute une règle firewall sur chaque nœud pour autoriser tout le trafic provenant de ce réseau sur l’interface _pfSync_ :
|
||||||
|

|
||||||
|
|
||||||
|
#### Configurer la Haute Disponibilité
|
||||||
|
|
||||||
|
Direction `System` > `High Availability` > `Settings`.
|
||||||
|
- Sur le master (`poc-opnsense-1`), je configure les `General Settings` et les `Synchronization Settings`.
|
||||||
|
- Sur le backup (`poc-opnsense-2`), seuls les `General Settings` suffisent (on ne veut pas qu’il écrase la config du master).
|
||||||
|

|
||||||
|
|
||||||
|
Une fois appliqué, je vérifie la synchro dans l’onglet `Status` :
|
||||||
|

|
||||||
|
|
||||||
|
#### Créer une IP Virtuelle
|
||||||
|
|
||||||
|
Pour fournir une passerelle partagée aux clients, je crée une IP virtuelle (VIP) en **CARP** (Common Address Redundancy Protocol) sur l’interface LAN. L’IP est portée par le nœud actif et bascule automatiquement en cas de failover.
|
||||||
|
|
||||||
|
Menu : `Interfaces` > `Virtual IPs` > `Settings` :
|
||||||
|

|
||||||
|
|
||||||
|
Je réplique ensuite la config depuis `System > High Availability > Status` avec le bouton `Synchronize and reconfigure all`.
|
||||||
|
|
||||||
|
Sur `Interfaces > Virtual IPs > Status`, le master affiche la VIP en `MASTER` et le backup en `BACKUP`.
|
||||||
|
|
||||||
|
#### Reconfigurer le DHCP
|
||||||
|
|
||||||
|
Pour la HA, il faut adapter le DHCP. Comme **Dnsmasq** ne supporte pas la synchro des baux, chaque instance doit répondre indépendamment.
|
||||||
|
|
||||||
|
Sur le master :
|
||||||
|
- `Services` > `Dnsmasq DNS & DHCP` > `General` : cocher `Disable HA sync`
|
||||||
|
- `DHCP ranges` : cocher aussi `Disable HA sync`
|
||||||
|
- `DHCP options` : ajouter l’option `router [3]` avec la valeur `10.102.0.1` (VIP LAN)
|
||||||
|
- `DHCP options` : cloner la règle pour `dns-server [6]` vers la même VIP.
|
||||||
|

|
||||||
|
|
||||||
|
Sur le backup :
|
||||||
|
- `Services` > `Dnsmasq DNS & DHCP` > `General` : cocher `Disable HA sync`
|
||||||
|
- Régler `DHCP reply delay` à `5` secondes (laisser la priorité au master)
|
||||||
|
- `DHCP ranges` : définir un autre pool, plus petit (`10.102.0.200 -> 220`).
|
||||||
|
|
||||||
|
Ainsi, seules les **options** DHCP sont synchronisées, les plages restant distinctes.
|
||||||
|
|
||||||
|
#### Interface WAN
|
||||||
|
|
||||||
|
Mon modem FAI n’attribue qu’une seule IP en DHCP, je ne veux pas que mes 2 VMs entrent en compétition. Pour gérer ça :
|
||||||
|
1. Dans Proxmox, je copie l’adresse MAC de `net0` (WAN) de `poc-opnsense-1` et je l’applique à `poc-opnsense-2`. Ainsi, le bail DHCP est partagé.
|
||||||
|
⚠️ Si les deux VMs activent la même MAC en même temps, cela provoque des conflits ARP et peut casser le réseau. Seul le MASTER doit activer son WAN.
|
||||||
|
2. Un hook d’événement CARP permet de lancer des scripts. J’ai déployé ce [script Gist](https://gist.github.com/spali/2da4f23e488219504b2ada12ac59a7dc#file-10-wancarp) dans `/usr/local/etc/rc.syshook.d/carp/10-wan` sur les deux nœuds. Ce script active le WAN uniquement sur le MASTER.
|
||||||
|
```php
|
||||||
|
#!/usr/local/bin/php
|
||||||
|
<?php
|
||||||
|
|
||||||
|
require_once("config.inc");
|
||||||
|
require_once("interfaces.inc");
|
||||||
|
require_once("util.inc");
|
||||||
|
require_once("system.inc");
|
||||||
|
|
||||||
|
$subsystem = !empty($argv[1]) ? $argv[1] : '';
|
||||||
|
$type = !empty($argv[2]) ? $argv[2] : '';
|
||||||
|
|
||||||
|
if ($type != 'MASTER' && $type != 'BACKUP') {
|
||||||
|
log_error("Carp '$type' event unknown from source '{$subsystem}'");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!strstr($subsystem, '@')) {
|
||||||
|
log_error("Carp '$type' event triggered from wrong source '{$subsystem}'");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
$ifkey = 'wan';
|
||||||
|
|
||||||
|
if ($type === "MASTER") {
|
||||||
|
log_error("enable interface '$ifkey' due CARP event '$type'");
|
||||||
|
$config['interfaces'][$ifkey]['enable'] = '1';
|
||||||
|
write_config("enable interface '$ifkey' due CARP event '$type'", false);
|
||||||
|
interface_configure(false, $ifkey, false, false);
|
||||||
|
} else {
|
||||||
|
log_error("disable interface '$ifkey' due CARP event '$type'");
|
||||||
|
unset($config['interfaces'][$ifkey]['enable']);
|
||||||
|
write_config("disable interface '$ifkey' due CARP event '$type'", false);
|
||||||
|
interface_configure(false, $ifkey, false, false);
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Tester le Failover
|
||||||
|
|
||||||
|
Passons aux tests !
|
||||||
|
|
||||||
|
OPNsense propose un _CARP Maintenance Mode_. Avec le master actif, seul lui avait son WAN monté. En activant le mode maintenance, les rôles basculent : le master devient backup, son WAN est désactivé et celui du backup est activé :
|
||||||
|

|
||||||
|
|
||||||
|
Pendant mes pings vers l’extérieur, aucune perte de paquets au moment du basculement.
|
||||||
|
|
||||||
|
Ensuite, j’ai simulé un crash en éteignant le master. Le backup a pris le relais de façon transparente, seulement un paquet perdu, et grâce à la synchro des états, même ma session SSH est restée ouverte. 🎉
|
||||||
|
|
||||||
|
## Conclusion
|
||||||
|
|
||||||
|
Cette preuve de concept démontre qu’il est possible de faire tourner **OPNsense en haute dispo sous Proxmox VE**, même avec une seule IP WAN. Les briques nécessaires :
|
||||||
|
- Segmentation VLAN
|
||||||
|
- Réseau dédié pfSync
|
||||||
|
- IP virtuelle partagée (CARP)
|
||||||
|
- Script pour gérer l’interface WAN
|
||||||
|
|
||||||
|
Le résultat est à la hauteur : failover transparent, synchro des états, et connexions actives qui survivent à un crash. Le point le plus délicat reste la gestion du bail WAN, mais le hook CARP règle ce problème.
|
||||||
|
|
||||||
|
🚀 Prochaine étape : préparer un nouveau cluster OPNsense HA sur Proxmox en vue de remplacer complètement ma box physique actuelle. Restez à l'écoute !
|
283
content/post/12-opnsense-virtualization-highly-available.md
Normal file
@@ -0,0 +1,283 @@
|
|||||||
|
---
|
||||||
|
slug: opnsense-virtualization-highly-available
|
||||||
|
title: Build a Highly Available OPNsense Cluster on Proxmox VE
|
||||||
|
description: A proof of concept showing how to virtualize OPNsense on Proxmox VE, configure high availability with CARP and pfSync and handle a single WAN IP.
|
||||||
|
date: 2025-09-29
|
||||||
|
draft: false
|
||||||
|
tags:
|
||||||
|
- opnsense
|
||||||
|
- proxmox
|
||||||
|
- high-availability
|
||||||
|
categories:
|
||||||
|
- homelab
|
||||||
|
---
|
||||||
|
## Intro
|
||||||
|
|
||||||
|
I recently encountered my first real problem: my physical **OPNsense** box crashed because of a kernel panic. I detailed what happened in that [post]({{< ref "post/10-opnsense-crash-disk-panic" >}}).
|
||||||
|
|
||||||
|
That failure made me rethink my setup. A single firewall is a single point of failure, so to improve resilience I decided to take a new approach: **virtualize OPNsense**.
|
||||||
|
|
||||||
|
Of course, just running one VM wouldn’t be enough. To get real redundancy, I need two OPNsense instances in **High Availability**, with one active and the other standing by.
|
||||||
|
|
||||||
|
Before rolling this out in my network, I wanted to demonstrate the idea in my homelab. In this post, I’ll walk through the proof of concept: deploying two OPNsense VMs inside a **Proxmox VE** cluster and configuring them to provide a highly available firewall.
|
||||||
|
|
||||||
|
---
|
||||||
|
## Current Infrastructure
|
||||||
|
|
||||||
|
On top of my setup, my ISP modem, a *Freebox* in bridge mode, connects directly to the `igc0` interface of my OPNsense box, serving as the **WAN**. On `igc1`, the **LAN** is linked to my main switch using a trunk port, with VLAN 1 as the native VLAN for my management network.
|
||||||
|
|
||||||
|
The switch also connects my three Proxmox nodes, each on trunk ports with the same native VLAN. Every node has two NICs: one for general networking and the other dedicated to the Ceph storage network, which runs through a separate 2.5 Gbps switch.
|
||||||
|
|
||||||
|
Since the OPNsense crash, I’ve simplified things by removing the LACP link, which wasn’t adding real value:
|
||||||
|

|
||||||
|
|
||||||
|
|
||||||
|
Until recently, Proxmox networking on my cluster was very basic: each node was configured individually with no real overlay logic. That changed after I explored Proxmox SDN, where I centralized VLAN definitions across the cluster. I described that step in [this article]({{< ref "post/11-proxmox-cluster-networking-sdn" >}}).
|
||||||
|
|
||||||
|
---
|
||||||
|
## Proof of Concept
|
||||||
|
|
||||||
|
Time to move into the lab. Here are the main steps:
|
||||||
|
1. Add some VLANs in my Homelab
|
||||||
|
2. Create Fake ISP router
|
||||||
|
3. Build two OPNsense VMs
|
||||||
|
4. Configure high availability
|
||||||
|
5. Test failover
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
### Add VLANs in my Homelab
|
||||||
|
|
||||||
|
For this experiment, I create 3 new VLANs:
|
||||||
|
- **VLAN 101**: *POC WAN*
|
||||||
|
- **VLAN 102**: *POC LAN*
|
||||||
|
- **VLAN 103**: *POC pfSync*
|
||||||
|
|
||||||
|
In the Proxmox UI, I navigate to `Datacenter` > `SDN` > `VNets` and I click `Create`:
|
||||||
|

|
||||||
|
|
||||||
|
Once the 3 new VLANs have been created, I apply the configuration.
|
||||||
|
|
||||||
|
Additionally, I add these 3 VLANs in my UniFi Controller. Here only the VLAN ID and name are needed, since the controller will propagate them through the trunks connected to my Proxmox VE nodes.
|
||||||
|
|
||||||
|
### Create “Fake ISP Box” VM
|
||||||
|
|
||||||
|
To simulate my current ISP modem, I built a VM named `fake-freebox`. This VM routes traffic between the *POC WAN* and *Lab* networks and runs a DHCP server that serves only one lease, just like my real Freebox in bridge mode.
|
||||||
|
|
||||||
|
This VM has 2 NICs, which I configure with Netplan:
|
||||||
|
- `eth0` (*POC WAN* VLAN 101): static IP address `10.101.0.254/24`
|
||||||
|
- `enp6s19` (Lab VLAN 66): DHCP address obtained from my current OPNsense router, upstream
|
||||||
|
```yaml
|
||||||
|
network:
|
||||||
|
version: 2
|
||||||
|
ethernets:
|
||||||
|
eth0:
|
||||||
|
addresses:
|
||||||
|
- 10.101.0.254/24
|
||||||
|
enp6s19:
|
||||||
|
dhcp4: true
|
||||||
|
```
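To apply this Netplan configuration (assuming the file sits under `/etc/netplan/`):

```bash
# Validate and apply the Netplan configuration
sudo netplan apply
```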
|
||||||
|
|
||||||
|
I enable packet forwarding to allow this VM to route traffic:
|
||||||
|
```bash
|
||||||
|
echo "net.ipv4.ip_forward=1" | sudo tee -a /etc/sysctl.conf
|
||||||
|
sudo sysctl -p
|
||||||
|
```
|
||||||
|
|
||||||
|
Then I set up masquerading so packets leaving through the lab network wouldn’t be dropped by my current OPNsense:
|
||||||
|
```bash
|
||||||
|
sudo iptables -t nat -A POSTROUTING -o enp6s19 -j MASQUERADE
|
||||||
|
sudo apt install iptables-persistent -y
|
||||||
|
sudo netfilter-persistent save
|
||||||
|
```
|
||||||
|
|
||||||
|
I install `dnsmasq` as a lightweight DHCP server:
|
||||||
|
```bash
|
||||||
|
sudo apt install dnsmasq -y
|
||||||
|
```
|
||||||
|
|
||||||
|
In `/etc/dnsmasq.conf`, I configure it to serve exactly one lease (`10.101.0.150`), with DNS pointing to my current OPNsense router in the *Lab* VLAN:
|
||||||
|
```
|
||||||
|
interface=eth0
|
||||||
|
bind-interfaces
|
||||||
|
dhcp-range=10.101.0.150,10.101.0.150,255.255.255.0,12h
|
||||||
|
dhcp-option=3,10.101.0.254 # default gateway = this VM
|
||||||
|
dhcp-option=6,192.168.66.1 # DNS server
|
||||||
|
```
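Optionally, the file can be syntax-checked before restarting the service:

```bash
# dnsmasq reports the offending line if the configuration file is invalid
sudo dnsmasq --test
```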
|
||||||
|
|
||||||
|
I restart the `dnsmasq` service to apply the configuration:
|
||||||
|
```bash
|
||||||
|
sudo systemctl restart dnsmasq
|
||||||
|
```
|
||||||
|
|
||||||
|
The `fake-freebox` VM is now ready to serve DHCP on VLAN 101, with only one lease available.
|
||||||
|
|
||||||
|
### Build OPNsense VMs
|
||||||
|
|
||||||
|
First I download the OPNsense ISO and upload it to one of my Proxmox nodes:
|
||||||
|

|
||||||
|
|
||||||
|
#### VM Creation
|
||||||
|
|
||||||
|
I create the first VM `poc-opnsense-1`, with the following settings:
|
||||||
|
- OS type: Linux (even though OPNsense is FreeBSD-based)
|
||||||
|
- Machine type: `q35`
|
||||||
|
- BIOS: `OVMF (UEFI)`, EFI storage on my Ceph pool
|
||||||
|
- Disk: 20 GiB also on Ceph
|
||||||
|
- CPU/RAM: 2 vCPU, 2 GiB RAM
|
||||||
|
- NICs:
|
||||||
|
1. VLAN 101 (POC WAN)
|
||||||
|
2. VLAN 102 (POC LAN)
|
||||||
|
3. VLAN 103 (POC pfSync)
|
||||||
|

|
||||||
|
|
||||||
|
ℹ️ Before booting it, I clone this VM to prepare the second one: `poc-opnsense-2`
|
||||||
|
|
||||||
|
On first boot, I hit an “access denied” error. To fix this, I enter the BIOS, go to **Device Manager > Secure Boot Configuration**, uncheck _Attempt Secure Boot_, and restart the VM:
|
||||||
|

|
||||||
|
|
||||||
|
#### OPNsense Installation
|
||||||
|
|
||||||
|
The VM boots on the ISO, I touch nothing until I get into the login screen:
|
||||||
|

|
||||||
|
|
||||||
|
I log in as `installer` / `opnsense` and launch the installer. I select the 20 GB QEMU hard disk as destination and start the installation:
|
||||||
|

|
||||||
|
|
||||||
|
Once the installation is finished, I remove the ISO from the drive and restart the machine.
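Ejecting the ISO can also be done from the node's shell with `qm` (a sketch; `ide2` as the CD-ROM slot and `<vmid>` are assumptions/placeholders):

```bash
# Detach the installation ISO from the VM's CD-ROM drive
qm set <vmid> --ide2 none,media=cdrom
```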
|
||||||
|
|
||||||
|
#### OPNsense Basic Configuration
|
||||||
|
|
||||||
|
After reboot, I log in as `root` / `opnsense` and get into the CLI menu:
|
||||||
|

|
||||||
|
|
||||||
|
Using option 1, I reassign the interfaces:
|
||||||
|

|
||||||
|
|
||||||
|
The WAN interface successfully pulls `10.101.0.150/24` from the `fake-freebox`. I set the LAN interface to `10.102.0.2/24` and configure a DHCP pool from `10.102.0.10` to `10.102.0.99`:
|
||||||
|

|
||||||
|
|
||||||
|
✅ The first VM is ready. I repeat the process for the second OPNsense VM, `poc-opnsense-2`, which will have the IP `10.102.0.3`.
|
||||||
|
|
||||||
|
### Configure OPNsense for High Availability
|
||||||
|
|
||||||
|
With both OPNsense VMs operational, it’s time to configure them from the WebGUI. To access the interface, I connect a Windows VM to the _POC LAN_ VLAN and browse to the OPNsense IP on port 443:
|
||||||
|

|
||||||
|
|
||||||
|
#### Add pfSync Interface
|
||||||
|
|
||||||
|
The third NIC (`vtnet2`) is assigned to the _pfSync_ interface. This dedicated network allows the two firewalls to synchronize states on the VLAN *POC pfSync*:
|
||||||
|

|
||||||
|
|
||||||
|
I enable the interface on each instance and configure it with a static IP address:
|
||||||
|
- **poc-opnsense-1**: `10.103.0.2/24`
|
||||||
|
- **poc-opnsense-2**: `10.103.0.3/24`
|
||||||
|
|
||||||
|
Then, I add a firewall rule on each node to allow all traffic coming from this network on that *pfSync* interface:
|
||||||
|

|
||||||
|
|
||||||
|
#### Setup High Availability
|
||||||
|
|
||||||
|
Next, I go to `System` > `High Availability` > `Settings`.
|
||||||
|
- On the master (`poc-opnsense-1`), I configure both the `General Settings` and the `Synchronization Settings`.
|
||||||
|
- On the backup (`poc-opnsense-2`), only the `General Settings` are needed, since we don't want the backup to overwrite the master config.
|
||||||
|

|
||||||
|
|
||||||
|
Once applied, I verify synchronization on the `Status` page:
|
||||||
|

|
||||||
|
|
||||||
|
#### Create Virtual IP Address
|
||||||
|
|
||||||
|
To provide a shared gateway for clients, I create a **CARP** (Common Address Redundancy Protocol) Virtual IP (VIP) on the LAN interface. This IP is claimed by the active node and automatically fails over to the backup.
|
||||||
|
|
||||||
|
Navigate to `Interfaces` > `Virtual IPs` > `Settings`:
|
||||||
|

|
||||||
|
|
||||||
|
To replicate the config, I go to `System > High Availability > Status` and click the button next to `Synchronize and reconfigure all`.
|
||||||
|
|
||||||
|
On the `Interfaces > Virtual IPs > Status` page, the master shows the VIP as `MASTER`, while the backup reports `BACKUP`.
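The CARP state can also be checked from the OPNsense shell with `ifconfig` (a sketch; `vtnet1` as the LAN interface is an assumption based on the NIC order above):

```bash
# The active node should report "carp: MASTER ...", the standby "carp: BACKUP ..."
ifconfig vtnet1 | grep carp
```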
|
||||||
|
|
||||||
|
#### Reconfigure DHCP
|
||||||
|
|
||||||
|
For HA, I need to adjust the DHCP setup. Since **Dnsmasq** does not support lease synchronization, both instances must serve leases independently.
|
||||||
|
|
||||||
|
On the master:
|
||||||
|
- `Services` > `Dnsmasq DNS & DHCP` > `General`: tick the `Disable HA sync` box.
|
||||||
|
- `DHCP ranges`: also tick the `Disable HA sync` box
|
||||||
|
- `DHCP options`: add the option `router [3]` with the value `10.102.0.1` (LAN VIP)
|
||||||
|
- `DHCP options`: clone the rule for `dns-server [6]` pointing to the same VIP.
|
||||||
|

|
||||||
|
|
||||||
|
On the backup:
|
||||||
|
- `Services` > `Dnsmasq DNS & DHCP` > `General`: also tick the `Disable HA sync` box
|
||||||
|
- Set `DHCP reply delay` to `5` seconds, to give the master priority to answer.
|
||||||
|
- `DHCP ranges`: use a different, smaller pool (`10.102.0.200` -> `10.102.0.220`) and also tick the `Disable HA sync` box.
|
||||||
|
|
||||||
|
|
||||||
|
This way, only DHCP options sync between nodes, while lease ranges stay separate.
|
||||||
|
|
||||||
|
#### WAN Interface
|
||||||
|
|
||||||
|
My ISP modem only provides a single DHCP lease, and I don't want my 2 VMs to compete for it. To handle this:
|
||||||
|
1. In Proxmox, I copy the MAC of the `net0` (WAN) interface from `poc-opnsense-1` and apply it to `poc-opnsense-2`. This way, the DHCP lease can be shared between the nodes (see the sketch after the script below).
|
||||||
|
⚠️ If both VMs bring up the same MAC, it can cause ARP conflicts and break connectivity, only the MASTER should keep its WAN active.
|
||||||
|
2. A CARP event hook makes it possible to run scripts, so I deployed this [Gist script](https://gist.github.com/spali/2da4f23e488219504b2ada12ac59a7dc#file-10-wancarp) in `/usr/local/etc/rc.syshook.d/carp/10-wan` on both nodes. It ensures the WAN is active only on the MASTER, avoiding conflicts.
|
||||||
|
```php
|
||||||
|
#!/usr/local/bin/php
|
||||||
|
<?php
|
||||||
|
|
||||||
|
require_once("config.inc");
|
||||||
|
require_once("interfaces.inc");
|
||||||
|
require_once("util.inc");
|
||||||
|
require_once("system.inc");
|
||||||
|
|
||||||
|
$subsystem = !empty($argv[1]) ? $argv[1] : '';
|
||||||
|
$type = !empty($argv[2]) ? $argv[2] : '';
|
||||||
|
|
||||||
|
if ($type != 'MASTER' && $type != 'BACKUP') {
|
||||||
|
log_error("Carp '$type' event unknown from source '{$subsystem}'");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!strstr($subsystem, '@')) {
|
||||||
|
log_error("Carp '$type' event triggered from wrong source '{$subsystem}'");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
$ifkey = 'wan';
|
||||||
|
|
||||||
|
if ($type === "MASTER") {
|
||||||
|
log_error("enable interface '$ifkey' due CARP event '$type'");
|
||||||
|
$config['interfaces'][$ifkey]['enable'] = '1';
|
||||||
|
write_config("enable interface '$ifkey' due CARP event '$type'", false);
|
||||||
|
interface_configure(false, $ifkey, false, false);
|
||||||
|
} else {
|
||||||
|
log_error("disable interface '$ifkey' due CARP event '$type'");
|
||||||
|
unset($config['interfaces'][$ifkey]['enable']);
|
||||||
|
write_config("disable interface '$ifkey' due CARP event '$type'", false);
|
||||||
|
interface_configure(false, $ifkey, false, false);
|
||||||
|
}
|
||||||
|
```
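As mentioned in step 1, copying the WAN MAC address can also be done with `qm` on the Proxmox side (a sketch; the VM IDs, the MAC and the `vlan101` VNet name are placeholders and assumptions):

```bash
# Read the MAC of net0 on poc-opnsense-1, then reuse it on poc-opnsense-2
qm config <vmid-poc-opnsense-1> | grep net0
qm set <vmid-poc-opnsense-2> --net0 virtio=BC:24:11:XX:XX:XX,bridge=vlan101
```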
|
||||||
|
|
||||||
|
### Test Failover
|
||||||
|
|
||||||
|
Time for the real test!
|
||||||
|
|
||||||
|
OPNsense provides a _CARP Maintenance Mode_. With the master active, WAN was enabled only on that node. Entering maintenance mode flipped the roles: the master became backup, its WAN disabled, while the backup enabled its WAN:
|
||||||
|

|
||||||
|
|
||||||
|
While pinging outside the network, I observed zero packet loss during the failover.
|
||||||
|
|
||||||
|
Finally, I simulated a crash by powering off the master. The backup took over seamlessly, I saw only one dropped packet, and thanks to state synchronization, even my SSH session stayed alive. 🎉
|
||||||
|
|
||||||
|
## Conclusion
|
||||||
|
|
||||||
|
This proof of concept showed that running **OPNsense in high availability on Proxmox VE** is possible, even with a single WAN IP address. To achieve this, I needed these components:
|
||||||
|
- VLAN segmentation.
|
||||||
|
- Dedicated pfSync network.
|
||||||
|
- Shared virtual IP.
|
||||||
|
- Script to manage the WAN interface.
|
||||||
|
|
||||||
|
The setup behaves exactly as expected: seamless failover, synchronized firewall states, and even live sessions surviving a node crash. The most delicate part was handling the WAN lease, since my ISP modem only provides one IP, but the CARP hook script solved that challenge.
|
||||||
|
|
||||||
|
🚀 The next milestone will be to prepare a new OPNsense HA cluster with the aim to completely replace my current physical box. Stay tuned!
|
@@ -0,0 +1,57 @@
|
|||||||
|
---
|
||||||
|
slug:
|
||||||
|
title: Template
|
||||||
|
description:
|
||||||
|
date:
|
||||||
|
draft: true
|
||||||
|
tags:
|
||||||
|
- opnsense
|
||||||
|
- high-availability
|
||||||
|
- proxmox
|
||||||
|
categories:
|
||||||
|
---
|
||||||
|
|
||||||
|
## Intro
|
||||||
|
|
||||||
|
In my previous [post]({{< ref "post/12-opnsense-virtualization-highly-available" >}}), I've set up a PoC to validate the possibility to create a cluster of 2 **OPNsense** VMs in **Proxmox VE** and make the firewall highly available.
|
||||||
|
|
||||||
|
This time, I will cover the creation of my future OPNsense cluster from scratch, plan the cutover and finally migrate away from my current physical box.
|
||||||
|
|
||||||
|
## Build the Foundation
|
||||||
|
|
||||||
|
For the real thing, I'll have to connect the WAN, coming from my ISP box, to my main switch. For that, I have to add a VLAN to carry this traffic to my Proxmox nodes.
|
||||||
|
|
||||||
|
### UniFi
|
||||||
|
|
||||||
|
The first thing I do is configure my Layer 2 network, which is managed by UniFi. There I need to create two VLANs:
|
||||||
|
- *WAN* (20): transports the WAN traffic between my ISP box and my Proxmox nodes.
|
||||||
|
- *pfSync* (44): communication between my OPNsense nodes.
|
||||||
|
|
||||||
|
In the UniFi controller, in `Settings` > `Networks`, I add a `New Virtual Network`. I name it `WAN` and give it the VLAN ID 20:
|
||||||
|

|
||||||
|
|
||||||
|
I do the same thing again for the `pfSync` VLAN with the VLAN ID 44.
|
||||||
|
|
||||||
|
I will plug my ISP box into port 15 of my switch, which is disabled for now. I set it as active, set the native VLAN to the newly created `WAN (20)` and disable trunking:
|
||||||
|

|
||||||
|
|
||||||
|
Once this setting is applied, I make sure that only the ports connected to my Proxmox nodes propagate these VLANs on their trunks.
|
||||||
|
|
||||||
|
We are done with the UniFi configuration.
|
||||||
|
|
||||||
|
### Proxmox SDN
|
||||||
|
|
||||||
|
Now that the VLANs can reach my nodes, I want to handle them in the Proxmox SDN.
|
||||||
|
|
||||||
|
In `Datacenter` > `SDN` > `VNets`, I create a new VNet, name it `vlan20` to follow my own naming convention, give it the *WAN* alias and use the tag (ID) 20:
|
||||||
|

|
||||||
|
|
||||||
|
I also create the `vlan44` for the *pfSync* VLAN, then I apply this configuration and we are done with the SDN.
|
||||||
|
|
||||||
|
## Create the VMs
|
||||||
|
|
||||||
|
Now that the VLAN configuration is done, I can start building my VMs.
|
||||||
|
|
||||||
|
I won't go into much detail about the VM creation; I already covered it in the previous [post]({{< ref "post/12-opnsense-virtualization-highly-available" >}}).
|
||||||
|
|
||||||
|
The first VM is named `cerbere-head1`
|
@@ -53,7 +53,7 @@ L'idée est simple :
|
|||||||
1. J'écris le contenu de mon blog dans mon vault Obsidian, sous un dossier `Blog`.
|
1. J'écris le contenu de mon blog dans mon vault Obsidian, sous un dossier `Blog`.
|
||||||
2. Une fois le fichier modifié, le plugin Git Obsidian effectue automatiquement les commits et les poussent vers le dépôt Gitea.
|
2. Une fois le fichier modifié, le plugin Git Obsidian effectue automatiquement les commits et les poussent vers le dépôt Gitea.
|
||||||
3. Lorsque Gitea reçoit ce push, une première Gitea Action est déclenchée.
|
3. Lorsque Gitea reçoit ce push, une première Gitea Action est déclenchée.
|
||||||
4. La première action synchronise le contenu du blog mis à jour avec un autre dépôt [Git distinct](https://git.vezpi.me/Vezpi/blog) qui héberge le contenu.
|
4. La première action synchronise le contenu du blog mis à jour avec un autre dépôt [Git distinct](https://git.vezpi.com/Vezpi/blog) qui héberge le contenu.
|
||||||
5. Dans ce dépôt, une autre Gitea Action est déclenchée.
|
5. Dans ce dépôt, une autre Gitea Action est déclenchée.
|
||||||
6. La deuxième Gitea Action génère les pages web statiques tout en mettant à jour Hugo si nécessaire.
|
6. La deuxième Gitea Action génère les pages web statiques tout en mettant à jour Hugo si nécessaire.
|
||||||
7. Le blog est maintenant mis à jour (celui que vous lisez).
|
7. Le blog est maintenant mis à jour (celui que vous lisez).
|
||||||
@@ -78,7 +78,7 @@ Le vault Obsidian est un dépôt Git privé self-hosted dans Gitea. J'utilise Do
|
|||||||
container_name: gitea_runner
|
container_name: gitea_runner
|
||||||
restart: on-failure
|
restart: on-failure
|
||||||
environment:
|
environment:
|
||||||
- GITEA_INSTANCE_URL=https://git.vezpi.me
|
- GITEA_INSTANCE_URL=https://git.vezpi.com
|
||||||
- GITEA_RUNNER_REGISTRATION_TOKEN=${GITEA_RUNNER_REGISTRATION_TOKEN}$
|
- GITEA_RUNNER_REGISTRATION_TOKEN=${GITEA_RUNNER_REGISTRATION_TOKEN}$
|
||||||
- GITEA_RUNNER_NAME=self-hosted
|
- GITEA_RUNNER_NAME=self-hosted
|
||||||
- GITEA_RUNNER_LABELS=ubuntu:docker://node:lts,alpine:docker://node:lts-alpine
|
- GITEA_RUNNER_LABELS=ubuntu:docker://node:lts,alpine:docker://node:lts-alpine
|
||||||
@@ -140,7 +140,7 @@ jobs:
|
|||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
- name: Clone the blog repository
|
- name: Clone the blog repository
|
||||||
run: git clone https://${{ secrets.REPO_TOKEN }}@git.vezpi.me/Vezpi/blog.git
|
run: git clone https://${{ secrets.REPO_TOKEN }}@git.vezpi.com/Vezpi/blog.git
|
||||||
|
|
||||||
- name: Transfer blog content from Obsidian
|
- name: Transfer blog content from Obsidian
|
||||||
run: |
|
run: |
|
||||||
|
@@ -52,7 +52,7 @@ The idea is simple:
|
|||||||
1. I write blog content in my Obsidian vault, under a specific `Blog` folder.
|
1. I write blog content in my Obsidian vault, under a specific `Blog` folder.
|
||||||
2. When I'm done editing the file, the Obsidian Git plugin automatically commits and pushes updates to the Gitea repository
|
2. When I'm done editing the file, the Obsidian Git plugin automatically commits and pushes updates to the Gitea repository
|
||||||
3. When Gitea receives that push, a first Gitea Action is triggered.
|
3. When Gitea receives that push, a first Gitea Action is triggered.
|
||||||
4. The first action syncs the updated blog content to another separate [Git repository](https://git.vezpi.me/Vezpi/blog) which hosts my blog content.
|
4. The first action syncs the updated blog content to another separate [Git repository](https://git.vezpi.com/Vezpi/blog) which hosts my blog content.
|
||||||
5. In that blog repository, another Gitea Action is triggered.
|
5. In that blog repository, another Gitea Action is triggered.
|
||||||
6. The second Gitea Action generates the static web pages while upgrading Hugo if needed
|
6. The second Gitea Action generates the static web pages while upgrading Hugo if needed
|
||||||
7. The blog is now updated (the one you are reading).
|
7. The blog is now updated (the one you are reading).
|
||||||
@@ -77,7 +77,7 @@ The Obsidian vault is a private Git repository self-hosted in Gitea. I use docke
|
|||||||
container_name: gitea_runner
|
container_name: gitea_runner
|
||||||
restart: on-failure
|
restart: on-failure
|
||||||
environment:
|
environment:
|
||||||
- GITEA_INSTANCE_URL=https://git.vezpi.me
|
- GITEA_INSTANCE_URL=https://git.vezpi.com
|
||||||
- GITEA_RUNNER_REGISTRATION_TOKEN=${GITEA_RUNNER_REGISTRATION_TOKEN}$
|
- GITEA_RUNNER_REGISTRATION_TOKEN=${GITEA_RUNNER_REGISTRATION_TOKEN}$
|
||||||
- GITEA_RUNNER_NAME=self-hosted
|
- GITEA_RUNNER_NAME=self-hosted
|
||||||
- GITEA_RUNNER_LABELS=ubuntu:docker://node:lts,alpine:docker://node:lts-alpine
|
- GITEA_RUNNER_LABELS=ubuntu:docker://node:lts,alpine:docker://node:lts-alpine
|
||||||
@@ -139,7 +139,7 @@ jobs:
|
|||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
- name: Clone the blog repository
|
- name: Clone the blog repository
|
||||||
run: git clone https://${{ secrets.REPO_TOKEN }}@git.vezpi.me/Vezpi/blog.git
|
run: git clone https://${{ secrets.REPO_TOKEN }}@git.vezpi.com/Vezpi/blog.git
|
||||||
|
|
||||||
- name: Transfer blog content from Obsidian
|
- name: Transfer blog content from Obsidian
|
||||||
run: |
|
run: |
|
||||||
|
@@ -1,7 +1,7 @@
|
|||||||
---
|
---
|
||||||
slug: blog-deployment-ci-cd-pipeline-gitea-actions
|
slug: blog-deployment-ci-cd-pipeline-gitea-actions
|
||||||
title: Pipeline CI/CD du Déploiment du Blog avec Gitea Actions
|
title: Pipeline CI/CD du Déploiment du Blog avec Gitea Actions
|
||||||
description: Comment j'ai sécurisé le déploiement automatisé de mon blog self-hosted construit avec Hugo en mettant en place un pipeline CI/CD à l'aide de Gitea Actions
|
description: Comment j'ai sécurisé le déploiement automatisé de mon blog self-hosted construit avec Hugo en mettant en place un pipeline CI/CD à l'aide de Gitea Actions.
|
||||||
date: 2025-06-05
|
date: 2025-06-05
|
||||||
draft: false
|
draft: false
|
||||||
tags:
|
tags:
|
||||||
@@ -20,7 +20,7 @@ Le blog étant redéployé de façon automatique à chaque modification du conte
|
|||||||
|
|
||||||
## Sécuriser le Déploiement du Blog
|
## Sécuriser le Déploiement du Blog
|
||||||
|
|
||||||
Aujourd'hui mon blog se redéploie automatiquement à chaque modification de la branche `main` du [dépôt Git](https://git.vezpi.me/Vezpi/Blog) de mon instance **Gitea** via une **Gitea Actions**. Chaque modification apportée à mon vault **Obsidian** est poussée automatiquement dans cette branche.
|
Aujourd'hui mon blog se redéploie automatiquement à chaque modification de la branche `main` du [dépôt Git](https://git.vezpi.com/Vezpi/Blog) de mon instance **Gitea** via une **Gitea Actions**. Chaque modification apportée à mon vault **Obsidian** est poussée automatiquement dans cette branche.
|
||||||
|
|
||||||

|

|
||||||
|
|
||||||
@@ -96,7 +96,7 @@ Par défaut, au lancement d'un conteneur `nginx`, il se contente de lancer le se
|
|||||||
set -e
|
set -e
|
||||||
|
|
||||||
# Configuration
|
# Configuration
|
||||||
REPO_URL="${REPO_URL:-https://git.vezpi.me/Vezpi/blog.git}"
|
REPO_URL="${REPO_URL:-https://git.vezpi.com/Vezpi/blog.git}"
|
||||||
URL="${URL:-blog.vezpi.com}"
|
URL="${URL:-blog.vezpi.com}"
|
||||||
BRANCH="${BRANCH:-preview}"
|
BRANCH="${BRANCH:-preview}"
|
||||||
CLONE_DIR="${CLONE_DIR:-/blog}"
|
CLONE_DIR="${CLONE_DIR:-/blog}"
|
||||||
@@ -177,7 +177,7 @@ Voici la nouvelle configuration de mon `runner` dans ma stack Gitea, gérée par
|
|||||||
container_name: gitea_runner
|
container_name: gitea_runner
|
||||||
restart: always
|
restart: always
|
||||||
environment:
|
environment:
|
||||||
- GITEA_INSTANCE_URL=https://git.vezpi.me
|
- GITEA_INSTANCE_URL=https://git.vezpi.com
|
||||||
- GITEA_RUNNER_REGISTRATION_TOKEN=<token>
|
- GITEA_RUNNER_REGISTRATION_TOKEN=<token>
|
||||||
- GITEA_RUNNER_NAME=self-hosted
|
- GITEA_RUNNER_NAME=self-hosted
|
||||||
- GITEA_RUNNER_LABELS=ubuntu:docker://node:lts,alpine:docker://node:lts-alpine,docker:docker://docker:cli
|
- GITEA_RUNNER_LABELS=ubuntu:docker://node:lts,alpine:docker://node:lts-alpine,docker:docker://docker:cli
|
||||||
@@ -241,7 +241,7 @@ jobs:
|
|||||||
docker_folder_changed: ${{ steps.docker_folder.outputs.changed }}
|
docker_folder_changed: ${{ steps.docker_folder.outputs.changed }}
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout Repository
|
- name: Checkout Repository
|
||||||
run: git clone --branch preview https://${{ secrets.REPO_TOKEN }}@git.vezpi.me/Vezpi/blog.git .
|
run: git clone --branch preview https://${{ secrets.REPO_TOKEN }}@git.vezpi.com/Vezpi/blog.git .
|
||||||
|
|
||||||
- name: Check Latest Hugo Version
|
- name: Check Latest Hugo Version
|
||||||
id: get_latest
|
id: get_latest
|
||||||
@@ -296,7 +296,7 @@ jobs:
|
|||||||
shell: sh
|
shell: sh
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout Repository
|
- name: Checkout Repository
|
||||||
run: git clone --branch preview https://${{ secrets.REPO_TOKEN }}@git.vezpi.me/Vezpi/blog.git .
|
run: git clone --branch preview https://${{ secrets.REPO_TOKEN }}@git.vezpi.com/Vezpi/blog.git .
|
||||||
|
|
||||||
- name: Build Docker Image
|
- name: Build Docker Image
|
||||||
run: |
|
run: |
|
||||||
|
@@ -1,7 +1,7 @@
|
|||||||
---
|
---
|
||||||
slug: blog-deployment-ci-cd-pipeline-gitea-actions
|
slug: blog-deployment-ci-cd-pipeline-gitea-actions
|
||||||
title: Blog Deployment CI/CD Pipeline using Gitea Actions
|
title: Blog Deployment CI/CD Pipeline using Gitea Actions
|
||||||
description: How I secured the automated deployment of my self-hosted blog built with Hugo by setting up a CI/CD pipeline using Gitea Actions
|
description: How I secured the automated deployment of my self-hosted blog built with Hugo by setting up a CI/CD pipeline using Gitea Actions.
|
||||||
date: 2025-06-05
|
date: 2025-06-05
|
||||||
draft: false
|
draft: false
|
||||||
tags:
|
tags:
|
||||||
@@ -20,7 +20,7 @@ Since the blog is automatically redeployed every time I modify content in Obsidi
|
|||||||
|
|
||||||
## Securing the Blog Deployment
|
## Securing the Blog Deployment
|
||||||
|
|
||||||
Currently, my blog redeploys automatically on every change to the `main` branch of the [Git repository](https://git.vezpi.me/Vezpi/Blog) hosted on my **Gitea** instance, using a **Gitea Actions** workflow. Every change made in my **Obsidian** vault is automatically pushed to this branch.
|
Currently, my blog redeploys automatically on every change to the `main` branch of the [Git repository](https://git.vezpi.com/Vezpi/Blog) hosted on my **Gitea** instance, using a **Gitea Actions** workflow. Every change made in my **Obsidian** vault is automatically pushed to this branch.
|
||||||
|
|
||||||

|

|
||||||
|
|
||||||
@@ -96,7 +96,7 @@ By default, a `nginx` container simply starts the web server. But here I wanted
|
|||||||
set -e
|
set -e
|
||||||
|
|
||||||
# Configuration
|
# Configuration
|
||||||
REPO_URL="${REPO_URL:-https://git.vezpi.me/Vezpi/blog.git}"
|
REPO_URL="${REPO_URL:-https://git.vezpi.com/Vezpi/blog.git}"
|
||||||
URL="${URL:-blog.vezpi.com}"
|
URL="${URL:-blog.vezpi.com}"
|
||||||
BRANCH="${BRANCH:-preview}"
|
BRANCH="${BRANCH:-preview}"
|
||||||
CLONE_DIR="${CLONE_DIR:-/blog}"
|
CLONE_DIR="${CLONE_DIR:-/blog}"
|
||||||
@@ -177,7 +177,7 @@ Here is the new configuration of my `runner` in my Gitea stack, also managed via
|
|||||||
container_name: gitea_runner
|
container_name: gitea_runner
|
||||||
restart: always
|
restart: always
|
||||||
environment:
|
environment:
|
||||||
- GITEA_INSTANCE_URL=https://git.vezpi.me
|
- GITEA_INSTANCE_URL=https://git.vezpi.com
|
||||||
- GITEA_RUNNER_REGISTRATION_TOKEN=<token>
|
- GITEA_RUNNER_REGISTRATION_TOKEN=<token>
|
||||||
- GITEA_RUNNER_NAME=self-hosted
|
- GITEA_RUNNER_NAME=self-hosted
|
||||||
- GITEA_RUNNER_LABELS=ubuntu:docker://node:lts,alpine:docker://node:lts-alpine,docker:docker://docker:cli
|
- GITEA_RUNNER_LABELS=ubuntu:docker://node:lts,alpine:docker://node:lts-alpine,docker:docker://docker:cli
|
||||||
@@ -241,7 +241,7 @@ jobs:
|
|||||||
docker_folder_changed: ${{ steps.docker_folder.outputs.changed }}
|
docker_folder_changed: ${{ steps.docker_folder.outputs.changed }}
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout Repository
|
- name: Checkout Repository
|
||||||
run: git clone --branch preview https://${{ secrets.REPO_TOKEN }}@git.vezpi.me/Vezpi/blog.git .
|
run: git clone --branch preview https://${{ secrets.REPO_TOKEN }}@git.vezpi.com/Vezpi/blog.git .
|
||||||
|
|
||||||
- name: Check Latest Hugo Version
|
- name: Check Latest Hugo Version
|
||||||
id: get_latest
|
id: get_latest
|
||||||
@@ -296,7 +296,7 @@ jobs:
|
|||||||
shell: sh
|
shell: sh
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout Repository
|
- name: Checkout Repository
|
||||||
run: git clone --branch preview https://${{ secrets.REPO_TOKEN }}@git.vezpi.me/Vezpi/blog.git .
|
run: git clone --branch preview https://${{ secrets.REPO_TOKEN }}@git.vezpi.com/Vezpi/blog.git .
|
||||||
|
|
||||||
- name: Build Docker Image
|
- name: Build Docker Image
|
||||||
run: |
|
run: |
|
||||||
|
380
content/post/5-notification-system-gotify-vs-ntfy.fr.md
Normal file
@@ -0,0 +1,380 @@
|
|||||||
|
---
|
||||||
|
slug: notification-system-gotify-vs-ntfy
|
||||||
|
title: Test de Gotify et Ntfy, un système de notifications self-hosted
|
||||||
|
description: Gotify ou Ntfy ? J'ai testé les deux pour créer un système de notifications fiable et self-hosted pour mon homelab, et intégré à un pipeline CI/CD.
|
||||||
|
date: 2025-06-13
|
||||||
|
draft: false
|
||||||
|
tags:
|
||||||
|
- notification
|
||||||
|
- ntfy
|
||||||
|
- gotify
|
||||||
|
- ci-cd
|
||||||
|
categories:
|
||||||
|
- homelab
|
||||||
|
---
|
||||||
|
## Intro
|
||||||
|
|
||||||
|
Pour savoir ce qui se passe dans mon homelab et être averti quand quelque chose ne va pas, je veux mettre en place un système de notifications où (presque) n'importe quoi pourrait m'envoyer un message que je recevrais sur mon mobile.
|
||||||
|
|
||||||
|
Par le passé, j’utilisais **Pushover**, qui était très bien, mais je veux explorer de nouvelles options, plus modernes et éventuellement self-hosted.
|
||||||
|
|
||||||
|
## Choisir le Bon Système de Notifications
|
||||||
|
|
||||||
|
Les éléments clés pour déterminer le bon système pour moi seraient :
|
||||||
|
- **Application Android** : obligatoire, une interface élégante et intuitive est important.
|
||||||
|
- **Intégration** : je veux que le service soit intégré partout où je veux être notifié.
|
||||||
|
- **Self-hosted** : l’héberger moi-même est toujours mieux pour la confidentialité.
|
||||||
|
|
||||||
|
Après une recherche rapide, les outils les plus adaptés sur le marché sont :
|
||||||
|
- **Ntfy**
|
||||||
|
- **Gotify**
|
||||||
|
|
||||||
|
Étant donné les commentaires sur internet et après avoir testé rapidement les deux applications Android, je ne peux pas vraiment décider. Je pense que Ntfy est la meilleure option, mais je vais installer et tester les deux pour me faire une idée !
|
||||||
|
|
||||||
|
## Gotify
|
||||||
|
|
||||||
|
J’avais entendu parler de Gotify il y a quelque temps, en fait avant même de regarder d'autres alternatives, j'avais celui-ci en tête. J’ai rapidement jeté un œil à sa [documentation](https://gotify.net/docs/) et cela semble assez simple.
|
||||||
|
|
||||||
|
### Installation
|
||||||
|
|
||||||
|
Comme d’habitude, je vais déployer le serveur Gotify avec `docker compose` sur `dockerVM`, une VM hébergeant mes applications sous forme de conteneurs Docker. Je crée un nouveau dossier `gotify` dans `/appli/docker/` et je colle mon template de `docker-compose.yml` dedans.
|
||||||
|
|
||||||
|
`docker-compose.yml`
|
||||||
|
```yaml
|
||||||
|
services:
|
||||||
|
gotify:
|
||||||
|
image: gotify/server
|
||||||
|
container_name: gotify
|
||||||
|
volumes:
|
||||||
|
- /appli/data/gotify/data/:/app/data
|
||||||
|
environment:
|
||||||
|
- TZ=Europe/Paris
|
||||||
|
- GOTIFY_DEFAULTUSER_NAME=${GOTIFY_DEFAULTUSER_NAME}
|
||||||
|
- GOTIFY_DEFAULTUSER_PASS=${GOTIFY_DEFAULTUSER_PASS}
|
||||||
|
networks:
|
||||||
|
- web
|
||||||
|
labels:
|
||||||
|
- traefik.enable=true
|
||||||
|
- traefik.http.routers.gotify.rule=Host(`gotify.vezpi.me`)
|
||||||
|
- traefik.http.routers.gotify.entrypoints=https
|
||||||
|
- traefik.http.routers.gotify.tls.certresolver=letsencrypt
|
||||||
|
- traefik.http.services.gotify.loadbalancer.server.port=80
|
||||||
|
restart: always
|
||||||
|
|
||||||
|
networks:
|
||||||
|
web:
|
||||||
|
external: true
|
||||||
|
```
|
||||||
|
|
||||||
|
`.env`
|
||||||
|
```
|
||||||
|
GOTIFY_DEFAULTUSER_NAME=vez
|
||||||
|
GOTIFY_DEFAULTUSER_PASS=<password>
|
||||||
|
```
|
||||||
|
|
||||||
|
Dans la [documentation](https://gotify.net/docs/config), je vois que plusieurs moteurs de base de données peuvent être utilisés, par défaut c’est **sqlite3** qui est utilisé, ce qui ira très bien pour le test. Passer à **PostgreSQL** pourrait être une option si je décide de garder Gotify. Sur cette même page, je vois les différentes variables d’environnement que je peux utiliser pour configurer le serveur depuis le fichier `docker-compose.yml`.
|
||||||
|
|
||||||
|
Quand mes fichiers de configuration sont prêts, je crée une nouvelle entrée dans mon plugin Caddy sur OPNsense pour rediriger ma nouvelle URL Gotify : [https://gotify.vezpi.me](https://gotify.vezpi.me).
|
||||||
|
|
||||||
|
Je crée également le dossier `/appli/data/gotify/data/` dans `dockerVM` pour le monter comme volume et stocker les données :
|
||||||
|
```bash
|
||||||
|
mkdir -p /appli/data/gotify/data/
|
||||||
|
```
|
||||||
|
|
||||||
|
Enfin, je lance la stack docker :
|
||||||
|
```bash
|
||||||
|
$ docker compose up -d
|
||||||
|
[+] Running 5/5
|
||||||
|
✔ gotify Pulled
|
||||||
|
✔ 63ce8e957633 Pull complete
|
||||||
|
✔ e7def9680541 Pull complete
|
||||||
|
✔ 9a1821c438b4 Pull complete
|
||||||
|
✔ ad316556c9ff Pull complete
|
||||||
|
[+] Running 1/1
|
||||||
|
✔ Container gotify Started
|
||||||
|
```
|
||||||
|
|
||||||
|
✅ Atteindre l’URL [https://gotify.vezpi.me](https://gotify.vezpi.me) m’affiche la page de connexion Gotify :
|
||||||
|

|
||||||
|
|
||||||
|
Après connexion, j’accède au tableau de bord, sans messages évidemment :
|
||||||
|

|
||||||
|
|
||||||
|
### Créer une Application
|
||||||
|
|
||||||
|
Pour permettre l’envoi de messages, je dois d’abord créer une application pour laquelle les messages seront regroupés. Cela peut se faire de deux manières :
|
||||||
|
- **WebUI**
|
||||||
|
- **REST-API**
|
||||||
|
|
||||||
|
Pour le test, j’utiliserai la WebUI, je clique sur le bouton `APPS` en haut puis `CREATE APPLICATION`. Je choisis un magnifique nom d'application et une description.
|
||||||
|

|
||||||
|
|
||||||
|
Une fois mon application créée, un token est généré pour celle-ci. Je peux modifier l’application pour changer quoi que ce soit, je peux aussi uploader une icône.
|
||||||
|

|
||||||
|
|
||||||
|
### Tests
|
||||||
|
|
||||||
|
Mon application est maintenant visible dans la barre latérale, testons maintenant l’envoi d’un message. Pour l’envoyer, je peux utiliser `curl` et j’ai besoin du token de l’application.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl "https://gotify.vezpi.me/message?token=<apptoken>" -F "title=Cooked!" -F "message=The potoaries are ready!" -F "priority=5"
|
||||||
|
```
|
||||||
|
Je reçois instantanément la notification sur mon mobile et dans mon navigateur.
|
||||||
|
|
||||||
|
Je renvoie un autre message mais avec une priorité plus basse : `-2`. Je ne reçois pas de notification dans mon navigateur, je remarque une légère différence entre les deux messages. Sur mon mobile, seule ma montre la reçoit, je ne la vois pas sur l’écran, mais je la retrouve dans le centre de notifications.
|
||||||
|

|
||||||
|
|
||||||
|
### Application Android
|
||||||
|
|
||||||
|
Voici quelques captures d’écran depuis mon appareil Android :
|
||||||
|

|
||||||
|
|
||||||
|
Pour une raison inconnue, une notification apparaît aléatoirement pour me dire que je suis connecté à Gotify :
|
||||||
|

|
||||||
|
|
||||||
|
### Conclusion
|
||||||
|
|
||||||
|
Dans la [documentation](https://gotify.net/docs/msgextras), j’ai trouvé quelques fonctionnalités supplémentaires, comme l’ajout d’images ou d’actions cliquables. En résumé, ça fait le job, c’est tout. Le processus d’installation est simple, l’utilisation n’est pas compliquée, mais je dois créer une application pour obtenir un token, puis ajouter ce token à chaque fois que je veux envoyer un message.
|
||||||
|
|
||||||
|
## Ntfy
|
||||||
|
|
||||||
|
Ntfy semble très propre, installons-le et voyons ce qu’il propose !
|
||||||
|
|
||||||
|
### Installation
|
||||||
|
|
||||||
|
Même histoire ici avec `docker compose` sur `dockerVM`. Je crée un nouveau dossier `ntfy` dans `/appli/docker/` et je colle le template de `docker-compose.yml`.
|
||||||
|
|
||||||
|
`docker-compose.yml`
|
||||||
|
```yaml
|
||||||
|
services:
|
||||||
|
ntfy:
|
||||||
|
image: binwiederhier/ntfy
|
||||||
|
container_name: ntfy
|
||||||
|
command:
|
||||||
|
- serve
|
||||||
|
volumes:
|
||||||
|
- /appli/data/ntfy/data:/var/cache/ntfy
|
||||||
|
environment:
|
||||||
|
- TZ=Europe/Paris
|
||||||
|
- NTFY_BASE_URL=https://ntfy.vezpi.me
|
||||||
|
- NTFY_CACHE_FILE=/var/cache/ntfy/cache.db
|
||||||
|
- NTFY_AUTH_FILE=/var/cache/ntfy/auth.db
|
||||||
|
- NTFY_ATTACHMENT_CACHE_DIR=/var/cache/ntfy/attachments
|
||||||
|
- NTFY_AUTH_DEFAULT_ACCESS=deny-all
|
||||||
|
- NTFY_BEHIND_PROXY=true
|
||||||
|
- NTFY_ENABLE_LOGIN=true
|
||||||
|
user: 1000:1000
|
||||||
|
networks:
|
||||||
|
- web
|
||||||
|
labels:
|
||||||
|
- traefik.enable=true
|
||||||
|
- traefik.http.routers.ntfy.rule=Host(`ntfy.vezpi.me`)
|
||||||
|
- traefik.http.routers.ntfy.entrypoints=https
|
||||||
|
- traefik.http.routers.ntfy.tls.certresolver=letsencrypt
|
||||||
|
- traefik.http.services.ntfy.loadbalancer.server.port=80
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD-SHELL", "wget -q --tries=1 http://ntfy:80/v1/health -O - | grep -Eo '\"healthy\"\\s*:\\s*true' || exit 1"]
|
||||||
|
interval: 60s
|
||||||
|
timeout: 10s
|
||||||
|
retries: 3
|
||||||
|
start_period: 40s
|
||||||
|
restart: unless-stopped
|
||||||
|
|
||||||
|
networks:
|
||||||
|
web:
|
||||||
|
external: true
|
||||||
|
```
|
||||||
|
|
||||||
|
Je crée aussi le dossier de volume persistant `/appli/data/ntfy/data/` dans `dockerVM` :
|
||||||
|
```bash
|
||||||
|
mkdir -p /appli/data/ntfy/data/
|
||||||
|
```
|
||||||
|
|
||||||
|
La [documentation](https://docs.ntfy.sh/config/) est impressionnante, j’ai essayé de rassembler la config pour un démarrage rapide. Je devrais être bon pour lancer le serveur.
|
||||||
|
|
||||||
|
Encore une fois ici, je crée un nouveau domaine pour mon proxy inverse Caddy sur OPNsense avec l’URL [https://ntfy.vezpi.me](https://ntfy.vezpi.me).
|
||||||
|
```bash
|
||||||
|
$ docker compose up -d
|
||||||
|
[+] Running 4/4
|
||||||
|
✔ ntfy Pulled
|
||||||
|
✔ f18232174bc9 Already exists
|
||||||
|
✔ f5bf7a328fac Pull complete
|
||||||
|
✔ 572c745ef6c3 Pull complete
|
||||||
|
[+] Running 1/1
|
||||||
|
✔ Container ntfy Started
|
||||||
|
```
|
||||||
|
|
||||||
|
✅ L’URL [https://ntfy.vezpi.me](https://ntfy.vezpi.me) me donne accès au tableau de bord Ntfy :
|
||||||
|

|
||||||
|
|
||||||
|
Au départ je n’ai aucun utilisateur et aucun n’est créé par défaut. Comme j’ai interdit tout accès anonyme dans la config, je dois en créer un.
|
||||||
|
|
||||||
|
Pour lister les utilisateurs, je peux utiliser cette commande :
|
||||||
|
```bash
|
||||||
|
$ docker exec -it ntfy ntfy user list
|
||||||
|
user * (role: anonymous, tier: none)
|
||||||
|
- no topic-specific permissions
|
||||||
|
- no access to any (other) topics (server config)
|
||||||
|
```
|
||||||
|
|
||||||
|
Je crée un utilisateur avec les privilèges d’administration :
|
||||||
|
```bash
|
||||||
|
$ docker exec -it ntfy ntfy user add --role=admin vez
|
||||||
|
user vez added with role admin
|
||||||
|
```
|
||||||
|
|
||||||
|
Je peux maintenant me connecter à l’interface Web, et passer en mode sombre, mes yeux me remercient.
|
||||||
|
|
||||||
|
### Topics
|
||||||
|
|
||||||
|
Dans Ntfy, il n’y a pas d’applications à créer, mais les messages sont regroupés dans des topics, plus lisibles qu’un token lors de l’envoi. Une fois le topic créé, je peux changer le nom d’affichage ou envoyer des messages de test. Sur l’interface Web, cependant, je ne trouve aucune option pour changer l’icône, alors que c’est possible depuis l’application Android, ce qui n’est pas très pratique.
|
||||||
|

|
||||||
|
### Tests
|
||||||
|
|
||||||
|
Envoyer un message est en fait plus difficile que prévu. Comme j’ai activé l’authentification, je dois aussi m’authentifier pour envoyer des messages :
|
||||||
|
```bash
|
||||||
|
curl \
|
||||||
|
-H "Title: Cooked!" \
|
||||||
|
-H "Priority: high" \
|
||||||
|
-d "The potatoes are ready!" \
|
||||||
|
-u "vez:<password>" \
|
||||||
|
https://ntfy.vezpi.me/potato
|
||||||
|
```
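
Plutôt que de passer le mot de passe dans la commande, un token d'accès devrait aussi faire l'affaire, quelque chose dans ce genre (valeur du token fictive, simple esquisse) :

```bash
# Esquisse : génération d'un token d'accès pour l'utilisateur, puis publication avec ce token
docker exec -it ntfy ntfy token add vez
curl \
  -H "Authorization: Bearer tk_<token>" \
  -H "Title: Cooked!" \
  -d "The potatoes are ready!" \
  https://ntfy.vezpi.me/potato
```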
|
||||||
|
|
||||||
|
### Application Android
|
||||||
|
|
||||||
|
Voici quelques captures de l’application Android Ntfy :
|
||||||
|

|
||||||
|
|
||||||
|
### Conclusion
|
||||||
|
|
||||||
|
Ntfy est une belle application avec une [documentation](https://docs.ntfy.sh/) vraiment solide. Les possibilités sont infinies et la liste des intégrations est impressionnante. L’installation n’était pas difficile mais demandait un peu plus de configuration. Le besoin d’utiliser la CLI pour configurer les utilisateurs et les permissions n’est pas très pratique.
|
||||||
|
|
||||||
|
Sur l’application Android, je regrette qu’il n’y ait pas une vue pour voir tous les messages des différents topics. En revanche, sur l’interface Web, j’aurais aimé pouvoir définir les icônes des topics. Ce que j’ai trouvé intéressant, c’est la possibilité d’avoir des topics depuis différents serveurs.
|
||||||
|
|
||||||
|
## Comparaison
|
||||||
|
|
||||||
|
**Gotify** est simple, tous les utilisateurs auront accès à toutes les applications. Pas besoin d'identifiant utilisateur pour envoyer des messages, seulement le token de l’application. L’application Android est efficace, mais personnellement, même si l’icône est amusante, je ne l’aime pas trop.
|
||||||
|
|
||||||
|
**Ntfy** semble plus avancé et complet, avec des permissions plus précises. L’interface est élégante tout en restant simple, les possibilités sont infinies.
|
||||||
|
|
||||||
|
Dans l’ensemble, seuls de petits détails me font préférer Ntfy à Gotify, par exemple, avoir accès à des topics de différents serveurs, les ACL ou la possibilité d’ajouter des émojis aux messages, mais les deux applications remplissent bien leur rôle.
|
||||||
|
|
||||||
|
## Implémentation de Notifications Réelles
|
||||||
|
|
||||||
|
Pendant que je mettais en place mon pipeline CI/CD pour le déploiement de mon blog, je voulais être averti chaque fois que quelque chose se passe, voyons comment je peux l’implémenter avec Ntfy.
|
||||||
|
|
||||||
|
### Contrôle d’Accès
|
||||||
|
|
||||||
|
Je pourrais utiliser mon utilisateur `admin` pour envoyer les messages depuis le pipeline et les recevoir sur mon appareil Android, même si c’est plus simple à configurer, je veux appliquer le principe de moindre privilège, ce que Ntfy permet. Je vais donc créer un utilisateur dédié pour mon pipeline CI/CD et un autre pour mon appareil Android.
|
||||||
|
|
||||||
|
#### Utilisateur Pipeline
|
||||||
|
|
||||||
|
Celui-ci ne pourra qu'envoyer des messages sur le topic `blog`, je l’appelle `gitea_blog`.
|
||||||
|
```bash
|
||||||
|
$ ntfy user add gitea_blog
|
||||||
|
user gitea_blog added with role user
|
||||||
|
$ ntfy access gitea_blog blog wo
|
||||||
|
granted write-only access to topic blog
|
||||||
|
|
||||||
|
user gitea_blog (role: user, tier: none)
|
||||||
|
- write-only access to topic blog
|
||||||
|
```
|
||||||
|
|
||||||
|
Je teste rapidement l’envoi d’un message sur ce topic :
|
||||||
|
```bash
|
||||||
|
$ curl -u gitea_blog:<password> -d "Message test from gitea_blog!" https://ntfy.vezpi.me/blog
|
||||||
|
{"id":"xIgwz9dr1w9Z","time":1749587681,"expires":1749630881,"event":"message","topic":"blog","message":"Message test from gitea_blog!"}
|
||||||
|
```
|
||||||
|
|
||||||
|

|
||||||
|
✅ Message reçu !
|
||||||
|
|
||||||
|
Je tente aussi un envoi sur mon topic de test :
|
||||||
|
```bash
|
||||||
|
$ curl -u gitea_blog:<password> -d "Message test from gitea_blog!" https://ntfy.vezpi.me/potato
|
||||||
|
{"code":40301,"http":403,"error":"forbidden","link":"https://ntfy.sh/docs/publish/#authentication"}
|
||||||
|
```
|
||||||
|
❌ Refusé comme attendu.
|
||||||
|
|
||||||
|
#### Utilisateur Android
|
||||||
|
|
||||||
|
Depuis mon appareil Android, je veux uniquement recevoir les messages, mais sur tous les topics. Je crée l’utilisateur `android_s25u` :
|
||||||
|
```bash
|
||||||
|
$ ntfy user add android_s25u
|
||||||
|
user android_s25u added with role user
|
||||||
|
$ ntfy access android_s25u "*" ro
|
||||||
|
granted read-only access to topic *
|
||||||
|
|
||||||
|
user android_s25u (role: user, tier: none)
|
||||||
|
- read-only access to topic *
|
||||||
|
```
|
||||||
|
|
||||||
|
✅ Après avoir configuré l’utilisateur dans l’application Android Ntfy, je peux lire mes messages sur `https://ntfy.vezpi.me/blog` et aussi sur le topic de test.
|
||||||
|
|
||||||
|
### Implémentation
|
||||||
|
|
||||||
|
Maintenant que mes utilisateurs sont prêts, je veux ajouter un job `Notify` dans mon pipeline CI/CD pour le déploiement du blog dans **Gitea**, vous pouvez retrouver le workflow complet dans [cet article]({{< ref "post/4-blog-deployment-ci-cd-pipeline-gitea-actions" >}}).
|
||||||
|
|
||||||
|
#### Créer un Secret
|
||||||
|
|
||||||
|
Pour permettre à mon Gitea Runner d’utiliser l’utilisateur `gitea_blog` dans ses jobs, je veux créer un secret. J’explore le dépôt Gitea `Blog` dans `Settings`, puis `Actions` > `Secrets` > `Add Secret`. J’y mets la valeur du secret au format `<utilisateur>:<password>` :
|
||||||
|

|
||||||
|
|
||||||
|
### Écrire le Code `Notify`
|
||||||
|
|
||||||
|
Je peux maintenant écrire le code qui m’enverra un message quand un nouveau déploiement se produit.
|
||||||
|
|
||||||
|
Si le déploiement est un succès, la priorité sera minimale, pas besoin de notification sur mon mobile, juste pour garder une trace dans l’application Android Ntfy si besoin.
|
||||||
|
|
||||||
|
Si quelque chose échoue, je veux être notifié sur mon mobile avec une priorité plus élevée. Ntfy me permet d’ajouter des actions sur mes notifications, je vais en créer 2 :
|
||||||
|
- **View Run** : Lien direct vers le workflow dans Gitea pour voir ce qu’il s’est passé.
|
||||||
|
- **Verify Blog** : Lien vers le blog pour vérifier qu’il est toujours en ligne.
|
||||||
|
```yaml
|
||||||
|
Notify:
|
||||||
|
needs: [Check-Rebuild, Build, Deploy-Staging, Test-Staging, Merge, Deploy-Production, Test-Production, Clean]
|
||||||
|
runs-on: ubuntu
|
||||||
|
if: always()
|
||||||
|
env:
|
||||||
|
NTFY_URL: https://ntfy.vezpi.me
|
||||||
|
NTFY_TOPIC: blog
|
||||||
|
NTFY_TOKEN: ${{ secrets.NTFY_CREDENTIALS }}
|
||||||
|
steps:
|
||||||
|
- name: Notify Workflow Result
|
||||||
|
run: |
|
||||||
|
if [[
|
||||||
|
"${{ needs.Check-Rebuild.result }}" == "success" &&
|
||||||
|
("${{ needs.Build.result }}" == "success" || "${{ needs.Build.result }}" == "skipped") &&
|
||||||
|
"${{ needs.Deploy-Staging.result }}" == "success" &&
|
||||||
|
"${{ needs.Test-Staging.result }}" == "success" &&
|
||||||
|
"${{ needs.Merge.result }}" == "success" &&
|
||||||
|
"${{ needs.Deploy-Production.result }}" == "success" &&
|
||||||
|
"${{ needs.Test-Production.result }}" == "success" &&
|
||||||
|
("${{ needs.Clean.result }}" == "success" || "${{ needs.Clean.result }}" == "skipped")
|
||||||
|
]]; then
|
||||||
|
curl -H "Priority: min" \
|
||||||
|
-H "Tags: white_check_mark" \
|
||||||
|
-d "Blog workflow completed successfully." \
|
||||||
|
-u ${NTFY_TOKEN} \
|
||||||
|
${NTFY_URL}/${NTFY_TOPIC}
|
||||||
|
else
|
||||||
|
curl -H "Priority: high" \
|
||||||
|
-H "Tags: x" \
|
||||||
|
-H "Actions: view, View Run, ${{ gitea.server_url }}/${{ gitea.repository }}/actions/runs/${{ gitea.run_number }}, clear=true; \
|
||||||
|
view, Verify Blog, https://blog.vezpi.com, clear=true" \
|
||||||
|
-d "Blog workflow failed!" \
|
||||||
|
-u ${NTFY_TOKEN} \
|
||||||
|
${NTFY_URL}/${NTFY_TOPIC}
|
||||||
|
fi
|
||||||
|
```
|
||||||
|
|
||||||
|
✅ Test des deux cas, fonctionne comme prévu :
|
||||||
|

|
||||||
|
|
||||||
|
## Conclusion
|
||||||
|
|
||||||
|
Après avoir testé **Gotify** et **Ntfy**, j’ai trouvé mon prochain système de notifications. Les deux sont bons pour le job, mais je devais en choisir un et j’ai une petite préférence pour Ntfy.
|
||||||
|
|
||||||
|
L’application serait parfaite si je pouvais gérer les utilisateurs et les accès depuis l’interface Web. Aussi, je préférerais pouvoir gérer l’icône des topics globalement plutôt que depuis mon mobile.
|
||||||
|
|
||||||
|
Quoi qu’il en soit, je suis très satisfait du résultat de cette première implémentation et j’ai hâte d’ajouter des notifications ailleurs !
|
379
content/post/5-notification-system-gotify-vs-ntfy.md
Normal file
@@ -0,0 +1,379 @@
---
|
||||||
|
slug: notification-system-gotify-vs-ntfy
|
||||||
|
title: Testing Gotify and Ntfy, a Self-Hosted Notification System
|
||||||
|
description: Gotify or Ntfy? I tested both to create a reliable, self-hosted notification system for my homelab and integrated it with CI/CD pipeline.
|
||||||
|
date: 2025-06-13
|
||||||
|
draft: false
|
||||||
|
tags:
|
||||||
|
- notification
|
||||||
|
- ntfy
|
||||||
|
- gotify
|
||||||
|
- ci-cd
|
||||||
|
categories:
|
||||||
|
- homelab
|
||||||
|
---
|
||||||
|
## Intro
|
||||||
|
|
||||||
|
To know what is going on in my homelab and be warned when something fails, I want to setup a notification system where almost anything could seamlessly send me a message that I would receive on my mobile.
|
||||||
|
|
||||||
|
In the past I was using **Pushover**, which was great, but I want to explore new options, more modern and eventually self-hosted.
|
||||||
|
|
||||||
|
## Choose the Right Notification System
|
||||||
|
|
||||||
|
The key elements to determine the right system for me would be:
|
||||||
|
- **Android application**: mandatory, a sleek and intuitive UI is important.
|
||||||
|
- **Integration**: I want the service integrated anywhere I want to be notified.
|
||||||
|
- **Self hosted**: Host it myself is always better for privacy.
|
||||||
|
|
||||||
|
After a quick research, the most suitable tools on the market are:
|
||||||
|
- **Ntfy**
|
||||||
|
- **Gotify**
|
||||||
|
|
||||||
|
Given the comments on internet and after testing quickly both Android app, I can't really decide. I think Ntfy is the better option, but I will install and test them both to make my mind!
|
||||||
|
|
||||||
|
## Gotify
|
||||||
|
|
||||||
|
I heard about Gotify some time ago, actually before looking at other alternatives, I had that one in mind. I quickly had a look at its [documentation](https://gotify.net/docs/) and this seems to be pretty straight forward.
|
||||||
|
|
||||||
|
### Installation
|
||||||
|
|
||||||
|
As usual, I will deploy the Gotify server with `docker compose` on `dockerVM`, a VM hosting my applications as Docker containers. I create a new `gotify` folder in `/appli/docker/` and copy-paste my `docker-compose.yml` template in there.
|
||||||
|
|
||||||
|
`docker-compose.yml`
|
||||||
|
```yaml
|
||||||
|
services:
|
||||||
|
gotify:
|
||||||
|
image: gotify/server
|
||||||
|
container_name: gotify
|
||||||
|
volumes:
|
||||||
|
- /appli/data/gotify/data/:/app/data
|
||||||
|
environment:
|
||||||
|
- TZ=Europe/Paris
|
||||||
|
- GOTIFY_DEFAULTUSER_NAME=${GOTIFY_DEFAULTUSER_NAME}
|
||||||
|
- GOTIFY_DEFAULTUSER_PASS=${GOTIFY_DEFAULTUSER_PASS}
|
||||||
|
networks:
|
||||||
|
- web
|
||||||
|
labels:
|
||||||
|
- traefik.enable=true
|
||||||
|
- traefik.http.routers.gotify.rule=Host(`gotify.vezpi.me`)
|
||||||
|
- traefik.http.routers.gotify.entrypoints=https
|
||||||
|
- traefik.http.routers.gotify.tls.certresolver=letsencrypt
|
||||||
|
- traefik.http.services.gotify.loadbalancer.server.port=80
|
||||||
|
restart: always
|
||||||
|
|
||||||
|
networks:
|
||||||
|
web:
|
||||||
|
external: true
|
||||||
|
```
|
||||||
|
|
||||||
|
`.env`
|
||||||
|
```
|
||||||
|
GOTIFY_DEFAULTUSER_NAME=vez
|
||||||
|
GOTIFY_DEFAULTUSER_PASS=<password>
|
||||||
|
```
|
||||||
|
|
||||||
|
In the [documentation](https://gotify.net/docs/config), I can see that several database backends can be used; by default it uses **sqlite3**, which will be fine for the test. Switching to **PostgreSQL** could be an option if I decide to stick with Gotify. On that same page, I can also see the different environment variables I can use to configure the server from the `docker-compose.yml` file.
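
If I later switch to **PostgreSQL**, the change should presumably come down to the database environment variables, something along these lines (host, user and password are placeholders):

```bash
# Hypothetical sketch: point Gotify at PostgreSQL instead of the default sqlite3
GOTIFY_DATABASE_DIALECT=postgres
GOTIFY_DATABASE_CONNECTION="host=db port=5432 user=gotify dbname=gotify password=<password> sslmode=disable"
```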
|
||||||
|
|
||||||
|
When my config files are ready, I create a new entry in my Caddy plugin in OPNsense to forward my new Gotify URL: https://gotify.vezpi.me.
|
||||||
|
|
||||||
|
I also create the folder `/appli/data/gotify/data/` in `dockerVM` to mount it as a volume and store data:
|
||||||
|
```bash
|
||||||
|
mkdir -p /appli/data/gotify/data/
|
||||||
|
```
|
||||||
|
|
||||||
|
Finally, I spin up the Docker stack:
|
||||||
|
```bash
|
||||||
|
$ docker compose up -d
|
||||||
|
[+] Running 5/5
|
||||||
|
✔ gotify Pulled
|
||||||
|
✔ 63ce8e957633 Pull complete
|
||||||
|
✔ e7def9680541 Pull complete
|
||||||
|
✔ 9a1821c438b4 Pull complete
|
||||||
|
✔ ad316556c9ff Pull complete
|
||||||
|
[+] Running 1/1
|
||||||
|
✔ Container gotify Started
|
||||||
|
```
|
||||||
|
|
||||||
|
✅ Reaching the URL https://gotify.vezpi.me gives me the Gotify login page:
|
||||||
|

|
||||||
|
|
||||||
|
After login, I can access the dashboard, with no messages obviously:
|
||||||
|

|
||||||
|
|
||||||
|
### Creating an Application
|
||||||
|
|
||||||
|
To allow messages to be pushed, I first need to create an application under which the messages will be grouped. This can be done in two ways:
|
||||||
|
- **WebUI**
|
||||||
|
- **REST-API**
|
||||||
|
|
||||||
|
For the test, I will use the WebUI: I click the `APPS` button at the top, then `CREATE APPLICATION`, and choose a wonderful application name and description.
|
||||||
|

|
||||||
|
|
||||||
|
Once my application is created, a token is generated for it. I can edit the application to change anything; I can also upload an icon.
|
||||||
|

|
||||||
|
|
||||||
|
### Testing
|
||||||
|
|
||||||
|
My application is now visible in the sidebar, so let's try to send a message. To push it, I can use `curl` with the application's token.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl "https://gotify.vezpi.me/message?token=<apptoken>" -F "title=Cooked!" -F "message=The potoaries are ready!" -F "priority=5"
|
||||||
|
```
|
||||||
|
I instantly received the notification on my mobile and on my browser.
|
||||||
|
|
||||||
|
I then sent another message, but with a lower priority: `-2`. I didn't get any notification in my browser, and I can see a slight difference between the two messages. On my mobile, only my watch received it: nothing appeared on the screen, but I can find the message in the notification center.
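
For the record, that second test was simply the same call with a lower priority value:

```bash
# Same message pushed again with a low priority: no pop-up, it only lands in the notification center
curl "https://gotify.vezpi.me/message?token=<apptoken>" -F "title=Cooked!" -F "message=The potatoes are ready!" -F "priority=-2"
```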
|
||||||
|

|
||||||
|
|
||||||
|
### Android App
|
||||||
|
|
||||||
|
Here are some screenshots from my Android device:
|
||||||
|

|
||||||
|
|
||||||
|
For some reason, a notification randomly pops up to tell me that I'm connected to Gotify:
|
||||||
|

|
||||||
|
### Conclusion
|
||||||
|
|
||||||
|
In the [documentation](https://gotify.net/docs/msgextras), I found some extra features, like adding images or click actions. In summary, it does the job, that's it. The installation process is easy and usage is not hard, but I need to create an application to get a token, then pass this token every time I want to push messages.
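
As an illustration, a click action from those message extras would presumably look something like this (token and target URL are placeholders):

```bash
# Illustrative sketch: Gotify message with a URL opened when the notification is clicked
curl -X POST "https://gotify.vezpi.me/message?token=<apptoken>" \
  -H "Content-Type: application/json" \
  -d '{
        "title": "Cooked!",
        "message": "The potatoes are ready!",
        "priority": 5,
        "extras": {
          "client::notification": {
            "click": { "url": "https://blog.vezpi.com" }
          }
        }
      }'
```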
|
||||||
|
|
||||||
|
## Ntfy
|
||||||
|
|
||||||
|
Ntfy seems very clean, let's install it and see what it's got!
|
||||||
|
|
||||||
|
### Installation
|
||||||
|
|
||||||
|
Same story here with `docker compose` on `dockerVM`. I create a new `ntfy` folder in `/appli/docker/` and I copy paste the `docker-compose.yml` template.
|
||||||
|
|
||||||
|
`docker-compose.yml`
|
||||||
|
```yaml
services:
  ntfy:
    image: binwiederhier/ntfy
    container_name: ntfy
    command:
      - serve
    volumes:
      - /appli/data/ntfy/data:/var/cache/ntfy
    environment:
      - TZ=Europe/Paris
      - NTFY_BASE_URL=https://ntfy.vezpi.me
      - NTFY_CACHE_FILE=/var/cache/ntfy/cache.db
      - NTFY_AUTH_FILE=/var/cache/ntfy/auth.db
      - NTFY_ATTACHMENT_CACHE_DIR=/var/cache/ntfy/attachments
      - NTFY_AUTH_DEFAULT_ACCESS=deny-all
      - NTFY_BEHIND_PROXY=true
      - NTFY_ENABLE_LOGIN=true
    user: 1000:1000
    networks:
      - web
    labels:
      - traefik.enable=true
      - traefik.http.routers.ntfy.rule=Host(`ntfy.vezpi.me`)
      - traefik.http.routers.ntfy.entrypoints=https
      - traefik.http.routers.ntfy.tls.certresolver=letsencrypt
      - traefik.http.services.ntfy.loadbalancer.server.port=80
    healthcheck:
      test: ["CMD-SHELL", "wget -q --tries=1 http://ntfy:80/v1/health -O - | grep -Eo '\"healthy\"\\s*:\\s*true' || exit 1"]
      interval: 60s
      timeout: 10s
      retries: 3
      start_period: 40s
    restart: unless-stopped

networks:
  web:
    external: true
```
|
||||||
|
|
||||||
|
I also create the persistent volume folder `/appli/data/ntfy/data/` in `dockerVM`:
|
||||||
|
```bash
|
||||||
|
mkdir -p /appli/data/ntfy/data/
|
||||||
|
```
|
||||||
|
|
||||||
|
The [documentation](https://docs.ntfy.sh/config/) is impressive; I tried to gather the config needed for a quick start. I should be good to start the server.
|
||||||
|
|
||||||
|
Again here, I create a new domain for my Caddy reverse proxy plugin in OPNsense for the URL https://ntfy.vezpi.me.
|
||||||
|
```bash
|
||||||
|
$ docker compose up -d
|
||||||
|
[+] Running 4/4
|
||||||
|
✔ ntfy Pulled
|
||||||
|
✔ f18232174bc9 Already exists
|
||||||
|
✔ f5bf7a328fac Pull complete
|
||||||
|
✔ 572c745ef6c3 Pull complete
|
||||||
|
[+] Running 1/1
|
||||||
|
✔ Container ntfy Started
|
||||||
|
```
|
||||||
|
|
||||||
|
✅ The URL https://ntfy.vezpi.me takes me to the Ntfy dashboard:
|
||||||
|

|
||||||
|
|
||||||
|
At first I don't have any users and none are created by default. Since I denied all anonymous access in the config, I need to create one.
|
||||||
|
|
||||||
|
To list the users, I can use this command:
|
||||||
|
```bash
|
||||||
|
$ docker exec -it ntfy ntfy user list
|
||||||
|
user * (role: anonymous, tier: none)
|
||||||
|
- no topic-specific permissions
|
||||||
|
- no access to any (other) topics (server config)
|
||||||
|
```
|
||||||
|
|
||||||
|
I create a user with admin privileges:
|
||||||
|
```bash
|
||||||
|
$ docker exec -it ntfy ntfy user add --role=admin vez
|
||||||
|
user vez added with role admin
|
||||||
|
```
|
||||||
|
|
||||||
|
I can now log into the WebUI and switch to dark mode, my eyes are grateful.
|
||||||
|
|
||||||
|
### Topics
|
||||||
|
|
||||||
|
In Ntfy there are no applications to create; instead, messages are grouped into topics, which are more readable than a token when sending messages. Once the topic is created, I can change its display name or send test messages. On the WebUI, though, I can't find any option to change the icon, while the option exists in the Android app, which is not really convenient.
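
As a possible workaround, ntfy also seems to support attaching an icon per message with the `Icon` header, something like this (the icon URL is a placeholder, and this only affects the Android notification):

```bash
# Hypothetical sketch: set the notification icon at publish time instead of in the app
curl \
  -H "Icon: https://example.com/potato.png" \
  -H "Title: Cooked!" \
  -d "The potatoes are ready!" \
  -u "vez:<password>" \
  https://ntfy.vezpi.me/potato
```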
|
||||||
|

|
||||||
|
|
||||||
|
### Testing
|
||||||
|
|
||||||
|
Sending a message is actually harder than I thought. Because I set up authentication, I also need to authenticate to send messages:
|
||||||
|
```bash
|
||||||
|
curl \
|
||||||
|
-H "Title: Cooked!" \
|
||||||
|
-H "Priority: high" \
|
||||||
|
-d "The potatoes are ready!" \
|
||||||
|
-u "vez:<password>" \
|
||||||
|
https://ntfy.vezpi.me/potato
|
||||||
|
```
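
Instead of putting the password in the command, an access token should presumably work too, something along these lines (token value is a placeholder, just a sketch):

```bash
# Sketch: generate an access token for the user, then publish with it
docker exec -it ntfy ntfy token add vez
curl \
  -H "Authorization: Bearer tk_<token>" \
  -H "Title: Cooked!" \
  -d "The potatoes are ready!" \
  https://ntfy.vezpi.me/potato
```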
|
||||||
|
|
||||||
|
### Android App
|
||||||
|
|
||||||
|
Here are some screenshots of the Ntfy Android app:
|
||||||
|

|
||||||
|
### Conclusion
|
||||||
|
|
||||||
|
Ntfy is a beautiful application with really strong [documentation](https://docs.ntfy.sh/). The possibilities are endless and the list of integrations is impressive. The installation was not hard but required a bit more setup. The need to use the CLI to configure users and permissions is not really convenient.

On the Android app, I regret that there is no view showing the messages from all topics at once. On the WebUI side, I would have liked to be able to set the topic icons. What I found interesting is the possibility of having topics from different servers.
|
||||||
|
|
||||||
|
## Comparison
|
||||||
|
|
||||||
|
**Gotify** is simple: all users have access to all applications. You don't need user credentials to push messages, only the application token. The Android app is effective, but personally, even if the icon is funny, I don't really like it.

**Ntfy** feels more advanced and complete, with fine-grained permissions. The UI is sleek yet simple, and the possibilities are endless.

Overall, only small details make me favor Ntfy over Gotify, e.g., having access to topics from different servers, the ACLs, or being able to add emojis to messages, but both applications are really good for the job.
|
||||||
|
|
||||||
|
## Implementing Real Scenario Notification
|
||||||
|
|
||||||
|
While I was setting up my CI/CD pipeline for my blog deployment, I wanted to be warned whenever something happens. Let's see how I can implement that with Ntfy.
|
||||||
|
|
||||||
|
### Access Control
|
||||||
|
|
||||||
|
I could use my `admin` user both to send messages from the pipeline and to receive them on my Android device. While this is easier to set up, I want to apply the principle of least privilege, which Ntfy allows. I will therefore create a dedicated user for my CI/CD pipeline and another for my Android device.
|
||||||
|
|
||||||
|
#### Pipeline User
|
||||||
|
|
||||||
|
This one will only be allowed to send messages on the `blog` topic; I call it `gitea_blog`.
|
||||||
|
```bash
|
||||||
|
$ ntfy user add gitea_blog
|
||||||
|
user gitea_blog added with role user
|
||||||
|
$ ntfy access gitea_blog blog wo
|
||||||
|
granted write-only access to topic blog
|
||||||
|
|
||||||
|
user gitea_blog (role: user, tier: none)
|
||||||
|
- write-only access to topic blog
|
||||||
|
```
|
||||||
|
|
||||||
|
I quickly try to send a message on that topic:
|
||||||
|
```bash
|
||||||
|
$ curl -u gitea_blog:<password> -d "Message test from gitea_blog!" https://ntfy.vezpi.me/blog
|
||||||
|
{"id":"xIgwz9dr1w9Z","time":1749587681,"expires":1749630881,"event":"message","topic":"blog","message":"Message test from gitea_blog!"}
|
||||||
|
```
|
||||||
|
|
||||||
|

|
||||||
|
✅ Message received!
|
||||||
|
|
||||||
|
I also try to send a message on my test topic:
|
||||||
|
```bash
|
||||||
|
$ curl -u gitea_blog:<password> -d "Message test from gitea_blog!" https://ntfy.vezpi.me/potato
|
||||||
|
{"code":40301,"http":403,"error":"forbidden","link":"https://ntfy.sh/docs/publish/#authentication"}
|
||||||
|
```
|
||||||
|
❌ Denied as expected.
|
||||||
|
|
||||||
|
#### Android Device User
|
||||||
|
|
||||||
|
From my Android device I only want to receive messages, but on all topics. I create the user `android_s25u`:
|
||||||
|
```bash
|
||||||
|
$ ntfy user add android_s25u
|
||||||
|
user android_s25u added with role user
|
||||||
|
$ ntfy access android_s25u "*" ro
|
||||||
|
granted read-only access to topic *
|
||||||
|
|
||||||
|
user android_s25u (role: user, tier: none)
|
||||||
|
- read-only access to topic *
|
||||||
|
```
|
||||||
|
|
||||||
|
✅ After setting up the user in the Ntfy Android app, I can read my messages on `https://ntfy.vezpi.me/blog` and also on the test topic.
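
Outside of the app, the same read-only user can presumably also follow a topic from the command line through the subscribe API, for example:

```bash
# Sketch: stream messages from the blog topic as JSON with the read-only user
curl -s -u android_s25u:<password> https://ntfy.vezpi.me/blog/json

# Or only fetch the cached messages and exit
curl -s -u android_s25u:<password> "https://ntfy.vezpi.me/blog/json?poll=1"
```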
|
||||||
|
|
||||||
|
### Implementation
|
||||||
|
|
||||||
|
Now that my users are set up, I want to add a `Notify` job to my CI/CD pipeline for the blog deployment in **Gitea**. You can find the full workflow in [this article]({{< ref "post/4-blog-deployment-ci-cd-pipeline-gitea-actions" >}}).
|
||||||
|
|
||||||
|
#### Create a Secret
|
||||||
|
|
||||||
|
To allow my Gitea Runner to use the `gitea_blog` user in its jobs, I create a secret. In the `Blog` Gitea repository, I go to `Settings`, then `Actions` > `Secrets` > `Add Secret`, and set the secret value in the `<user>:<password>` format:
|
||||||
|

|
||||||
|
|
||||||
|
### Write the `Notify` Code
|
||||||
|
|
||||||
|
Now I can write the code that will send me a message when a new deployment occurs.

If the deployment is successful, the priority will be minimal: no notification needed on my mobile, just a record I can check in the Ntfy Android app if needed.

If anything fails, I want to be notified on my mobile with a higher priority. Ntfy lets me add actions to my notifications, so I will create two:
- **View Run**: Direct link to the workflow run in Gitea to see what happened.
- **Verify Blog**: Link to the blog to make sure it is still online.

```yaml
Notify:
  needs: [Check-Rebuild, Build, Deploy-Staging, Test-Staging, Merge, Deploy-Production, Test-Production, Clean]
  runs-on: ubuntu
  if: always()
  env:
    NTFY_URL: https://ntfy.vezpi.me
    NTFY_TOPIC: blog
    NTFY_TOKEN: ${{ secrets.NTFY_CREDENTIALS }}
  steps:
    - name: Notify Workflow Result
      run: |
        if [[
          "${{ needs.Check-Rebuild.result }}" == "success" &&
          ("${{ needs.Build.result }}" == "success" || "${{ needs.Build.result }}" == "skipped") &&
          "${{ needs.Deploy-Staging.result }}" == "success" &&
          "${{ needs.Test-Staging.result }}" == "success" &&
          "${{ needs.Merge.result }}" == "success" &&
          "${{ needs.Deploy-Production.result }}" == "success" &&
          "${{ needs.Test-Production.result }}" == "success" &&
          ("${{ needs.Clean.result }}" == "success" || "${{ needs.Clean.result }}" == "skipped")
        ]]; then
          curl -H "Priority: min" \
            -H "Tags: white_check_mark" \
            -d "Blog workflow completed successfully." \
            -u ${NTFY_TOKEN} \
            ${NTFY_URL}/${NTFY_TOPIC}
        else
          curl -H "Priority: high" \
            -H "Tags: x" \
            -H "Actions: view, View Run, ${{ gitea.server_url }}/${{ gitea.repository }}/actions/runs/${{ gitea.run_number }}, clear=true; \
              view, Verify Blog, https://blog.vezpi.com, clear=true" \
            -d "Blog workflow failed!" \
            -u ${NTFY_TOKEN} \
            ${NTFY_URL}/${NTFY_TOPIC}
        fi
```
|
||||||
|
|
||||||
|
✅ I tested both cases, they work as expected:
|
||||||
|

|
||||||
|
|
||||||
|
## Conclusion
|
||||||
|
|
||||||
|
After testing **Gotify** and **Ntfy**, I have found my next notification system. They are both good for the job, but I had to pick one and I have a slight preference for Ntfy.

The application would be perfect if I could manage users and access from the WebUI; I would also prefer to manage topic icons globally rather than having to upload them from my mobile.

Anyway, I'm very satisfied with the result of this first implementation and I look forward to adding notifications elsewhere!
|
708
content/post/6-ac-automation-home-assistant-node-red.fr.md
Normal file
@@ -0,0 +1,708 @@
---
|
||||||
|
slug: ac-automation-home-assistant-node-red
|
||||||
|
title: Automatisation Complète de la Climatisation avec Home Assistant et Node-RED
|
||||||
|
description: Comment j’automatise ma clim avec Home Assistant et Node-RED pour réagir à la température, l’humidité et à tous les évènements quotidiens.
|
||||||
|
date: 2025-06-27
|
||||||
|
draft: false
|
||||||
|
tags:
|
||||||
|
- home-automation
|
||||||
|
- home-assistant
|
||||||
|
- node-red
|
||||||
|
categories:
|
||||||
|
- automation
|
||||||
|
---
|
||||||
|
## Intro
|
||||||
|
|
||||||
|
Dans mon appartement, j’ai un système de climatisation Daikin, qui me permet de rafraîchir en été mais aussi de chauffer en hiver. Il est composé de 3 unités intérieures :
|
||||||
|
- Salon
|
||||||
|
- Chambre parentale
|
||||||
|
- Couloir (juste en face de mon bureau et de la chambre de mon fils)
|
||||||
|
|
||||||
|
J’ai toujours trouvé ça pénible de devoir les allumer manuellement quand j’en avais besoin, et j’oubliais souvent de les éteindre ensuite, sans parler de la télécommande que je passais mon temps à chercher.
|
||||||
|
|
||||||
|
Et si je pouvais automatiser tout ça ? Après tout, j’utilise déjà Home Assistant pour piloter beaucoup de choses chez moi, alors contrôler la clim, ça me semble logique.
|
||||||
|
|
||||||
|
### Home Assistant
|
||||||
|
|
||||||
|
Home Assistant, c’est le cerveau de ma maison connectée. Il relie tous mes appareils (lumières, capteurs, volets, etc.) dans une interface unique. Sa vraie force, c’est la possibilité de créer des automatisations : si quelque chose se passe, alors fait ça. Des actions simples comme “allumer la lumière de la cuisine quand un mouvement est détecté” se mettent en place en quelques clics. Et pour des scénarios plus avancés, Home Assistant propose un système de scripts en YAML avec des conditions, des minuteries, des déclencheurs, et même du templating.
|
||||||
|
|
||||||
|
Mais dès qu’on commence à faire des automatisations un peu complexes, qui dépendent de plusieurs capteurs, d’horaires spécifiques ou de la présence de quelqu’un, ça devient vite difficile à lire. Les blocs de code YAML s’allongent, et on ne sait plus trop ce qui fait quoi, surtout quand on veut corriger un petit détail plusieurs semaines plus tard.
|
||||||
|
|
||||||
|
### Node-RED
|
||||||
|
|
||||||
|
C’est exactement pour ça que je suis passé à Node-RED. C’est un outil visuel qui permet de construire des logiques avec des blocs appelés “nœuds”, qu’on relie entre eux avec des flèches pour créer un **flow**. Chaque nœud fait une petite action : déclencher à une certaine heure, vérifier une condition, envoyer une commande à un appareil, etc. Au lieu d’écrire du YAML, on glisse les éléments, on les connecte, et c’est tout.
|
||||||
|
|
||||||
|
Node-RED ne remplace pas Home Assistant, il le renforce. Je ne détaillerai pas l'installation de Node-RED ni son intégration à HA, je l'ai fait il y a deux ans, mais de mémoire c'est assez simple.
|
||||||
|
|
||||||
|
## Ancien Workflow
|
||||||
|
|
||||||
|
J’avais déjà une solution plutôt efficace pour contrôler ma climatisation via Home Assistant et Node-RED, mais je voulais l’améliorer pour qu’elle prenne aussi en compte le taux d’humidité dans l’appartement. Mon workflow actuel, bien qu’il fonctionne, n’était pas vraiment évolutif et assez difficile à maintenir :
|
||||||
|

|
||||||
|
|
||||||
|
## Nouveau Workflow
|
||||||
|
|
||||||
|
Plutôt que de bricoler ce flow existant, j’ai préféré repartir de zéro avec le même objectif : piloter le système de climatisation en prenant en compte tous les capteurs disponibles : thermomètres, humidité, capteurs d’ouverture, présence des occupants, moment de la journée, etc.
|
||||||
|
|
||||||
|
### Objectifs
|
||||||
|
|
||||||
|
L’idée est assez simple : ne plus avoir à penser à la climatisation, tout en restant efficace.
|
||||||
|
|
||||||
|
Mais concrètement, qu’est-ce que ça veut dire ? Je veux que la température et le taux d’humidité restent dans des valeurs confortables, que je sois présent ou non. Si j’ouvre les fenêtres, la clim doit s’arrêter. Si l’air est trop humide, je veux qu’il soit asséché. Si j’allume ou éteins manuellement la clim, je ne veux pas que ça écrase mes réglages. La nuit, je n’ai pas besoin de rafraîchir le salon et je veux aussi que le système soit silencieux, etc.
|
||||||
|
|
||||||
|
Pour m’aider à faire tout ça, j’utilise 4 [capteurs de température et d’humidité Aqara](https://eu.aqara.com/fr-eu/products/aqara-temperature-and-humidity-sensor), un dans chacune de mes pièces principales. J’utilise aussi quelques [capteurs d’ouverture Aqara](https://eu.aqara.com/fr-eu/products/aqara-door-and-window-sensor) pour savoir si une fenêtre est ouverte.
|
||||||
|
|
||||||
|
### Workflow
|
||||||
|
|
||||||
|
Laissez-moi vous présenter mon nouveau workflow de climatisation dans Node-RED, et vous expliquer en détail comment il fonctionne :
|
||||||
|

|
||||||
|
|
||||||
|
#### 1. Capteurs de Température
|
||||||
|
|
||||||
|
Dans le premier nœud, j’ai regroupé tous les capteurs thermiques dans un seul `trigger state node`, en ajoutant non seulement la température mais aussi le taux d’humidité géré par chaque capteur. Ce nœud contient donc une liste de 8 entités (2 pour chaque capteur). À chaque fois qu’une de ces 8 valeurs change, le nœud est déclenché :
|
||||||
|

|
||||||
|
|
||||||
|
Chacun de mes capteurs thermiques porte un nom de couleur en français, car ils ont tous un autocollant coloré pour les distinguer :
|
||||||
|
- **Jaune** : Salon
|
||||||
|
- **Bleu** : Chambre
|
||||||
|
- **Rouge** : Bureau
|
||||||
|
- **Vert** : Chambre de mon fils
|
||||||
|
|
||||||
|
Le deuxième nœud est un `function node` dont le rôle est de déterminer à quelle pièce appartient le capteur :
|
||||||
|
```js
|
||||||
|
const association = {
|
||||||
|
"temperature_jaune": "salon",
|
||||||
|
"temperature_bleu": "chambre",
|
||||||
|
"temperature_rouge": "couloir",
|
||||||
|
"temperature_vert": "couloir"
|
||||||
|
};
|
||||||
|
|
||||||
|
// Match pattern like: sensor.temperature_rouge_temperature
|
||||||
|
const match = msg.topic.match(/^sensor\.(.+)_(temperature|humidity)$/);
|
||||||
|
|
||||||
|
if (!match) {
|
||||||
|
node.warn("Topic format not recognized: " + msg.topic);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
msg.payload = {
|
||||||
|
room: association[match[1]],
|
||||||
|
sensor: match[1]
|
||||||
|
};
|
||||||
|
|
||||||
|
return msg;
|
||||||
|
```
|
||||||
|
|
||||||
|
Pour le dernier nœud, dans la majorité des cas, les capteurs envoient deux messages simultanés : l’un pour la température, l’autre pour l’humidité. J’ai donc ajouté un `join node` pour fusionner ces deux messages s’ils sont envoyés dans la même seconde :
|
||||||
|

|
||||||
|
|
||||||
|
#### 2. Notification
|
||||||
|
|
||||||
|
Il peut arriver que les capteurs de température n’envoient plus d’état pendant un certain temps, pour une raison ou une autre. Dans ce cas, ils renvoient simplement leur dernière valeur connue, ce qui peut bloquer l’unité de climatisation associée.
|
||||||
|
|
||||||
|
La solution que j’ai trouvée efficace consiste à envoyer une notification si un capteur n’a pas transmis de nouvelle valeur depuis plus de 3 heures. En fonctionnement normal, chaque capteur envoie une mise à jour environ toutes les 15 minutes.
|
||||||
|
|
||||||
|
Le premier nœud est un `function node` un peu technique, qui crée une variable de flux comme minuteur pour chaque capteur. Une fois le délai écoulé, un message est envoyé au nœud suivant :
|
||||||
|
```js
|
||||||
|
const sensor = msg.payload.sensor;
|
||||||
|
const timeoutKey = `watchdog_${sensor}`;
|
||||||
|
const messages = {
|
||||||
|
"temperature_jaune": {"title": "Température Salon", "message": "Capteur de température du salon semble hors service"},
|
||||||
|
"temperature_bleu": {"title": "Température Chambre", "message": "Capteur de température de la chambre semble hors service"},
|
||||||
|
"temperature_rouge": {"title": "Température Bureau", "message": "Capteur de température du bureau semble hors service"},
|
||||||
|
"temperature_vert": {"title": "Température Raphaël", "message": "Capteur de température de Raphaël semble hors service"}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Clear existing timer
|
||||||
|
const existing = flow.get(timeoutKey);
|
||||||
|
if (existing) clearTimeout(existing);
|
||||||
|
|
||||||
|
// Set new timer
|
||||||
|
const timer = setTimeout(() => {
|
||||||
|
node.send({
|
||||||
|
payload: `⚠️ No update from ${sensor} in 3 hours.`,
|
||||||
|
sensor: sensor,
|
||||||
|
title: messages[sensor]["title"],
|
||||||
|
message: messages[sensor]["message"]
|
||||||
|
});
|
||||||
|
}, 3 * 60 * 60 * 1000); // 3 hours
|
||||||
|
|
||||||
|
flow.set(timeoutKey, timer);
|
||||||
|
|
||||||
|
return null; // Don't send anything now
|
||||||
|
```
|
||||||
|
|
||||||
|
Le second nœud est un `call service node` qui envoie une notification sur mon téléphone Android avec les informations fournies :
|
||||||
|

|
||||||
|
|
||||||
|
#### 3. Curseurs de Température
|
||||||
|
|
||||||
|
Pour pouvoir ajuster la température sans avoir à modifier tout le workflow, j’ai créé deux entrées (ou helper) Home Assistant, de type number, pour chaque unité de climatisation, ce qui me fait un total de 6 entrées :
|
||||||
|

|
||||||
|
|
||||||
|
Ces valeurs représentent la température de base utilisée pour le calcul des seuils, en fonction des offsets que je détaillerai plus loin.
|
||||||
|
|
||||||
|
Le premier nœud est un `trigger state node` qui regroupe les 6 entités. Si je modifie l’une de ces valeurs, le nœud est déclenché :
|
||||||
|

|
||||||
|
|
||||||
|
Le deuxième nœud est un `function node`, qui permet de déterminer la pièce concernée :
|
||||||
|
```js
|
||||||
|
const association = {
|
||||||
|
"input_number.temp_ete_salon": "salon",
|
||||||
|
"input_number.temp_hiver_salon": "salon",
|
||||||
|
"input_number.temp_ete_chambre": "chambre",
|
||||||
|
"input_number.temp_hiver_chambre": "chambre",
|
||||||
|
"input_number.temp_ete_couloir": "couloir",
|
||||||
|
"input_number.temp_hiver_couloir": "couloir"
|
||||||
|
};
|
||||||
|
|
||||||
|
msg.payload = { room: association[msg.topic] };
|
||||||
|
return msg;
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 4. Interrupteurs
|
||||||
|
|
||||||
|
Dans Home Assistant, j’utilise d’autres entrées, mais cette fois sous forme de booléens. Le plus important est celui dédié à la climatisation, qui me permet de désactiver manuellement tout le workflow. J’en ai d’autres qui sont automatisés, par exemple pour le moment de la journée ou la détection de présence à la maison.
|
||||||
|
|
||||||
|
J’utilise un autre `trigger state node` qui regroupe tous mes interrupteurs sous forme de booléens, y compris un bouton de test utilisé pour le débogage :
|
||||||
|

|
||||||
|
|
||||||
|
Comme ces interrupteurs impactent tout l’appartement (et non une seule unité), le nœud suivant est un `change node` qui définit la valeur de la pièce à `partout` :
|
||||||
|

|
||||||
|
|
||||||
|
#### 5. Fenêtres
|
||||||
|
|
||||||
|
Les derniers déclencheurs sont les fenêtres. Si j’ouvre ou ferme une fenêtre située près d’une unité, cela active le workflow. J’ai des capteurs d’ouverture sur certaines fenêtres, mais pour l’unité du couloir, j’utilise l’état des fenêtres Velux. Certaines pièces ayant plusieurs fenêtres, j’ai créé une entrée de type groupe pour les regrouper.
|
||||||
|
|
||||||
|
Le premier nœud est le dernier `trigger state node`. La valeur retournée est une string qu’il faudra ensuite convertir en booléen :
|
||||||
|

|
||||||
|
|
||||||
|
Juste après, un autre `function node` permet d’identifier la pièce concernée :
|
||||||
|
```js
|
||||||
|
const association = {
|
||||||
|
"binary_sensor.groupe_fenetre_salon": "salon",
|
||||||
|
"binary_sensor.fenetre_chambre_contact": "chambre",
|
||||||
|
"cover.groupe_fenetre_couloir": "couloir"
|
||||||
|
};
|
||||||
|
|
||||||
|
msg.payload = {
|
||||||
|
room: association[msg.topic]
|
||||||
|
};
|
||||||
|
return msg;
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 6. Fenêtre Watchdog
|
||||||
|
|
||||||
|
Quand j’ouvre une fenêtre, ce n’est pas forcément pour la laisser ouverte longtemps. Je peux simplement faire sortir le chat ou jeter un œil au portail. Je ne veux pas que la climatisation se coupe dès que j’ouvre une fenêtre. Pour contourner cela, j’ai mis en place un watchdog pour chaque unité, afin de retarder l’envoi du message pendant un certain temps.
|
||||||
|
|
||||||
|
Le premier nœud est un `switch node`. En fonction de la pièce transmise par le nœud précédent, il envoie le message au _watchdog_ correspondant :
|
||||||
|

|
||||||
|
|
||||||
|
Viennent ensuite les _watchdogs_, des `trigger nodes`, qui retardent le message pendant un certain temps, et prolongent ce délai si un autre message est reçu entre-temps :
|
||||||
|

|
||||||
|
|
||||||
|
#### 7. Climatisation Activée ?
|
||||||
|
|
||||||
|
Tous ces déclencheurs arrivent maintenant dans la chaîne de traitement, qui va déterminer ce que le système doit faire. Mais avant cela, on vérifie si l’automatisation est activée. J’ai ajouté ce kill switch au cas où, même si je l’utilise rarement.
|
||||||
|
|
||||||
|
Le premier nœud est un `delay node` qui régule le débit des messages entrants à 1 message par seconde :
|
||||||
|

|
||||||
|
|
||||||
|
Le deuxième nœud est un `current state node` qui vérifie si le booléen `climatisation` est activé :
|
||||||
|

|
||||||
|
|
||||||
|
#### 8. Configuration des pièces
|
||||||
|
|
||||||
|
L’idée ici est d’associer la configuration de la pièce au message. Chaque pièce a sa propre configuration : quelle unité est utilisée, quels capteurs sont associés, et surtout, dans quelles conditions elle doit s’allumer ou s’éteindre.
|
||||||
|
|
||||||
|
Les unités de climatisation disposent de 4 modes :
|
||||||
|
- Refroidissement (Cool)
|
||||||
|
- Déshumidification (Dry)
|
||||||
|
- Ventilation (Fan)
|
||||||
|
- Chauffage (Heat)
|
||||||
|
|
||||||
|
Pour déterminer quel mode utiliser, j’utilise des seuils pour chaque mode et la vitesse de ventilation, avec différents offsets selon la situation. Je peux ainsi définir un offset spécifique la nuit ou en cas d’absence. Je peux aussi définir un offset sur `disabled`, ce qui forcera l’arrêt de l’unité.
|
||||||
|
|
||||||
|
Le premier nœud est un `switch node`, basé sur la valeur `room`, qui oriente le message vers la configuration associée. Si la pièce est `partout`, le message est dupliqué vers les 3 configurations de pièce :
|
||||||
|

|
||||||
|
|
||||||
|
Il est ensuite connecté à un `change node`, qui ajoute la configuration dans `room_config`. Voici un exemple avec la configuration du salon :
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"threshold": {
|
||||||
|
"cool": {
|
||||||
|
"start": {
|
||||||
|
"1": 1,
|
||||||
|
"2": 1.5,
|
||||||
|
"3": 2,
|
||||||
|
"4": 2.5,
|
||||||
|
"quiet": 0
|
||||||
|
},
|
||||||
|
"stop": -0.3,
|
||||||
|
"target": -1,
|
||||||
|
"offset": {
|
||||||
|
"absent": 1,
|
||||||
|
"vacances": "disabled",
|
||||||
|
"fenetre": "disabled",
|
||||||
|
"matin": "disabled",
|
||||||
|
"jour": 0,
|
||||||
|
"soir": 0,
|
||||||
|
"nuit": "disabled"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"dry": {
|
||||||
|
"start": {
|
||||||
|
"quiet": -1
|
||||||
|
},
|
||||||
|
"stop": -1.5,
|
||||||
|
"offset": {
|
||||||
|
"absent": "1.5",
|
||||||
|
"vacances": "disabled",
|
||||||
|
"fenetre": "disabled",
|
||||||
|
"matin": "disabled",
|
||||||
|
"jour": 0,
|
||||||
|
"soir": 0,
|
||||||
|
"nuit": "disabled"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"fan_only": {
|
||||||
|
"start": {
|
||||||
|
"1": -0.3,
|
||||||
|
"quiet": -0.5
|
||||||
|
},
|
||||||
|
"stop": -0.7,
|
||||||
|
"offset": {
|
||||||
|
"absent": "disabled",
|
||||||
|
"vacances": "disabled",
|
||||||
|
"fenetre": "disabled",
|
||||||
|
"matin": "disabled",
|
||||||
|
"jour": 0,
|
||||||
|
"soir": 0,
|
||||||
|
"nuit": "disabled"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"heat": {
|
||||||
|
"start": {
|
||||||
|
"1": 0,
|
||||||
|
"2": -1.5,
|
||||||
|
"quiet": 0
|
||||||
|
},
|
||||||
|
"stop": 1,
|
||||||
|
"target": 1,
|
||||||
|
"offset": {
|
||||||
|
"absent": -1.5,
|
||||||
|
"vacances": -3,
|
||||||
|
"fenetre": "disabled",
|
||||||
|
"matin": 0,
|
||||||
|
"jour": 0,
|
||||||
|
"soir": 0,
|
||||||
|
"nuit": -1.5
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"unit": "climate.clim_salon",
|
||||||
|
"timer": "timer.minuteur_clim_salon",
|
||||||
|
"window": "binary_sensor.groupe_fenetre_salon",
|
||||||
|
"thermometre": "sensor.temperature_jaune_temperature",
|
||||||
|
"humidity": "sensor.temperature_jaune_humidity",
|
||||||
|
"temp_ete": "input_number.temp_ete_salon",
|
||||||
|
"temp_hiver": "input_number.temp_hiver_salon"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 9. Calcul
|
||||||
|
|
||||||
|
Maintenant que le message contient la configuration de la pièce, on entre dans la phase de calcul. On dispose du nom de l’unité de climatisation, des capteurs associés, de la température de base souhaitée et de l’offset à appliquer. À partir de ces données, on récupère les états actuels et on effectue les calculs.
|
||||||
|
|
||||||
|
Le premier nœud est un `delay node` qui régule le débit des messages entrants, car le bloc précédent a potentiellement généré trois messages si toutes les pièces sont concernées.
|
||||||
|
|
||||||
|
Le deuxième nœud est le plus important du workflow, un `function node` qui remplit plusieurs rôles :
|
||||||
|
|
||||||
|
- Récupère les états des capteurs depuis Home Assistant
|
||||||
|
- Calcule les seuils des modes à partir des offsets
|
||||||
|
- Désactive certains modes si les conditions sont remplies
|
||||||
|
- Injecte les valeurs dans le `payload`
|
||||||
|
```js
|
||||||
|
// --- Helper: Get Home Assistant state by entity ID ---
|
||||||
|
function getState(entityId) {
|
||||||
|
return global.get("homeassistant.homeAssistant.states")[entityId]?.state;
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- Determine current time period based on sensors ---
|
||||||
|
const periods = ["jour", "soir", "nuit", "matin"];
|
||||||
|
msg.payload.period = periods.find(p => getState(`binary_sensor.${p}`) === 'on') || 'unknown';
|
||||||
|
|
||||||
|
// --- Determine presence status (absent = inverse of presence) ---
|
||||||
|
const vacances = getState("input_boolean.absent");
|
||||||
|
const absent = getState("input_boolean.presence") === 'on' ? 'off' : 'on';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Recursively adds the base temperature and offset to all numeric start values in a threshold config
|
||||||
|
*/
|
||||||
|
function applyOffsetToThresholds(threshold, baseTemp, globalOffset) {
|
||||||
|
for (const [key, value] of Object.entries(threshold)) {
|
||||||
|
if (key === "offset") continue;
|
||||||
|
|
||||||
|
if (typeof value === 'object') {
|
||||||
|
applyOffsetToThresholds(value, baseTemp, globalOffset);
|
||||||
|
} else {
|
||||||
|
threshold[key] += baseTemp + globalOffset;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Calculates the global offset for a mode, based on presence, vacation, window, and time of day
|
||||||
|
*/
|
||||||
|
function calculateGlobalOffset(offsets, modeName, windowState, disabledMap) {
|
||||||
|
let globalOffset = 0;
|
||||||
|
|
||||||
|
for (const [key, offsetValue] of Object.entries(offsets)) {
|
||||||
|
let conditionMet = false;
|
||||||
|
|
||||||
|
if (key === msg.payload.period) conditionMet = true;
|
||||||
|
else if (key === "absent" && absent === 'on') conditionMet = true;
|
||||||
|
else if (key === "vacances" && vacances === 'on') conditionMet = true;
|
||||||
|
else if ((key === "fenetre" || key === "window") && windowState === 'on') conditionMet = true;
|
||||||
|
|
||||||
|
if (conditionMet) {
|
||||||
|
if (offsetValue === 'disabled') {
|
||||||
|
disabledMap[modeName] = true;
|
||||||
|
return 0; // Mode disabled immediately
|
||||||
|
}
|
||||||
|
|
||||||
|
globalOffset += parseFloat(offsetValue);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return globalOffset;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Main logic: compute thresholds for the specified room using the provided config
|
||||||
|
*/
|
||||||
|
const cfg = msg.payload.room_config;
|
||||||
|
const room = msg.payload.room;
|
||||||
|
|
||||||
|
// Normalize window sensor state
|
||||||
|
const rawWindow = getState(cfg.window);
|
||||||
|
const window = rawWindow === 'open' ? 'on' : rawWindow === 'closed' ? 'off' : rawWindow;
|
||||||
|
|
||||||
|
// Gather temperatures
|
||||||
|
const temps = cfg.thermometre.split(',')
|
||||||
|
.map(id => parseFloat(getState(id)))
|
||||||
|
.filter(v => !isNaN(v));
|
||||||
|
|
||||||
|
const temp_avg = temps.reduce((a, b) => a + b, 0) / temps.length;
|
||||||
|
const temp_min = Math.min(...temps);
|
||||||
|
const temp_max = Math.max(...temps);
|
||||||
|
|
||||||
|
// Gather humidity
|
||||||
|
const humidities = cfg.humidity.split(',')
|
||||||
|
.map(id => parseFloat(getState(id)))
|
||||||
|
.filter(v => !isNaN(v));
|
||||||
|
|
||||||
|
const humidity_avg = humidities.reduce((a, b) => a + b, 0) / humidities.length;
|
||||||
|
const humidity_min = Math.min(...humidities);
|
||||||
|
const humidity_max = Math.max(...humidities);
|
||||||
|
|
||||||
|
// Get base temps
|
||||||
|
const temp_ete = parseFloat(getState(cfg.temp_ete));
|
||||||
|
const temp_hiver = parseFloat(getState(cfg.temp_hiver));
|
||||||
|
|
||||||
|
// Process modes
|
||||||
|
const { threshold } = cfg;
|
||||||
|
const modes = ["cool", "dry", "fan_only", "heat"];
|
||||||
|
const disabled = {};
|
||||||
|
|
||||||
|
for (const mode of modes) {
|
||||||
|
const baseTemp = (mode === "heat") ? temp_hiver : temp_ete;
|
||||||
|
const globalOffset = calculateGlobalOffset(threshold[mode].offset, mode, window, disabled);
|
||||||
|
|
||||||
|
applyOffsetToThresholds(threshold[mode], baseTemp, globalOffset);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Final message
|
||||||
|
msg.payload = {
|
||||||
|
...msg.payload,
|
||||||
|
unit: cfg.unit,
|
||||||
|
timer: cfg.timer,
|
||||||
|
threshold,
|
||||||
|
window,
|
||||||
|
temp: {
|
||||||
|
min: temp_min,
|
||||||
|
max: temp_max,
|
||||||
|
avg: Math.round(temp_avg * 100) / 100
|
||||||
|
},
|
||||||
|
humidity: {
|
||||||
|
min: humidity_min,
|
||||||
|
max: humidity_max,
|
||||||
|
avg: Math.round(humidity_avg * 100) / 100
|
||||||
|
},
|
||||||
|
disabled
|
||||||
|
};
|
||||||
|
|
||||||
|
return msg;
|
||||||
|
```
|
||||||
|
|
||||||
|
Le troisième nœud est un `filter node`, qui ignore les messages suivants ayant un contenu similaire :
|
||||||
|

|
||||||
|
|
||||||
|
Le quatrième nœud vérifie si un verrou est actif à l’aide d’un `current state node`. On regarde si le minuteur associé à l’unité est inactif. Si ce n’est pas le cas, le message est ignoré :
|
||||||
|

|
||||||
|
|
||||||
|
Le dernier nœud est un autre `current state node` qui permet de récupérer l’état actuel de l’unité et ses propriétés :
|
||||||
|

|
||||||
|
|
||||||
|
#### 10. État Cible
|
||||||
|
|
||||||
|
Après les calculs, il s'agit maintenant de déterminer quel doit être le mode cible, quelle action effectuer pour converger vers ce mode à partir de l’état actuel, et le cas échéant, quelle vitesse de ventilation utiliser pour ce mode.
|
||||||
|
|
||||||
|
Les trois nœuds suivants sont des `function nodes`. Le premier détermine le mode cible à adopter parmi : `off`, `cool`, `dry`, `fan_only` et `heat` :
|
||||||
|
```js
|
||||||
|
const minHumidityThreshold = 52;
|
||||||
|
const maxHumidityThreshold = 57;
|
||||||
|
|
||||||
|
// Helper: check if mode can be activated or stopped
|
||||||
|
function isModeEligible(mode, temps, humidity, thresholds, currentMode) {
|
||||||
|
const isCurrent = (mode === currentMode);
|
||||||
|
const threshold = thresholds[mode];
|
||||||
|
|
||||||
|
if (msg.payload.disabled?.[mode]) return false;
|
||||||
|
|
||||||
|
// Determine which temperature to use for start/stop:
|
||||||
|
// start: temp.max (except heat uses temp.min)
|
||||||
|
// stop: temp.avg
|
||||||
|
let tempForCheckStart;
|
||||||
|
if (mode === "heat") {
|
||||||
|
tempForCheckStart = temps.min; // heat start uses min temp
|
||||||
|
} else {
|
||||||
|
tempForCheckStart = temps.max; // others start use max temp
|
||||||
|
}
|
||||||
|
const tempForCheckStop = temps.avg;
|
||||||
|
|
||||||
|
// Dry mode also depends on humidity thresholds
|
||||||
|
// humidity max for start, humidity avg for stop
|
||||||
|
let humidityForCheckStart = humidity.max;
|
||||||
|
let humidityForCheckStop = humidity.avg;
|
||||||
|
|
||||||
|
// For heat mode (inverted logic)
|
||||||
|
if (mode === "heat") {
|
||||||
|
if (!isCurrent) {
|
||||||
|
      const minStart = Math.min(...Object.values(threshold.start));
      return tempForCheckStart < minStart;
    } else {
      return tempForCheckStop < threshold.stop;
    }
  }

  // For dry mode (humidity-dependent)
  if (mode === "dry") {
    // Skip if humidity too low
    if (humidityForCheckStart <= (isCurrent ? minHumidityThreshold : maxHumidityThreshold)) return false;

    const minStart = Math.min(...Object.values(threshold.start));
    if (!isCurrent) {
      return tempForCheckStart >= minStart;
    } else {
      return tempForCheckStop >= threshold.stop;
    }
  }

  // For cool and fan_only
  if (!isCurrent) {
    const minStart = Math.min(...Object.values(threshold.start));
    return tempForCheckStart >= minStart;
  } else {
    return tempForCheckStop >= threshold.stop;
  }
}

// --- Main logic ---
const { threshold, temp, humidity, current_mode, disabled } = msg.payload;

const priority = ["cool", "dry", "fan_only", "heat"];
let target_mode = "off";

// Loop through priority list and stop at the first eligible mode
for (const mode of priority) {
  if (isModeEligible(mode, temp, humidity, threshold, current_mode)) {
    target_mode = mode;
    break;
  }
}

msg.payload.target_mode = target_mode;

if (target_mode === "cool" || target_mode === "heat") {
  msg.payload.set_temp = true;
}

return msg;
```

Le second compare le mode actuel avec le mode cible et choisit l'action à effectuer :
- **check** : le mode actuel est identique au mode cible.
- **start** : l'unité est éteinte, mais un mode actif est requis.
- **change** : l'unité est allumée, mais le mode cible est différent du mode actuel (et n'est pas `off`).
- **stop** : l'unité est allumée mais doit être arrêtée.
```js
let action = "check"; // default if both are same

if (msg.payload.current_mode === "off" && msg.payload.target_mode !== "off") {
  action = "start";
} else if (msg.payload.current_mode !== "off" && msg.payload.target_mode !== "off" && msg.payload.current_mode !== msg.payload.target_mode) {
  action = "change";
} else if (msg.payload.current_mode !== "off" && msg.payload.target_mode === "off") {
  action = "stop";
}

msg.payload.action = action;
return msg;
```

Le dernier nœud détermine la vitesse de ventilation appropriée pour le mode cible, en fonction des seuils définis :
```js
// Function to find the appropriate speed key based on temperature and mode
function findSpeed(thresholdStart, temperature, mode) {
  let closestSpeed = 'quiet';
  let closestTemp = mode === 'heat' ? Infinity : -Infinity;

  for (const speedKey in thresholdStart) {
    if (speedKey !== 'quiet') {
      const tempValue = thresholdStart[speedKey];
      if (mode === 'heat') {
        if (tempValue >= temperature && tempValue <= closestTemp) {
          closestSpeed = speedKey;
          closestTemp = tempValue;
        }
      } else { // cool, fan_only
        if (tempValue <= temperature && tempValue >= closestTemp) {
          closestSpeed = speedKey;
          closestTemp = tempValue;
        }
      }
    }
  }
  return closestSpeed;
}

if (msg.payload.target_mode && msg.payload.target_mode !== "off" && msg.payload.target_mode !== "dry") {
  const modeData = msg.payload.threshold[msg.payload.target_mode];
  if (modeData && modeData.start) {
    if (msg.payload.target_mode === "heat") {
      msg.payload.speed = findSpeed(modeData.start, msg.payload.temp.min, 'heat');
    } else {
      msg.payload.speed = findSpeed(modeData.start, msg.payload.temp.max, 'cool');
    }
  } else {
    node.error("Invalid mode data or missing 'start' thresholds", msg);
  }
} else {
  // No need for speed in 'off' or 'dry' modes
  msg.payload.speed = null;
}

return msg;
```
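À titre d'illustration, voici ce que retournerait `findSpeed` avec des seuils fictifs (les valeurs ci-dessous ne viennent pas de ma configuration réelle) :
```js
// Seuils de démarrage fictifs, déjà décalés par la température de base et l'offset
const start = { "1": 26, "2": 26.5, "3": 27, "quiet": 25.5 };

findSpeed(start, 26.7, 'cool'); // -> "2" : 26.5 est le seuil le plus élevé <= 26.7
findSpeed(start, 25.8, 'cool'); // -> "quiet" : aucun seuil numéroté n'est encore atteint
```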

#### 11. Choix de l'Action

En fonction de l’action à effectuer, le `switch node` va router le message vers le bon chemin :


#### 12. Démarrage

Lorsque l’action est `start`, il faut d’abord allumer l’unité. Cela prend entre 20 et 40 secondes selon le modèle, et une fois démarrée, l’unité est verrouillée pendant un court laps de temps pour éviter les messages suivants.

Le premier nœud est un `call service node` utilisant le service `turn_on` sur l’unité de climatisation :


Le second nœud est un autre `call service node` qui va démarrer un minuteur de verrouillage (lock timer) pour cette unité pendant 45 secondes :


Le dernier est un `delay node` de 5 secondes, pour laisser le temps à l’intégration Daikin de Home Assistant de refléter le nouvel état.

---

#### 13. Changement

L’action `change` est utilisée pour passer d’un mode à un autre, mais aussi juste après l’allumage.

Le premier nœud est un `call service node` utilisant le service `set_hvac_mode` sur l’unité de climatisation :


Le nœud suivant est un `delay node` de 5 secondes.

Le dernier vérifie, avec un `switch node`, si la température cible doit être définie. Cela n’est nécessaire que pour les modes `cool` et `heat` :


---

#### 14. Définir la Température Cible

La température cible est uniquement pertinente pour les modes `cool` et `heat`. Avec une climatisation classique, vous définissez une température à atteindre : c’est exactement ce qu’on fait ici. Mais comme chaque unité utilise son propre capteur interne pour vérifier cette température, je ne leur fais pas vraiment confiance. Si la température cible est déjà atteinte selon l’unité, elle ne soufflera plus du tout.

Le premier nœud est un autre `call service node` utilisant le service `set_temperature` :


Encore une fois, ce nœud est suivi d’un `delay node` de 5 secondes.

#### 15. Vérification

L’action `check` est utilisée presque tout le temps. Elle consiste uniquement à vérifier et comparer la vitesse de ventilation souhaitée, et à la modifier si nécessaire.

Le premier nœud est un `switch node` qui vérifie si la valeur `speed` est définie :


Le deuxième est un autre `switch node` qui compare la valeur `speed` avec la vitesse actuelle :


Enfin, le dernier nœud est un `call service node` utilisant le service `set_fan_mode` pour définir la vitesse du ventilateur :


#### 16. Arrêt

Lorsque l’action est `stop`, l’unité de climatisation est simplement arrêtée.

Le premier nœud est un `call service node` utilisant le service `turn_off` :


Le deuxième nœud est un autre `call service node` qui va démarrer le minuteur de verrouillage de cette unité pour 45 secondes.

#### 17. Intervention Manuelle

Parfois, pour une raison ou une autre, on souhaite utiliser la climatisation manuellement. Dans ce cas, on ne veut pas que le flux Node-RED vienne écraser notre réglage manuel, du moins pendant un certain temps.
Node-RED utilise son propre utilisateur dans Home Assistant, donc si une unité change d’état sans cet utilisateur, c’est qu’une intervention manuelle a eu lieu.

Le premier nœud est un `trigger state node`, qui envoie un message dès qu’une unité AC change d’état :


Le deuxième est un `function node` qui associe l’unité avec son minuteur :
```js
const association = {
  "climate.clim_salon": "timer.minuteur_clim_salon",
  "climate.clim_chambre": "timer.minuteur_clim_chambre",
  "climate.clim_couloir": "timer.minuteur_clim_couloir"
};

msg.payload = association[msg.topic];
return msg;
```

Le troisième est un `switch node` qui laisse passer le message uniquement si le `user_id` **n’est pas** celui de Node-RED :


Le quatrième est un autre `switch node` qui vérifie que le champ `user_id` **est bien défini** :


Enfin, le dernier nœud est un `call service node` utilisant le service `start` sur le minuteur de l’unité, avec sa durée par défaut (60 minutes) :


## TL;DR

Avec cette configuration, mon système de climatisation est entièrement automatisé, du refroidissement en été au chauffage en hiver, tout en gardant un œil sur le taux d’humidité.

Cela m’a demandé pas mal de réflexion, d’ajustements et de tests, mais au final je suis vraiment satisfait du résultat. C’est pourquoi je le partage ici, pour vous donner des idées sur ce qu’on peut faire en domotique.

Si vous pensez que certaines choses pourraient être faites autrement, n’hésitez pas à me contacter pour en discuter ou me proposer de nouvelles idées !
700
content/post/6-ac-automation-home-assistant-node-red.md
Normal file
@@ -0,0 +1,700 @@
---
slug: ac-automation-home-assistant-node-red
title: Full AC Automation with Home Assistant and Node-RED
description: How I automate my AC with Home Assistant and Node-RED to react to temperature, humidity and all daily events.
date: 2025-06-27
draft: false
tags:
  - home-automation
  - home-assistant
  - node-red
categories:
  - automation
---
## Intro

In my apartment, I have a Daikin air conditioning system to cool it down in summer, but also to warm it up in winter. It is composed of 3 indoor units:
- Living room
- Master bedroom
- Hallway (in front of my office and my kid's room)

I always found it tedious to turn them on when I needed them, I forgot to turn them off when I should have, and I was constantly chasing the remote.

What if I could automate it? After all, I already use Home Assistant to control many devices at home, so controlling the AC seemed natural to me.

### Home Assistant

Home Assistant is the brain of my smart home. It connects all my devices (lights, sensors, shutters, etc.) under a single interface. What makes it so powerful is the ability to create automations: if something happens, then do something else. Simple things like “turn on the kitchen light when the motion sensor is triggered” are a breeze. For more advanced workflows, it offers YAML-based scripts with conditions, delays, triggers, and templates.

That said, once automations start getting more complex, like reacting to multiple sensors, time ranges, or presence detection, they can quickly turn into long, hard-to-follow blocks of code. It’s easy to lose track of what does what, especially when you want to tweak just one small part weeks later.

### Node-RED

That’s exactly why I turned to Node-RED. It’s a visual tool that lets you build logic using blocks called “nodes”, which you connect with wires to create flows. Each node performs a small task: trigger at a certain time, check a condition, send a command to a device, etc. Instead of writing YAML, you just drag, drop, and connect.

Node-RED does not replace Home Assistant, it empowers it. I won't cover the installation of Node-RED nor its integration in HA; I did that 2 years ago, but from what I remember, it is quite straightforward.

## Previous Workflow

I already had a decent solution to control my AC from Home Assistant with Node-RED, but I wanted to enhance it to also handle the humidity level at home. My previous workflow, despite being functional, was not really scalable and quite hard to maintain:


## New Workflow

Instead of tweaking this workflow, I created a new one from scratch, with the same goal in mind: control the AC system by taking into account all available sensors: thermometers, humidity sensors, door sensors, occupant presence, time of day, etc.

### Objectives

The idea is pretty simple: not having to think about the AC while still being efficient.

That being said, what does that mean? I want to keep the temperature and humidity level in check, whether I'm here or not. If I open the windows, it should stop blowing. If it is too humid, I want to dry the air. If I turn the AC on or off manually, I don't want it to override my setting. If it's night, I don't need to cool my living room and I want it quiet, etc.

To help me achieve that, I'm using 4 [Aqara temperature and humidity sensors](https://eu.aqara.com/en-eu/products/aqara-temperature-and-humidity-sensor), one in each of my main rooms. I'm also using some [Aqara door sensors](https://eu.aqara.com/en-eu/products/aqara-door-and-window-sensor) to detect if windows are open.

### Workflow

Let me introduce my new AC workflow within Node-RED and explain what it does in detail:


#### 1. Temperature Sensors

In the first node, I combined all the temperature sensors in one `trigger state node`, and I also added the humidity levels reported by these sensors in addition to the temperature. The node then contains 8 entities in a list (2 for each of my sensors). Each time one of these 8 entities changes value, the node is triggered:


Each of my temperature sensors is named with a color in French, because each has its own color sticker to distinguish them:
- **Jaune**: Living room
- **Bleu**: Bedroom
- **Rouge**: Office
- **Vert**: Kid's bedroom

The second node is a `function node` whose role is to determine the room of the sensor (`function nodes` are written in **JavaScript**):
```js
const association = {
  "temperature_jaune": "salon",
  "temperature_bleu": "chambre",
  "temperature_rouge": "couloir",
  "temperature_vert": "couloir"
};

// Match pattern like: sensor.temperature_rouge_temperature
const match = msg.topic.match(/^sensor\.(.+)_(temperature|humidity)$/);

if (!match) {
  node.warn("Topic format not recognized: " + msg.topic);
  return null;
}

msg.payload = {
  room: association[match[1]],
  sensor: match[1]
};

return msg;
```

For the last node: most of the time, the sensors will send two messages at the same time, one containing the temperature value and the other the humidity level. I added a `join node` to combine the two messages if they are sent within the same second:


#### 2. Notification

It can happen that the temperature sensors stop sending states for some reason. In that case, they will always return their last value, which would lock the associated AC unit.

The workaround I found effective is to send a notification if a sensor did not send a new value in the last 3 hours. In a normal situation, the sensors send an update approximately every 15 minutes.

The first node is a slightly tricky `function node` which stores a timer in a flow variable for each sensor. When the timeout is reached, it sends a message to the next node:
```js
const sensor = msg.payload.sensor;
const timeoutKey = `watchdog_${sensor}`;
const messages = {
  "temperature_jaune": {"title": "Température Salon", "message": "Capteur de température du salon semble hors service"},
  "temperature_bleu": {"title": "Température Chambre", "message": "Capteur de température de la chambre semble hors service"},
  "temperature_rouge": {"title": "Température Bureau", "message": "Capteur de température du bureau semble hors service"},
  "temperature_vert": {"title": "Température Raphaël", "message": "Capteur de température de Raphaël semble hors service"}
};

// Clear existing timer
const existing = flow.get(timeoutKey);
if (existing) clearTimeout(existing);

// Set new timer
const timer = setTimeout(() => {
  node.send({
    payload: `⚠️ No update from ${sensor} in 3 hours.`,
    sensor: sensor,
    title: messages[sensor]["title"],
    message: messages[sensor]["message"]
  });
}, 3 * 60 * 60 * 1000); // 3 hours

flow.set(timeoutKey, timer);

return null; // Don't send anything now
```

The second node is a `call service node` which sends a notification to my Android device with the given values:


#### 3. Temperature Sliders

To have control over the temperature without having to change the workflow, I created two Home Assistant helpers, as numbers, which I can adjust for each unit, giving me 6 helpers in total:


These values are the base temperatures used for the calculation of the thresholds, depending on the offsets which I will detail further.

The first node is a `trigger state node`, with all 6 entities combined. If I change one value, the node is triggered:


The second node is a `function node` to determine the affected room:
```js
const association = {
  "input_number.temp_ete_salon": "salon",
  "input_number.temp_hiver_salon": "salon",
  "input_number.temp_ete_chambre": "chambre",
  "input_number.temp_hiver_chambre": "chambre",
  "input_number.temp_ete_couloir": "couloir",
  "input_number.temp_hiver_couloir": "couloir"
};

msg.payload = { room: association[msg.topic] };
return msg;
```

#### 4. Toggles

In Home Assistant, I'm also using other helpers, as booleans. The most important one is the AC toggle, with which I can manually disable the whole workflow. I have others which are automated, for the time of day or to detect presence at home.

I have another `trigger state node` with all my boolean toggles, including a test button for debugging purposes:


As toggles affect the whole apartment and not a single unit, the next node is a `change node`, which sets the room value to `partout` (everywhere):


#### 5. Windows

The last triggers are my windows: if I open or close a window next to a unit, it triggers the workflow. I have door sensors for some of my doors, but for the hallway unit, I'm using the Velux windows' state. Some rooms have more than one window, so I created a group helper for them.

The first node is the last `trigger state node`; the returned value is a string which I will later have to convert into a boolean:


Connected to it, again, a `function node` to select the affected room:
```js
const association = {
  "binary_sensor.groupe_fenetre_salon": "salon",
  "binary_sensor.fenetre_chambre_contact": "chambre",
  "cover.groupe_fenetre_couloir": "couloir"
};

msg.payload = {
  room: association[msg.topic]
};
return msg;
```

#### 6. Window Watchdog

When I open a window, it is not necessarily to leave it open for a long time. I could just be letting the cat out or having a look at my gate. I don't want my AC turned off as soon as I open it. To work around that, I created a watchdog for each unit, to delay the message for some time.

The first node is a `switch node`: based on the room given by the previous node, it will send the message to the associated watchdog:


After that come the watchdogs, `trigger nodes`, which will delay the message by some time and extend the delay if another message is received:


#### 7. AC Enabled?

All these triggers now enter the computing pipeline, to determine what the system must do with them. But before that, it checks whether the automation is even enabled. I added this kill switch just in case, but I rarely use it anyway.

The first node is a `delay node` which limits the rate of incoming messages to 1 per second:


The second node is a `current state node` which checks if the `climatisation` boolean is enabled:


#### 8. Room Configuration

The idea here is to attach the room's configuration to the message. Each room has its own configuration: which unit is used, which sensors, and more importantly, when they should be turned on and off.

AC units have 4 modes which can be used:
- Cool
- Dry
- Fan
- Heat

To determine which mode should be used, I'm using thresholds for each mode and for the unit's fan speed, with different offsets depending on the situation. I can then define an offset during the night or when I'm away. I can also set the offset to `disabled`, which will force the unit to shut down.

The first node is a `switch node`, based on the `room` value, which will route the message to the associated room configuration. When the room is `partout` (everywhere), the message is split to all 3 room configurations:


It is connected to a `change node` which will attach the configuration to `room_config`; here is an example with the living room configuration:
```json
{
  "threshold": {
    "cool": {
      "start": {
        "1": 1,
        "2": 1.5,
        "3": 2,
        "4": 2.5,
        "quiet": 0
      },
      "stop": -0.3,
      "target": -1,
      "offset": {
        "absent": 1,
        "vacances": "disabled",
        "fenetre": "disabled",
        "matin": "disabled",
        "jour": 0,
        "soir": 0,
        "nuit": "disabled"
      }
    },
    "dry": {
      "start": {
        "quiet": -1
      },
      "stop": -1.5,
      "offset": {
        "absent": "1.5",
        "vacances": "disabled",
        "fenetre": "disabled",
        "matin": "disabled",
        "jour": 0,
        "soir": 0,
        "nuit": "disabled"
      }
    },
    "fan_only": {
      "start": {
        "1": -0.3,
        "quiet": -0.5
      },
      "stop": -0.7,
      "offset": {
        "absent": "disabled",
        "vacances": "disabled",
        "fenetre": "disabled",
        "matin": "disabled",
        "jour": 0,
        "soir": 0,
        "nuit": "disabled"
      }
    },
    "heat": {
      "start": {
        "1": 0,
        "2": -1.5,
        "quiet": 0
      },
      "stop": 1,
      "target": 1,
      "offset": {
        "absent": -1.5,
        "vacances": -3,
        "fenetre": "disabled",
        "matin": 0,
        "jour": 0,
        "soir": 0,
        "nuit": -1.5
      }
    }
  },
  "unit": "climate.clim_salon",
  "timer": "timer.minuteur_clim_salon",
  "window": "binary_sensor.groupe_fenetre_salon",
  "thermometre": "sensor.temperature_jaune_temperature",
  "humidity": "sensor.temperature_jaune_humidity",
  "temp_ete": "input_number.temp_ete_salon",
  "temp_hiver": "input_number.temp_hiver_salon"
}
```
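
To make the offsets concrete, here is a quick sketch of how the start and stop values above end up being shifted (the numbers are purely illustrative, not my actual settings):
```js
// Illustrative only: summer base temperature of 25°C with the "absent" offset active
const base = 25;      // example value of input_number.temp_ete_salon
const offset = 1;     // threshold.cool.offset.absent
const coolStartSpeed1 = base + offset + 1;     // 27   -> cool starts at speed "1" above 27°C
const coolStop        = base + offset + (-0.3); // 25.7 -> cool stops below 25.7°C on average
```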

#### 9. Computation

Now that the message has the room configuration attached, we are entering the computation pipeline. We have the AC unit name, the sensor names, the desired base temperature and the offsets to apply. From these values, we will fetch the current states and do the maths.

The first node is another `delay node` which regulates the rate of incoming messages, because the previous block could have created 3 messages if all rooms are targeted.

The second is the most important node of the workflow, a `function node` that has multiple tasks:
- Fetch sensor states from Home Assistant
- Calculate mode thresholds with the given offsets
- Disable modes if conditions are met
- Inject these values into the payload
```js
// --- Helper: Get Home Assistant state by entity ID ---
function getState(entityId) {
  return global.get("homeassistant.homeAssistant.states")[entityId]?.state;
}

// --- Determine current time period based on sensors ---
const periods = ["jour", "soir", "nuit", "matin"];
msg.payload.period = periods.find(p => getState(`binary_sensor.${p}`) === 'on') || 'unknown';

// --- Determine presence status (absent = inverse of presence) ---
const vacances = getState("input_boolean.absent");
const absent = getState("input_boolean.presence") === 'on' ? 'off' : 'on';

/**
 * Recursively adds the base temperature and offset to all numeric start values in a threshold config
 */
function applyOffsetToThresholds(threshold, baseTemp, globalOffset) {
  for (const [key, value] of Object.entries(threshold)) {
    if (key === "offset") continue;

    if (typeof value === 'object') {
      applyOffsetToThresholds(value, baseTemp, globalOffset);
    } else {
      threshold[key] += baseTemp + globalOffset;
    }
  }
}

/**
 * Calculates the global offset for a mode, based on presence, vacation, window, and time of day
 */
function calculateGlobalOffset(offsets, modeName, windowState, disabledMap) {
  let globalOffset = 0;

  for (const [key, offsetValue] of Object.entries(offsets)) {
    let conditionMet = false;

    if (key === msg.payload.period) conditionMet = true;
    else if (key === "absent" && absent === 'on') conditionMet = true;
    else if (key === "vacances" && vacances === 'on') conditionMet = true;
    else if ((key === "fenetre" || key === "window") && windowState === 'on') conditionMet = true;

    if (conditionMet) {
      if (offsetValue === 'disabled') {
        disabledMap[modeName] = true;
        return 0; // Mode disabled immediately
      }

      globalOffset += parseFloat(offsetValue);
    }
  }

  return globalOffset;
}

/**
 * Main logic: compute thresholds for the specified room using the provided config
 */
const cfg = msg.payload.room_config;
const room = msg.payload.room;

// Normalize window sensor state
const rawWindow = getState(cfg.window);
const window = rawWindow === 'open' ? 'on' : rawWindow === 'closed' ? 'off' : rawWindow;

// Gather temperatures
const temps = cfg.thermometre.split(',')
  .map(id => parseFloat(getState(id)))
  .filter(v => !isNaN(v));

const temp_avg = temps.reduce((a, b) => a + b, 0) / temps.length;
const temp_min = Math.min(...temps);
const temp_max = Math.max(...temps);

// Gather humidity
const humidities = cfg.humidity.split(',')
  .map(id => parseFloat(getState(id)))
  .filter(v => !isNaN(v));

const humidity_avg = humidities.reduce((a, b) => a + b, 0) / humidities.length;
const humidity_min = Math.min(...humidities);
const humidity_max = Math.max(...humidities);

// Get base temps
const temp_ete = parseFloat(getState(cfg.temp_ete));
const temp_hiver = parseFloat(getState(cfg.temp_hiver));

// Process modes
const { threshold } = cfg;
const modes = ["cool", "dry", "fan_only", "heat"];
const disabled = {};

for (const mode of modes) {
  const baseTemp = (mode === "heat") ? temp_hiver : temp_ete;
  const globalOffset = calculateGlobalOffset(threshold[mode].offset, mode, window, disabled);

  applyOffsetToThresholds(threshold[mode], baseTemp, globalOffset);
}

// Final message
msg.payload = {
  ...msg.payload,
  unit: cfg.unit,
  timer: cfg.timer,
  threshold,
  window,
  temp: {
    min: temp_min,
    max: temp_max,
    avg: Math.round(temp_avg * 100) / 100
  },
  humidity: {
    min: humidity_min,
    max: humidity_max,
    avg: Math.round(humidity_avg * 100) / 100
  },
  disabled
};

return msg;
```

The third node is a `filter node`, which drops subsequent messages with a similar payload:


The fourth node checks if any lock is set: with a `current state node`, we verify that the timer associated with the unit is idle. If it is not, the message is discarded:


The last node is another `current state node` which will fetch the unit's state and properties:


#### 10. Target State

After the computation, we want to determine what the target mode should be, what action to take to converge from the current mode and, if applicable, what the fan speed should be for that mode.

All three nodes are `function nodes`. The first one decides what the target mode should be, between `off`, `cool`, `dry`, `fan_only` and `heat`:
```js
const minHumidityThreshold = 52;
const maxHumidityThreshold = 57;

// Helper: check if mode can be activated or stopped
function isModeEligible(mode, temps, humidity, thresholds, currentMode) {
  const isCurrent = (mode === currentMode);
  const threshold = thresholds[mode];

  if (msg.payload.disabled?.[mode]) return false;

  // Determine which temperature to use for start/stop:
  // start: temp.max (except heat uses temp.min)
  // stop: temp.avg
  let tempForCheckStart;
  if (mode === "heat") {
    tempForCheckStart = temps.min; // heat start uses min temp
  } else {
    tempForCheckStart = temps.max; // others start use max temp
  }
  const tempForCheckStop = temps.avg;

  // Dry mode also depends on humidity thresholds
  // humidity max for start, humidity avg for stop
  let humidityForCheckStart = humidity.max;
  let humidityForCheckStop = humidity.avg;

  // For heat mode (inverted logic)
  if (mode === "heat") {
    if (!isCurrent) {
      const minStart = Math.min(...Object.values(threshold.start));
      return tempForCheckStart < minStart;
    } else {
      return tempForCheckStop < threshold.stop;
    }
  }

  // For dry mode (humidity-dependent)
  if (mode === "dry") {
    // Skip if humidity too low
    if (humidityForCheckStart <= (isCurrent ? minHumidityThreshold : maxHumidityThreshold)) return false;

    const minStart = Math.min(...Object.values(threshold.start));
    if (!isCurrent) {
      return tempForCheckStart >= minStart;
    } else {
      return tempForCheckStop >= threshold.stop;
    }
  }

  // For cool and fan_only
  if (!isCurrent) {
    const minStart = Math.min(...Object.values(threshold.start));
    return tempForCheckStart >= minStart;
  } else {
    return tempForCheckStop >= threshold.stop;
  }
}

// --- Main logic ---
const { threshold, temp, humidity, current_mode, disabled } = msg.payload;

const priority = ["cool", "dry", "fan_only", "heat"];
let target_mode = "off";

// Loop through priority list and stop at the first eligible mode
for (const mode of priority) {
  if (isModeEligible(mode, temp, humidity, threshold, current_mode)) {
    target_mode = mode;
    break;
  }
}

msg.payload.target_mode = target_mode;

if (target_mode === "cool" || target_mode === "heat") {
  msg.payload.set_temp = true;
}

return msg;
```

The second compares the current and target modes and picks which action to take:
- **check**: the current and target modes are the same.
- **start**: the AC unit is currently off, but an active mode is required.
- **change**: the AC unit is on, and the target mode is different (but not `off`).
- **stop**: the AC unit is on but needs to be stopped.
```js
let action = "check"; // default if both are same

if (msg.payload.current_mode === "off" && msg.payload.target_mode !== "off") {
  action = "start";
} else if (msg.payload.current_mode !== "off" && msg.payload.target_mode !== "off" && msg.payload.current_mode !== msg.payload.target_mode) {
  action = "change";
} else if (msg.payload.current_mode !== "off" && msg.payload.target_mode === "off") {
  action = "stop";
}

msg.payload.action = action;
return msg;
```
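
A few example combinations, to show how the mapping behaves (values invented for illustration):
```js
// { current_mode: "off",  target_mode: "cool" } -> action = "start"
// { current_mode: "cool", target_mode: "dry"  } -> action = "change"
// { current_mode: "cool", target_mode: "off"  } -> action = "stop"
// { current_mode: "heat", target_mode: "heat" } -> action = "check"
```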

The last node determines the fan speed for the target mode, based on the thresholds:
```js
// Function to find the appropriate speed key based on temperature and mode
function findSpeed(thresholdStart, temperature, mode) {
  let closestSpeed = 'quiet';
  let closestTemp = mode === 'heat' ? Infinity : -Infinity;

  for (const speedKey in thresholdStart) {
    if (speedKey !== 'quiet') {
      const tempValue = thresholdStart[speedKey];
      if (mode === 'heat') {
        if (tempValue >= temperature && tempValue <= closestTemp) {
          closestSpeed = speedKey;
          closestTemp = tempValue;
        }
      } else { // cool, fan_only
        if (tempValue <= temperature && tempValue >= closestTemp) {
          closestSpeed = speedKey;
          closestTemp = tempValue;
        }
      }
    }
  }
  return closestSpeed;
}

if (msg.payload.target_mode && msg.payload.target_mode !== "off" && msg.payload.target_mode !== "dry") {
  const modeData = msg.payload.threshold[msg.payload.target_mode];
  if (modeData && modeData.start) {
    if (msg.payload.target_mode === "heat") {
      msg.payload.speed = findSpeed(modeData.start, msg.payload.temp.min, 'heat');
    } else {
      msg.payload.speed = findSpeed(modeData.start, msg.payload.temp.max, 'cool');
    }
  } else {
    node.error("Invalid mode data or missing 'start' thresholds", msg);
  }
} else {
  // No need for speed in 'off' or 'dry' modes
  msg.payload.speed = null;
}

return msg;
```
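
As a quick illustration of how this picks a speed (the threshold values below are made up, not my real configuration):
```js
// Made-up start thresholds, already shifted by base temperature and offset
const start = { "1": 26, "2": 26.5, "3": 27, "quiet": 25.5 };

findSpeed(start, 26.7, 'cool'); // -> "2": 26.5 is the highest threshold <= 26.7
findSpeed(start, 25.8, 'cool'); // -> "quiet": no numbered threshold reached yet
```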

#### 11. Action Switch

Based on the action to take, the `switch node` will route the message accordingly:


#### 12. Start

When the action is `start`, we first need to turn the unit on. This takes between 20 and 40 seconds depending on the unit model, and once started, the unit is also locked for a short period against subsequent messages.

The first node is a `call service node` using the `turn_on` service on the AC unit:


The second node is another `call service node` which will start the lock timer of this unit for 45 seconds:


The last one is a `delay node` of 5 seconds, to give the Home Assistant Daikin integration time to reflect the new state.

#### 13. Change

The `change` action is used to switch from one mode to another, but it is also used right after the start action.

The first node is a `call service node` using the `set_hvac_mode` service on the AC unit:


The following node is another `delay node` of 5 seconds.

The last one verifies with a `switch node` whether the target temperature needs to be set; this is only required for the `cool` and `heat` modes:


#### 14. Set Target Temperature

The target temperature is only relevant for the `cool` and `heat` modes: with a normal AC unit, you define a temperature to reach, and this is exactly what is defined here. But because each unit uses its own internal sensor to verify it, I don't really trust it. If the target is already reached according to the unit, it won't blow at all.

The first node is another `call service node` using the `set_temperature` service:


Again, this node is followed by a `delay node` of 5 seconds.

#### 15. Check

The `check` action is used almost every time. It only checks and compares the desired fan speed, and changes it if needed.

The first node is a `switch node` which verifies that the `speed` value is defined:


The second is another `switch node` to compare the `speed` value with the current speed:


Finally, the last node is a `call service node` using the `set_fan_mode` service to set the fan speed:


#### 16. Stop

When the action is `stop`, the AC unit is simply turned off.

The first node is a `call service node` using the `turn_off` service:


The second node is another `call service node` which will start the lock timer of this unit for 45 seconds.

#### 17. Manual Intervention

Sometimes, for some reason, we want to use the AC manually. When we do, we don't want the workflow to override our manual setting, at least for some time. Node-RED uses its own user in Home Assistant, so when an AC unit changes state without this user, the change was made manually.

The first node is a `trigger state node`, which will send a message when any AC unit changes state:


The second is a `function node` which will associate the unit with its timer:
```js
const association = {
  "climate.clim_salon": "timer.minuteur_clim_salon",
  "climate.clim_chambre": "timer.minuteur_clim_chambre",
  "climate.clim_couloir": "timer.minuteur_clim_couloir"
};

msg.payload = association[msg.topic];
return msg;
```

The third is a `switch node` that lets the message through only when the `user_id` is not the Node-RED user's:


The fourth is another `switch node` which checks that a `user_id` is actually set:


Lastly, the final node is a `call service node` using the `start` service on the unit's timer with its default duration (60 minutes):


## TL;DR

With this setup, my AC system is fully automated, from cooling in summer to heating in winter, while keeping the humidity level in check.

This required quite a lot of thinking, tweaking and testing, but in the end I'm very happy with the result. That's why I'm sharing it with you, to give you some ideas about what you can do in home automation.

If you think I could have done things differently, please reach out to discuss it, and do not hesitate to share your ideas as well!
757
content/post/7-terraform-create-proxmox-module.fr.md
Normal file
@@ -0,0 +1,757 @@
---
slug: terraform-create-proxmox-module
title: Créer un Module Terraform pour Proxmox
description: Transformez votre code VM Proxmox en module Terraform réutilisable et apprenez à déployer à l'échelle sur plusieurs nœuds.
date: 2025-07-04
draft: false
tags:
  - terraform
  - proxmox
  - cloud-init
categories:
  - homelab
---
## Intro

Dans un [article précédent]({{< ref "post/3-terraform-create-vm-proxmox" >}}), j’expliquais comment déployer des **machines virtuelles** sur **Proxmox** à l’aide de **Terraform**, en partant d’un [template cloud-init]({{< ref "post/1-proxmox-cloud-init-vm-template" >}}).

Dans ce post, nous allons transformer ce code en un **module Terraform** réutilisable. Ensuite, je montrerai comment utiliser ce module dans d'autres projets pour simplifier et faire évoluer vos déploiements d'infrastructure.

---

## Qu’est-ce qu’un Module Terraform ?

Les modules Terraform sont des composants réutilisables qui permettent d’organiser et de simplifier votre code d’infrastructure en regroupant des ressources liées dans une seule unité. Au lieu de répéter la même configuration à plusieurs endroits, vous pouvez la définir une fois dans un module, puis l’utiliser là où vous en avez besoin, comme une fonction en programmation.

Les modules peuvent être locaux (dans votre projet) ou distants (depuis le Terraform Registry ou un dépôt Git), ce qui facilite le partage et la standardisation des patterns d’infrastructure entre les équipes ou projets. Grâce aux modules, votre code devient plus lisible, maintenable et évolutif.
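
Pour illustrer la syntaxe (exemple générique, indépendant du module construit plus bas), un appel de module se résume à un bloc `module` pointant vers une source locale ou distante :
```hcl
# Module local, référencé par un chemin relatif (chemin donné à titre d'exemple)
module "vm_test" {
  source  = "./modules/pve_vm"
  vm_name = "test-vm"
}

# Module distant, récupéré depuis le Terraform Registry (module public bien connu)
module "vpc" {
  source  = "terraform-aws-modules/vpc/aws"
  version = "~> 5.0"
}
```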

---

## Transformer le Projet en Module

Nous allons maintenant extraire le code Terraform du [projet précédent]({{< ref "post/3-terraform-create-vm-proxmox" >}}) pour en faire un module réutilisable nommé `pve_vm`.

> 📌 Vous pouvez retrouver le code source complet dans mon [dépôt Homelab](https://github.com/Vezpi/Homelab/). Le code spécifique à cet article se trouve [ici](https://github.com/Vezpi/Homelab/tree/3a991010d5e9de30e12cbf365d1a1ca1ff1f6436/terraform). Pensez à adapter les variables à votre environnement.

### Structure du Code

Notre module vivra à côté des projets, dans un dossier séparé.
```plaintext
terraform
`-- modules
    `-- pve_vm
        |-- main.tf
        |-- provider.tf
        `-- variables.tf
```
### Code du Module

📝 Les fichiers du module sont essentiellement les mêmes que ceux du projet que nous transformons. Les providers y sont déclarés, mais non configurés.

Le module `pve_vm` sera composé de 3 fichiers :
- **main** : la logique principale, identique à celle du projet.
- **provider** : déclare les providers requis, sans les configurer.
- **variables** : déclare les variables du module, en excluant celles propres au provider.

#### `main.tf`

```hcl
# Retrieve VM templates available in Proxmox that match the specified name
data "proxmox_virtual_environment_vms" "template" {
  filter {
    name   = "name"
    values = ["${var.vm_template}"] # The name of the template to clone from
  }
}

# Create a cloud-init configuration file as a Proxmox snippet
resource "proxmox_virtual_environment_file" "cloud_config" {
  content_type = "snippets"    # Cloud-init files are stored as snippets in Proxmox
  datastore_id = "local"       # Local datastore used to store the snippet
  node_name    = var.node_name # The Proxmox node where the file will be uploaded

  source_raw {
    file_name = "${var.vm_name}.cloud-config.yaml" # The name of the snippet file
    data = <<-EOF
    #cloud-config
    hostname: ${var.vm_name}
    package_update: true
    package_upgrade: true
    packages:
      - qemu-guest-agent # Ensures the guest agent is installed
    users:
      - default
      - name: ${var.vm_user}
        groups: sudo
        shell: /bin/bash
        ssh-authorized-keys:
          - "${var.vm_user_sshkey}" # Inject user's SSH key
        sudo: ALL=(ALL) NOPASSWD:ALL
    runcmd:
      - systemctl enable qemu-guest-agent
      - reboot # Reboot the VM after provisioning
    EOF
  }
}

# Define and provision a new VM by cloning the template and applying initialization
resource "proxmox_virtual_environment_vm" "vm" {
  name      = var.vm_name   # VM name
  node_name = var.node_name # Proxmox node to deploy the VM
  tags      = var.vm_tags   # Optional VM tags for categorization

  agent {
    enabled = true # Enable the QEMU guest agent
  }

  stop_on_destroy = true # Ensure VM is stopped gracefully when destroyed

  clone {
    vm_id     = data.proxmox_virtual_environment_vms.template.vms[0].vm_id     # ID of the source template
    node_name = data.proxmox_virtual_environment_vms.template.vms[0].node_name # Node of the source template
  }

  bios    = var.vm_bios    # BIOS type (e.g., seabios or ovmf)
  machine = var.vm_machine # Machine type (e.g., q35)

  cpu {
    cores = var.vm_cpu # Number of CPU cores
    type  = "host"     # Use host CPU type for best compatibility/performance
  }

  memory {
    dedicated = var.vm_ram # RAM in MB
  }

  disk {
    datastore_id = var.node_datastore # Datastore to hold the disk
    interface    = "scsi0"            # Primary disk interface
    size         = 4                  # Disk size in GB
  }

  initialization {
    user_data_file_id = proxmox_virtual_environment_file.cloud_config.id # Link the cloud-init file
    datastore_id      = var.node_datastore
    interface         = "scsi1" # Separate interface for cloud-init
    ip_config {
      ipv4 {
        address = "dhcp" # Get IP via DHCP
      }
    }
  }

  network_device {
    bridge  = "vmbr0"     # Use the default bridge
    vlan_id = var.vm_vlan # VLAN tagging if used
  }

  operating_system {
    type = "l26" # Linux 2.6+ kernel
  }

  vga {
    type = "std" # Standard VGA type
  }

  lifecycle {
    ignore_changes = [ # Ignore initialization section after first deployment for idempotency
      initialization
    ]
  }
}

# Output the assigned IP address of the VM after provisioning
output "vm_ip" {
  value       = proxmox_virtual_environment_vm.vm.ipv4_addresses[1][0] # Second network interface's first IP
  description = "VM IP"
}
```

#### `provider.tf`

```hcl
terraform {
  required_providers {
    proxmox = {
      source = "bpg/proxmox"
    }
  }
}
```

#### `variables.tf`

> ⚠️ The defaults are based on my environment, adapt them to yours.

```hcl
variable "node_name" {
  description = "Proxmox host for the VM"
  type        = string
}

variable "node_datastore" {
  description = "Datastore used for VM storage"
  type        = string
  default     = "ceph-workload"
}

variable "vm_template" {
  description = "Template of the VM"
  type        = string
  default     = "ubuntu-cloud"
}

variable "vm_name" {
  description = "Hostname of the VM"
  type        = string
}

variable "vm_user" {
  description = "Admin user of the VM"
  type        = string
  default     = "vez"
}

variable "vm_user_sshkey" {
  description = "Admin user SSH key of the VM"
  type        = string
  default     = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAID62LmYRu1rDUha3timAIcA39LtcIOny1iAgFLnxoBxm vez@bastion"
}

variable "vm_cpu" {
  description = "Number of CPU cores of the VM"
  type        = number
  default     = 1
}

variable "vm_ram" {
  description = "Number of RAM (MB) of the VM"
  type        = number
  default     = 2048
}

variable "vm_bios" {
  description = "Type of BIOS used for the VM"
  type        = string
  default     = "ovmf"
}

variable "vm_machine" {
  description = "Type of machine used for the VM"
  type        = string
  default     = "q35"
}

variable "vm_vlan" {
  description = "VLAN of the VM"
  type        = number
  default     = 66
}

variable "vm_tags" {
  description = "Tags for the VM"
  type        = list(any)
  default     = ["test"]
}
```

---

## Déployer une VM à l’aide du Module

Maintenant que nous avons extrait toute la logique dans le module `pve_vm`, notre projet n’a plus qu’à appeler ce module en lui passant les variables nécessaires. Cela rend la configuration bien plus propre et facile à maintenir.

### Structure du Code

Voici à quoi cela ressemble :
```plaintext
terraform
|-- modules
|   `-- pve_vm
|       |-- main.tf
|       |-- provider.tf
|       `-- variables.tf
`-- projects
    `-- simple-vm-with-module
        |-- credentials.auto.tfvars
        |-- main.tf
        |-- provider.tf
        `-- variables.tf
```

### Code du projet

Dans cet exemple, je fournis manuellement les valeurs lors de l’appel du module. Le provider est configuré au niveau du projet.

#### `main.tf`

```hcl
module "pve_vm" {
  source    = "../../modules/pve_vm"
  node_name = "zenith"
  vm_name   = "zenith-vm"
  vm_cpu    = 2
  vm_ram    = 2048
  vm_vlan   = 66
}

output "vm_ip" {
  value = module.pve_vm.vm_ip
}
```

#### `provider.tf`

```hcl
terraform {
  required_providers {
    proxmox = {
      source = "bpg/proxmox"
    }
  }
}

provider "proxmox" {
  endpoint  = var.proxmox_endpoint
  api_token = var.proxmox_api_token
  insecure  = false
  ssh {
    agent       = false
    private_key = file("~/.ssh/id_ed25519")
    username    = "root"
  }
}
```

#### `variables.tf`

```hcl
variable "proxmox_endpoint" {
  description = "Proxmox URL endpoint"
  type        = string
}

variable "proxmox_api_token" {
  description = "Proxmox API token"
  type        = string
  sensitive   = true
}
```
#### `credentials.auto.tfvars`

```hcl
proxmox_endpoint  = <your Proxmox endpoint>
proxmox_api_token = <your Proxmox API token for the user terraformer>
```

### Initialiser le Workspace Terraform

Dans notre nouveau projet, il faut d’abord initialiser l’environnement Terraform avec `terraform init` :
```bash
$ terraform init
Initializing the backend...
Initializing modules...
- pve_vm in ../../modules/pve_vm
Initializing provider plugins...
- Finding latest version of bpg/proxmox...
- Installing bpg/proxmox v0.78.2...
- Installed bpg/proxmox v0.78.2 (self-signed, key ID F0582AD6AE97C188)
Partner and community providers are signed by their developers.
If you'd like to know more about provider signing, you can read about it here:
https://www.terraform.io/docs/cli/plugins/signing.html
Terraform has created a lock file .terraform.lock.hcl to record the provider
selections it made above. Include this file in your version control repository
so that Terraform can guarantee to make the same selections by default when
you run "terraform init" in the future.

Terraform has been successfully initialized!

You may now begin working with Terraform. Try running "terraform plan" to see
any changes that are required for your infrastructure. All Terraform commands
should now work.

If you ever set or change modules or backend configuration for Terraform,
rerun this command to reinitialize your working directory. If you forget, other
commands will detect it and remind you to do so if necessary.
```

### Déployer la VM

Avant le déploiement, vérifiez que tout est correct avec `terraform plan`.

Une fois prêt, lancez le déploiement avec `terraform apply` :
```bash
$ terraform apply
module.pve_vm.data.proxmox_virtual_environment_vms.template: Reading...
module.pve_vm.data.proxmox_virtual_environment_vms.template: Read complete after 0s [id=89b444be-7501-4538-9436-08609b380d39]

Terraform used the selected providers to generate the following execution plan. Resource actions are indicated with the following symbols:
  + create

Terraform will perform the following actions:

  # module.pve_vm.proxmox_virtual_environment_file.cloud_config will be created
  + resource "proxmox_virtual_environment_file" "cloud_config" {
      + content_type           = "snippets"
      + datastore_id           = "local"
      + file_modification_date = (known after apply)
      + file_name              = (known after apply)
      + file_size              = (known after apply)
      + file_tag               = (known after apply)
      + id                     = (known after apply)
      + node_name              = "zenith"
      + overwrite              = true
      + timeout_upload         = 1800

      + source_raw {
          + data      = <<-EOT
                #cloud-config
                hostname: zenith-vm
                package_update: true
                package_upgrade: true
                packages:
                  - qemu-guest-agent
                users:
                  - default
                  - name: vez
                    groups: sudo
                    shell: /bin/bash
                    ssh-authorized-keys:
                      - "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAID62LmYRu1rDUha3timAIcA39LtcIOny1iAgFLnxoBxm vez@bastion"
                    sudo: ALL=(ALL) NOPASSWD:ALL
                runcmd:
                  - systemctl enable qemu-guest-agent
                  - reboot
            EOT
          + file_name = "zenith-vm.cloud-config.yaml"
          + resize    = 0
        }
    }

  # module.pve_vm.proxmox_virtual_environment_vm.vm will be created
  + resource "proxmox_virtual_environment_vm" "vm" {
      + acpi                    = true
      + bios                    = "ovmf"
      + id                      = (known after apply)
      + ipv4_addresses          = (known after apply)
      + ipv6_addresses          = (known after apply)
      + keyboard_layout         = "en-us"
      + mac_addresses           = (known after apply)
      + machine                 = "q35"
      + migrate                 = false
      + name                    = "zenith-vm"
      + network_interface_names = (known after apply)
      + node_name               = "zenith"
      + on_boot                 = true
      + protection              = false
      + reboot                  = false
      + reboot_after_update     = true
      + scsi_hardware           = "virtio-scsi-pci"
      + started                 = true
      + stop_on_destroy         = true
      + tablet_device           = true
      + tags                    = [
          + "test",
        ]
      + template                = false
      + timeout_clone           = 1800
      + timeout_create          = 1800
      + timeout_migrate         = 1800
      + timeout_move_disk       = 1800
      + timeout_reboot          = 1800
      + timeout_shutdown_vm     = 1800
      + timeout_start_vm        = 1800
      + timeout_stop_vm         = 300
      + vm_id                   = (known after apply)

      + agent {
          + enabled = true
          + timeout = "15m"
          + trim    = false
          + type    = "virtio"
        }

      + clone {
          + full      = true
          + node_name = "apex"
          + retries   = 1
          + vm_id     = 900
        }

      + cpu {
          + cores      = 2
          + hotplugged = 0
          + limit      = 0
          + numa       = false
          + sockets    = 1
          + type       = "host"
          + units      = 1024
        }

      + disk {
          + aio               = "io_uring"
          + backup            = true
          + cache             = "none"
          + datastore_id      = "ceph-workload"
          + discard           = "ignore"
          + file_format       = (known after apply)
          + interface         = "scsi0"
          + iothread          = false
          + path_in_datastore = (known after apply)
          + replicate         = true
          + size              = 4
          + ssd               = false
        }

      + initialization {
          + datastore_id         = "ceph-workload"
          + interface            = "scsi1"
          + meta_data_file_id    = (known after apply)
          + network_data_file_id = (known after apply)
          + type                 = (known after apply)
          + user_data_file_id    = (known after apply)
          + vendor_data_file_id  = (known after apply)

          + ip_config {
              + ipv4 {
                  + address = "dhcp"
                }
            }
        }

      + memory {
          + dedicated      = 2048
          + floating       = 0
          + keep_hugepages = false
          + shared         = 0
        }

      + network_device {
          + bridge      = "vmbr0"
          + enabled     = true
          + firewall    = false
          + mac_address = (known after apply)
          + model       = "virtio"
          + mtu         = 0
          + queues      = 0
          + rate_limit  = 0
          + vlan_id     = 66
        }

      + operating_system {
          + type = "l26"
        }

      + vga {
          + memory = 16
          + type   = "std"
        }
    }

Plan: 2 to add, 0 to change, 0 to destroy.

Changes to Outputs:
  + vm_ip = (known after apply)

Do you want to perform these actions?
  Terraform will perform the actions described above.
  Only 'yes' will be accepted to approve.

  Enter a value: yes

module.pve_vm.proxmox_virtual_environment_file.cloud_config: Creating...
module.pve_vm.proxmox_virtual_environment_file.cloud_config: Creation complete after 1s [id=local:snippets/zenith-vm.cloud-config.yaml]
module.pve_vm.proxmox_virtual_environment_vm.vm: Creating...
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [10s elapsed]
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [20s elapsed]
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [30s elapsed]
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [40s elapsed]
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [50s elapsed]
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [1m0s elapsed]
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [1m10s elapsed]
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [1m20s elapsed]
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [1m30s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [1m40s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [1m50s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [2m0s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [2m10s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [2m20s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [2m30s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [2m40s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [2m50s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [3m0s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [3m10s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Creation complete after 3m13s [id=103]
|
||||||
|
|
||||||
|
Apply complete! Resources: 2 added, 0 changed, 0 destroyed.
|
||||||
|
|
||||||
|
Outputs:
|
||||||
|
|
||||||
|
vm_ip = "192.168.66.159"
|
||||||
|
```
|
||||||
|
|
||||||
|
✅ La VM est maintenant prête !
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
🕗 _Ne faites pas attention à l’uptime, j’ai pris la capture d’écran le lendemain._
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Déployer Plusieurs VMs à la fois
|
||||||
|
|
||||||
|
Très bien, on a déployé une seule VM. Mais maintenant, comment passer à l’échelle ? Comment déployer plusieurs instances de ce template, avec des noms différents, sur des nœuds différents, et avec des tailles différentes ? C’est ce que je vais vous montrer.
|
||||||
|
|
||||||
|
### Une VM par Nœud
|
||||||
|
|
||||||
|
Dans l’exemple précédent, nous avons passé des valeurs fixes au module. À la place, nous pouvons définir un objet local contenant les caractéristiques de la VM, puis nous en servir lors de l’appel au module. Cela facilite l’évolution du code de déploiement :
|
||||||
|
```hcl
|
||||||
|
module "pve_vm" {
|
||||||
|
source = "../../modules/pve_vm"
|
||||||
|
node_name = local.vm.node_name
|
||||||
|
vm_name = local.vm.vm_name
|
||||||
|
vm_cpu = local.vm.vm_cpu
|
||||||
|
vm_ram = local.vm.vm_ram
|
||||||
|
vm_vlan = local.vm.vm_vlan
|
||||||
|
}
|
||||||
|
|
||||||
|
locals {
|
||||||
|
vm = {
|
||||||
|
node_name = "zenith"
|
||||||
|
vm_name = "zenith-vm"
|
||||||
|
vm_cpu = 2
|
||||||
|
vm_ram = 2048
|
||||||
|
vm_vlan = 66
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Nous pouvons également appeler le module en itérant sur une liste d’objets définissant les VMs à déployer :
|
||||||
|
```hcl
|
||||||
|
module "pve_vm" {
|
||||||
|
source = "../../modules/pve_vm"
|
||||||
|
for_each = local.vm_list
|
||||||
|
node_name = each.value.node_name
|
||||||
|
vm_name = each.value.vm_name
|
||||||
|
vm_cpu = each.value.vm_cpu
|
||||||
|
vm_ram = each.value.vm_ram
|
||||||
|
vm_vlan = each.value.vm_vlan
|
||||||
|
}
|
||||||
|
|
||||||
|
locals {
|
||||||
|
vm_list = {
|
||||||
|
zenith = {
|
||||||
|
node_name = "zenith"
|
||||||
|
vm_name = "zenith-vm"
|
||||||
|
vm_cpu = 2
|
||||||
|
vm_ram = 2048
|
||||||
|
vm_vlan = 66
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Bien que cela n'ait pas de sens avec une seule VM, je pourrais utiliser cette syntaxe de module, par exemple, pour déployer une machine virtuelle par nœud :
|
||||||
|
```hcl
|
||||||
|
module "pve_vm" {
|
||||||
|
source = "../../modules/pve_vm"
|
||||||
|
for_each = local.vm_list
|
||||||
|
node_name = each.value.node_name
|
||||||
|
vm_name = each.value.vm_name
|
||||||
|
vm_cpu = each.value.vm_cpu
|
||||||
|
vm_ram = each.value.vm_ram
|
||||||
|
vm_vlan = each.value.vm_vlan
|
||||||
|
}
|
||||||
|
|
||||||
|
locals {
|
||||||
|
vm_list = {
|
||||||
|
for vm in flatten([
|
||||||
|
for node in data.proxmox_virtual_environment_nodes.pve_nodes.names : {
|
||||||
|
node_name = node
|
||||||
|
vm_name = "${node}-vm"
|
||||||
|
vm_cpu = 2
|
||||||
|
vm_ram = 2048
|
||||||
|
vm_vlan = 66
|
||||||
|
}
|
||||||
|
]) : vm.vm_name => vm
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
data "proxmox_virtual_environment_nodes" "pve_nodes" {}
|
||||||
|
|
||||||
|
output "vm_ip" {
|
||||||
|
value = { for k, v in module.pve_vm : k => v.vm_ip }
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
✅ Cela permet de déployer automatiquement 3 VMs dans mon cluster, une par nœud.
|
||||||
|
|
||||||
|
### Plusieurs VMs par Nœud
|
||||||
|
|
||||||
|
Enfin, poussons l’idée plus loin : déployons plusieurs VMs avec des configurations différentes par nœud. Pour cela, on définit un ensemble de rôles et on utilise une boucle imbriquée pour générer toutes les combinaisons possibles pour chaque nœud Proxmox.
|
||||||
|
```hcl
|
||||||
|
module "pve_vm" {
|
||||||
|
source = "../../modules/pve_vm"
|
||||||
|
for_each = local.vm_list
|
||||||
|
node_name = each.value.node_name
|
||||||
|
vm_name = each.value.vm_name
|
||||||
|
vm_cpu = each.value.vm_cpu
|
||||||
|
vm_ram = each.value.vm_ram
|
||||||
|
vm_vlan = each.value.vm_vlan
|
||||||
|
}
|
||||||
|
|
||||||
|
locals {
|
||||||
|
vm_attr = {
|
||||||
|
"master" = { ram = 2048, cpu = 2, vlan = 66 }
|
||||||
|
"worker" = { ram = 1024, cpu = 1, vlan = 66 }
|
||||||
|
}
|
||||||
|
|
||||||
|
vm_list = {
|
||||||
|
for vm in flatten([
|
||||||
|
for node in data.proxmox_virtual_environment_nodes.pve_nodes.names : [
|
||||||
|
for role, config in local.vm_attr : {
|
||||||
|
node_name = node
|
||||||
|
vm_name = "${node}-${role}"
|
||||||
|
vm_cpu = config.cpu
|
||||||
|
vm_ram = config.ram
|
||||||
|
vm_vlan = config.vlan
|
||||||
|
}
|
||||||
|
]
|
||||||
|
]) : vm.vm_name => vm
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
data "proxmox_virtual_environment_nodes" "pve_nodes" {}
|
||||||
|
|
||||||
|
output "vm_ip" {
|
||||||
|
value = { for k, v in module.pve_vm : k => v.vm_ip }
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
🚀 Une fois le `terraform apply` lancé, j'obtiens ça :
|
||||||
|
```bash
|
||||||
|
Apply complete! Resources: 6 added, 0 changed, 0 destroyed.
|
||||||
|
|
||||||
|
Outputs:
|
||||||
|
|
||||||
|
vm_ip = {
|
||||||
|
"apex-master" = "192.168.66.167"
|
||||||
|
"apex-worker" = "192.168.66.168"
|
||||||
|
"vertex-master" = "192.168.66.169"
|
||||||
|
"vertex-worker" = "192.168.66.170"
|
||||||
|
"zenith-master" = "192.168.66.166"
|
||||||
|
"zenith-worker" = "192.168.66.172"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Conclusion
|
||||||
|
|
||||||
|
Nous avons transformé notre déploiement de VM Proxmox en un module Terraform réutilisable, et nous l’avons utilisé pour faire évoluer facilement notre infrastructure sur plusieurs nœuds.
|
||||||
|
|
||||||
|
Dans un prochain article, j’aimerais combiner Terraform avec Ansible afin de gérer le déploiement des VMs, et même explorer l’utilisation de différents workspaces Terraform pour gérer plusieurs environnements.
|
||||||
|
|
||||||
|
À la prochaine !
|
750
content/post/7-terraform-create-proxmox-module.md
Normal file
@@ -0,0 +1,750 @@
|
|||||||
|
---
|
||||||
|
slug: terraform-create-proxmox-module
|
||||||
|
title: Create a Terraform module for Proxmox
|
||||||
|
description: Turn your Proxmox VM code into a reusable Terraform module and learn how to scale deployments across multiple nodes.
|
||||||
|
date: 2025-07-04
|
||||||
|
draft: false
|
||||||
|
tags:
|
||||||
|
- terraform
|
||||||
|
- proxmox
|
||||||
|
- cloud-init
|
||||||
|
categories:
|
||||||
|
- homelab
|
||||||
|
---
|
||||||
|
## Intro
|
||||||
|
|
||||||
|
In a [previous post]({{< ref "post/3-terraform-create-vm-proxmox" >}}), I explained how to deploy **Virtual Machines** on **Proxmox** using **Terraform**, building from a [cloud-init template]({{< ref "post/1-proxmox-cloud-init-vm-template" >}}).
|
||||||
|
|
||||||
|
In this post, we’ll take that code and turn it into a reusable **Terraform module**. Then, I’ll show how to use that module in other projects to simplify and scale your infrastructure deployments.
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
## What is a Terraform Module?
|
||||||
|
|
||||||
|
Terraform modules are reusable components that let you organize and simplify your infrastructure code by grouping related resources into a single unit. Instead of repeating the same configuration across multiple places, you can define it once in a module and use it wherever needed, just like calling a function in programming.
|
||||||
|
|
||||||
|
Modules can be local (within your project) or remote (from the Terraform Registry or a Git repository), making it easy to share and standardize infrastructure patterns across teams or projects. By using modules, you make your code more readable, maintainable, and scalable.
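
As a quick illustration, here is a minimal sketch of what calling a module looks like; the module name, path and variables below are hypothetical and not part of this post's code:

```hcl
# Hypothetical example: calling a local module, much like calling a function
module "web_server" {
  source = "./modules/web_server" # Local path, could also point to the Terraform Registry or a Git repository

  # Arguments map to the variables declared inside the module
  server_name = "demo-01"
  server_ram  = 1024
}

# Values exposed by the module's outputs can be reused in the calling project
output "web_server_ip" {
  value = module.web_server.ip_address
}
```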
|
||||||
|
|
||||||
|
---
|
||||||
|
## Transform Project into Module
|
||||||
|
|
||||||
|
We're now going to extract the Terraform code from the [previous project]({{< ref "post/3-terraform-create-vm-proxmox" >}}) into a reusable module named `pve_vm`.
|
||||||
|
|
||||||
|
> 📌 You can find the full source code in my [Homelab repo](https://github.com/Vezpi/Homelab/). The specific code for this post lives [here](https://github.com/Vezpi/Homelab/tree/3a991010d5e9de30e12cbf365d1a1ca1ff1f6436/terraform). Make sure to adjust the variables to match your environment.
|
||||||
|
|
||||||
|
### Code Structure
|
||||||
|
|
||||||
|
Our module will live next to our projects, in another folder:
|
||||||
|
```plaintext
|
||||||
|
terraform
|
||||||
|
`-- modules
|
||||||
|
`-- pve_vm
|
||||||
|
|-- main.tf
|
||||||
|
|-- provider.tf
|
||||||
|
`-- variables.tf
|
||||||
|
```
|
||||||
|
|
||||||
|
### Module's Code
|
||||||
|
|
||||||
|
📝 Basically, the module files are the same as the project files we are transforming. Providers are declared, but not configured, inside the module.
|
||||||
|
|
||||||
|
The module `pve_vm` will be composed of 3 files:
|
||||||
|
- **main**: The core logic, same code as before.
|
||||||
|
- **provider**: Declares required providers without configuration.
|
||||||
|
- **variables**: Declares module variables, excluding provider-specific ones.
|
||||||
|
|
||||||
|
#### `main.tf`
|
||||||
|
|
||||||
|
```hcl
|
||||||
|
# Retrieve VM templates available in Proxmox that match the specified name
|
||||||
|
data "proxmox_virtual_environment_vms" "template" {
|
||||||
|
filter {
|
||||||
|
name = "name"
|
||||||
|
values = [var.vm_template] # The name of the template to clone from
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Create a cloud-init configuration file as a Proxmox snippet
|
||||||
|
resource "proxmox_virtual_environment_file" "cloud_config" {
|
||||||
|
content_type = "snippets" # Cloud-init files are stored as snippets in Proxmox
|
||||||
|
datastore_id = "local" # Local datastore used to store the snippet
|
||||||
|
node_name = var.node_name # The Proxmox node where the file will be uploaded
|
||||||
|
|
||||||
|
source_raw {
|
||||||
|
file_name = "${var.vm_name}.cloud-config.yaml" # The name of the snippet file
|
||||||
|
data = <<-EOF
|
||||||
|
#cloud-config
|
||||||
|
hostname: ${var.vm_name}
|
||||||
|
package_update: true
|
||||||
|
package_upgrade: true
|
||||||
|
packages:
|
||||||
|
- qemu-guest-agent # Ensures the guest agent is installed
|
||||||
|
users:
|
||||||
|
- default
|
||||||
|
- name: ${var.vm_user}
|
||||||
|
groups: sudo
|
||||||
|
shell: /bin/bash
|
||||||
|
ssh-authorized-keys:
|
||||||
|
- "${var.vm_user_sshkey}" # Inject user's SSH key
|
||||||
|
sudo: ALL=(ALL) NOPASSWD:ALL
|
||||||
|
runcmd:
|
||||||
|
- systemctl enable qemu-guest-agent
|
||||||
|
- reboot # Reboot the VM after provisioning
|
||||||
|
EOF
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Define and provision a new VM by cloning the template and applying initialization
|
||||||
|
resource "proxmox_virtual_environment_vm" "vm" {
|
||||||
|
name = var.vm_name # VM name
|
||||||
|
node_name = var.node_name # Proxmox node to deploy the VM
|
||||||
|
tags = var.vm_tags # Optional VM tags for categorization
|
||||||
|
|
||||||
|
agent {
|
||||||
|
enabled = true # Enable the QEMU guest agent
|
||||||
|
}
|
||||||
|
|
||||||
|
stop_on_destroy = true # Ensure VM is stopped gracefully when destroyed
|
||||||
|
|
||||||
|
clone {
|
||||||
|
vm_id = data.proxmox_virtual_environment_vms.template.vms[0].vm_id # ID of the source template
|
||||||
|
node_name = data.proxmox_virtual_environment_vms.template.vms[0].node_name # Node of the source template
|
||||||
|
}
|
||||||
|
|
||||||
|
bios = var.vm_bios # BIOS type (e.g., seabios or ovmf)
|
||||||
|
machine = var.vm_machine # Machine type (e.g., q35)
|
||||||
|
|
||||||
|
cpu {
|
||||||
|
cores = var.vm_cpu # Number of CPU cores
|
||||||
|
type = "host" # Use host CPU type for best compatibility/performance
|
||||||
|
}
|
||||||
|
|
||||||
|
memory {
|
||||||
|
dedicated = var.vm_ram # RAM in MB
|
||||||
|
}
|
||||||
|
|
||||||
|
disk {
|
||||||
|
datastore_id = var.node_datastore # Datastore to hold the disk
|
||||||
|
interface = "scsi0" # Primary disk interface
|
||||||
|
size = 4 # Disk size in GB
|
||||||
|
}
|
||||||
|
|
||||||
|
initialization {
|
||||||
|
user_data_file_id = proxmox_virtual_environment_file.cloud_config.id # Link the cloud-init file
|
||||||
|
datastore_id = var.node_datastore
|
||||||
|
interface = "scsi1" # Separate interface for cloud-init
|
||||||
|
ip_config {
|
||||||
|
ipv4 {
|
||||||
|
address = "dhcp" # Get IP via DHCP
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
network_device {
|
||||||
|
bridge = "vmbr0" # Use the default bridge
|
||||||
|
vlan_id = var.vm_vlan # VLAN tagging if used
|
||||||
|
}
|
||||||
|
|
||||||
|
operating_system {
|
||||||
|
type = "l26" # Linux 2.6+ kernel
|
||||||
|
}
|
||||||
|
|
||||||
|
vga {
|
||||||
|
type = "std" # Standard VGA type
|
||||||
|
}
|
||||||
|
|
||||||
|
lifecycle {
|
||||||
|
ignore_changes = [ # Ignore initialization section after first deployment for idempotency
|
||||||
|
initialization
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Output the assigned IP address of the VM after provisioning
|
||||||
|
output "vm_ip" {
|
||||||
|
value = proxmox_virtual_environment_vm.vm.ipv4_addresses[1][0] # Second network interface's first IP
|
||||||
|
description = "VM IP"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### `provider.tf`
|
||||||
|
|
||||||
|
```hcl
|
||||||
|
terraform {
|
||||||
|
required_providers {
|
||||||
|
proxmox = {
|
||||||
|
source = "bpg/proxmox"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### `variables.tf`
|
||||||
|
|
||||||
|
> ⚠️ The defaults are based on my environment, adapt them to yours.
|
||||||
|
|
||||||
|
```hcl
|
||||||
|
variable "node_name" {
|
||||||
|
description = "Proxmox host for the VM"
|
||||||
|
type = string
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "node_datastore" {
|
||||||
|
description = "Datastore used for VM storage"
|
||||||
|
type = string
|
||||||
|
default = "ceph-workload"
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "vm_template" {
|
||||||
|
description = "Template of the VM"
|
||||||
|
type = string
|
||||||
|
default = "ubuntu-cloud"
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "vm_name" {
|
||||||
|
description = "Hostname of the VM"
|
||||||
|
type = string
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "vm_user" {
|
||||||
|
description = "Admin user of the VM"
|
||||||
|
type = string
|
||||||
|
default = "vez"
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "vm_user_sshkey" {
|
||||||
|
description = "Admin user SSH key of the VM"
|
||||||
|
type = string
|
||||||
|
default = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAID62LmYRu1rDUha3timAIcA39LtcIOny1iAgFLnxoBxm vez@bastion"
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "vm_cpu" {
|
||||||
|
description = "Number of CPU cores of the VM"
|
||||||
|
type = number
|
||||||
|
default = 1
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "vm_ram" {
|
||||||
|
description = "Number of RAM (MB) of the VM"
|
||||||
|
type = number
|
||||||
|
default = 2048
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "vm_bios" {
|
||||||
|
description = "Type of BIOS used for the VM"
|
||||||
|
type = string
|
||||||
|
default = "ovmf"
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "vm_machine" {
|
||||||
|
description = "Type of machine used for the VM"
|
||||||
|
type = string
|
||||||
|
default = "q35"
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "vm_vlan" {
|
||||||
|
description = "VLAN of the VM"
|
||||||
|
type = number
|
||||||
|
default = 66
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "vm_tags" {
|
||||||
|
description = "Tags for the VM"
|
||||||
|
type = list(any)
|
||||||
|
default = ["test"]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
## Deploy a VM Using our Module
|
||||||
|
|
||||||
|
Now that we’ve extracted all the logic into the `pve_vm` module, our project code only needs to reference that module and pass the required variables. This makes our setup much cleaner and easier to maintain.
|
||||||
|
### Code Structure
|
||||||
|
|
||||||
|
Here is what it looks like:
|
||||||
|
```plaintext
|
||||||
|
terraform
|
||||||
|
|-- modules
|
||||||
|
| `-- pve_vm
|
||||||
|
| |-- main.tf
|
||||||
|
| |-- provider.tf
|
||||||
|
| `-- variables.tf
|
||||||
|
`-- projects
|
||||||
|
`-- simple-vm-with-module
|
||||||
|
|-- credentials.auto.tfvars
|
||||||
|
|-- main.tf
|
||||||
|
|-- provider.tf
|
||||||
|
`-- variables.tf
|
||||||
|
```
|
||||||
|
|
||||||
|
### Project's Code
|
||||||
|
|
||||||
|
In this example, I manually provide the values when calling my module. The provider is configured at project level.
|
||||||
|
#### `main.tf`
|
||||||
|
|
||||||
|
```hcl
|
||||||
|
module "pve_vm" {
|
||||||
|
source = "../../modules/pve_vm"
|
||||||
|
node_name = "zenith"
|
||||||
|
vm_name = "zenith-vm"
|
||||||
|
vm_cpu = 2
|
||||||
|
vm_ram = 2048
|
||||||
|
vm_vlan = 66
|
||||||
|
}
|
||||||
|
|
||||||
|
output "vm_ip" {
|
||||||
|
value = module.pve_vm.vm_ip
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### `provider.tf`
|
||||||
|
|
||||||
|
```hcl
|
||||||
|
terraform {
|
||||||
|
required_providers {
|
||||||
|
proxmox = {
|
||||||
|
source = "bpg/proxmox"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
provider "proxmox" {
|
||||||
|
endpoint = var.proxmox_endpoint
|
||||||
|
api_token = var.proxmox_api_token
|
||||||
|
insecure = false
|
||||||
|
ssh {
|
||||||
|
agent = false
|
||||||
|
private_key = file("~/.ssh/id_ed25519")
|
||||||
|
username = "root"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### `variables.tf`
|
||||||
|
|
||||||
|
```hcl
|
||||||
|
variable "proxmox_endpoint" {
|
||||||
|
description = "Proxmox URL endpoint"
|
||||||
|
type = string
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "proxmox_api_token" {
|
||||||
|
description = "Proxmox API token"
|
||||||
|
type = string
|
||||||
|
sensitive = true
|
||||||
|
}
|
||||||
|
```
|
||||||
|
#### `credentials.auto.tfvars`
|
||||||
|
|
||||||
|
```hcl
|
||||||
|
proxmox_endpoint = <your Proxmox endpoint>
|
||||||
|
proxmox_api_token = <your Proxmox API token for the user terraformer>
|
||||||
|
```
|
||||||
|
|
||||||
|
### Initialize the Terraform Workspace
|
||||||
|
|
||||||
|
In our new project, we first need to initialize the Terraform workspace with `terraform init`:
|
||||||
|
```bash
|
||||||
|
$ terraform init
|
||||||
|
Initializing the backend...
|
||||||
|
Initializing modules...
|
||||||
|
- pve_vm in ../../modules/pve_vm
|
||||||
|
Initializing provider plugins...
|
||||||
|
- Finding latest version of bpg/proxmox...
|
||||||
|
- Installing bpg/proxmox v0.78.2...
|
||||||
|
- Installed bpg/proxmox v0.78.2 (self-signed, key ID F0582AD6AE97C188)
|
||||||
|
Partner and community providers are signed by their developers.
|
||||||
|
If you'd like to know more about provider signing, you can read about it here:
|
||||||
|
https://www.terraform.io/docs/cli/plugins/signing.html
|
||||||
|
Terraform has created a lock file .terraform.lock.hcl to record the provider
|
||||||
|
selections it made above. Include this file in your version control repository
|
||||||
|
so that Terraform can guarantee to make the same selections by default when
|
||||||
|
you run "terraform init" in the future.
|
||||||
|
|
||||||
|
Terraform has been successfully initialized!
|
||||||
|
|
||||||
|
You may now begin working with Terraform. Try running "terraform plan" to see
|
||||||
|
any changes that are required for your infrastructure. All Terraform commands
|
||||||
|
should now work.
|
||||||
|
|
||||||
|
If you ever set or change modules or backend configuration for Terraform,
|
||||||
|
rerun this command to reinitialize your working directory. If you forget, other
|
||||||
|
commands will detect it and remind you to do so if necessary.
|
||||||
|
```
|
||||||
|
|
||||||
|
### Deploy the VM
|
||||||
|
|
||||||
|
Before deploying it, make sure that everything is ok with a `terraform plan`.
|
||||||
|
|
||||||
|
Once ready, you can deploy it with `terraform apply`:
|
||||||
|
```bash
|
||||||
|
$ terraform apply
|
||||||
|
module.pve_vm.data.proxmox_virtual_environment_vms.template: Reading...
|
||||||
|
module.pve_vm.data.proxmox_virtual_environment_vms.template: Read complete after 0s [id=89b444be-7501-4538-9436-08609b380d39]
|
||||||
|
|
||||||
|
Terraform used the selected providers to generate the following execution plan. Resource actions are indicated with the following symbols:
|
||||||
|
+ create
|
||||||
|
|
||||||
|
Terraform will perform the following actions:
|
||||||
|
|
||||||
|
# module.pve_vm.proxmox_virtual_environment_file.cloud_config will be created
|
||||||
|
+ resource "proxmox_virtual_environment_file" "cloud_config" {
|
||||||
|
+ content_type = "snippets"
|
||||||
|
+ datastore_id = "local"
|
||||||
|
+ file_modification_date = (known after apply)
|
||||||
|
+ file_name = (known after apply)
|
||||||
|
+ file_size = (known after apply)
|
||||||
|
+ file_tag = (known after apply)
|
||||||
|
+ id = (known after apply)
|
||||||
|
+ node_name = "zenith"
|
||||||
|
+ overwrite = true
|
||||||
|
+ timeout_upload = 1800
|
||||||
|
|
||||||
|
+ source_raw {
|
||||||
|
+ data = <<-EOT
|
||||||
|
#cloud-config
|
||||||
|
hostname: zenith-vm
|
||||||
|
package_update: true
|
||||||
|
package_upgrade: true
|
||||||
|
packages:
|
||||||
|
- qemu-guest-agent
|
||||||
|
users:
|
||||||
|
- default
|
||||||
|
- name: vez
|
||||||
|
groups: sudo
|
||||||
|
shell: /bin/bash
|
||||||
|
ssh-authorized-keys:
|
||||||
|
- "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAID62LmYRu1rDUha3timAIcA39LtcIOny1iAgFLnxoBxm vez@bastion"
|
||||||
|
sudo: ALL=(ALL) NOPASSWD:ALL
|
||||||
|
runcmd:
|
||||||
|
- systemctl enable qemu-guest-agent
|
||||||
|
- reboot
|
||||||
|
EOT
|
||||||
|
+ file_name = "zenith-vm.cloud-config.yaml"
|
||||||
|
+ resize = 0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# module.pve_vm.proxmox_virtual_environment_vm.vm will be created
|
||||||
|
+ resource "proxmox_virtual_environment_vm" "vm" {
|
||||||
|
+ acpi = true
|
||||||
|
+ bios = "ovmf"
|
||||||
|
+ id = (known after apply)
|
||||||
|
+ ipv4_addresses = (known after apply)
|
||||||
|
+ ipv6_addresses = (known after apply)
|
||||||
|
+ keyboard_layout = "en-us"
|
||||||
|
+ mac_addresses = (known after apply)
|
||||||
|
+ machine = "q35"
|
||||||
|
+ migrate = false
|
||||||
|
+ name = "zenith-vm"
|
||||||
|
+ network_interface_names = (known after apply)
|
||||||
|
+ node_name = "zenith"
|
||||||
|
+ on_boot = true
|
||||||
|
+ protection = false
|
||||||
|
+ reboot = false
|
||||||
|
+ reboot_after_update = true
|
||||||
|
+ scsi_hardware = "virtio-scsi-pci"
|
||||||
|
+ started = true
|
||||||
|
+ stop_on_destroy = true
|
||||||
|
+ tablet_device = true
|
||||||
|
+ tags = [
|
||||||
|
+ "test",
|
||||||
|
]
|
||||||
|
+ template = false
|
||||||
|
+ timeout_clone = 1800
|
||||||
|
+ timeout_create = 1800
|
||||||
|
+ timeout_migrate = 1800
|
||||||
|
+ timeout_move_disk = 1800
|
||||||
|
+ timeout_reboot = 1800
|
||||||
|
+ timeout_shutdown_vm = 1800
|
||||||
|
+ timeout_start_vm = 1800
|
||||||
|
+ timeout_stop_vm = 300
|
||||||
|
+ vm_id = (known after apply)
|
||||||
|
|
||||||
|
+ agent {
|
||||||
|
+ enabled = true
|
||||||
|
+ timeout = "15m"
|
||||||
|
+ trim = false
|
||||||
|
+ type = "virtio"
|
||||||
|
}
|
||||||
|
|
||||||
|
+ clone {
|
||||||
|
+ full = true
|
||||||
|
+ node_name = "apex"
|
||||||
|
+ retries = 1
|
||||||
|
+ vm_id = 900
|
||||||
|
}
|
||||||
|
|
||||||
|
+ cpu {
|
||||||
|
+ cores = 2
|
||||||
|
+ hotplugged = 0
|
||||||
|
+ limit = 0
|
||||||
|
+ numa = false
|
||||||
|
+ sockets = 1
|
||||||
|
+ type = "host"
|
||||||
|
+ units = 1024
|
||||||
|
}
|
||||||
|
|
||||||
|
+ disk {
|
||||||
|
+ aio = "io_uring"
|
||||||
|
+ backup = true
|
||||||
|
+ cache = "none"
|
||||||
|
+ datastore_id = "ceph-workload"
|
||||||
|
+ discard = "ignore"
|
||||||
|
+ file_format = (known after apply)
|
||||||
|
+ interface = "scsi0"
|
||||||
|
+ iothread = false
|
||||||
|
+ path_in_datastore = (known after apply)
|
||||||
|
+ replicate = true
|
||||||
|
+ size = 4
|
||||||
|
+ ssd = false
|
||||||
|
}
|
||||||
|
|
||||||
|
+ initialization {
|
||||||
|
+ datastore_id = "ceph-workload"
|
||||||
|
+ interface = "scsi1"
|
||||||
|
+ meta_data_file_id = (known after apply)
|
||||||
|
+ network_data_file_id = (known after apply)
|
||||||
|
+ type = (known after apply)
|
||||||
|
+ user_data_file_id = (known after apply)
|
||||||
|
+ vendor_data_file_id = (known after apply)
|
||||||
|
|
||||||
|
+ ip_config {
|
||||||
|
+ ipv4 {
|
||||||
|
+ address = "dhcp"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
+ memory {
|
||||||
|
+ dedicated = 2048
|
||||||
|
+ floating = 0
|
||||||
|
+ keep_hugepages = false
|
||||||
|
+ shared = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
+ network_device {
|
||||||
|
+ bridge = "vmbr0"
|
||||||
|
+ enabled = true
|
||||||
|
+ firewall = false
|
||||||
|
+ mac_address = (known after apply)
|
||||||
|
+ model = "virtio"
|
||||||
|
+ mtu = 0
|
||||||
|
+ queues = 0
|
||||||
|
+ rate_limit = 0
|
||||||
|
+ vlan_id = 66
|
||||||
|
}
|
||||||
|
|
||||||
|
+ operating_system {
|
||||||
|
+ type = "l26"
|
||||||
|
}
|
||||||
|
|
||||||
|
+ vga {
|
||||||
|
+ memory = 16
|
||||||
|
+ type = "std"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Plan: 2 to add, 0 to change, 0 to destroy.
|
||||||
|
|
||||||
|
Changes to Outputs:
|
||||||
|
+ vm_ip = (known after apply)
|
||||||
|
|
||||||
|
Do you want to perform these actions?
|
||||||
|
Terraform will perform the actions described above.
|
||||||
|
Only 'yes' will be accepted to approve.
|
||||||
|
|
||||||
|
Enter a value: yes
|
||||||
|
|
||||||
|
module.pve_vm.proxmox_virtual_environment_file.cloud_config: Creating...
|
||||||
|
module.pve_vm.proxmox_virtual_environment_file.cloud_config: Creation complete after 1s [id=local:snippets/zenith-vm.cloud-config.yaml]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Creating...
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [10s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [20s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [30s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [40s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [50s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [1m0s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [1m10s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [1m20s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [1m30s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [1m40s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [1m50s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [2m0s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [2m10s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [2m20s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [2m30s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [2m40s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [2m50s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [3m0s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Still creating... [3m10s elapsed]
|
||||||
|
module.pve_vm.proxmox_virtual_environment_vm.vm: Creation complete after 3m13s [id=103]
|
||||||
|
|
||||||
|
Apply complete! Resources: 2 added, 0 changed, 0 destroyed.
|
||||||
|
|
||||||
|
Outputs:
|
||||||
|
|
||||||
|
vm_ip = "192.168.66.159"
|
||||||
|
```
|
||||||
|
|
||||||
|
✅ The VM is now ready!
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
🕗 *Don't pay attention to the uptime, I took the screenshot the next day*
|
||||||
|
|
||||||
|
---
|
||||||
|
## Deploy Multiple VMs at Once
|
||||||
|
|
||||||
|
Ok, I've deployed a single VM, fine. But now, how to scale it? How to deploy multiple instances of that template, with different names, on different nodes, and with different sizes? This is what I will show you now.
|
||||||
|
|
||||||
|
### One VM per Node
|
||||||
|
|
||||||
|
In the earlier example, we passed fixed values to the module. Instead, we could define a local object to store the VM specs, and reference its values when calling the module. This approach makes it easier to scale the deployment logic later:
|
||||||
|
```hcl
|
||||||
|
module "pve_vm" {
|
||||||
|
source = "../../modules/pve_vm"
|
||||||
|
node_name = local.vm.node_name
|
||||||
|
vm_name = local.vm.vm_name
|
||||||
|
vm_cpu = local.vm.vm_cpu
|
||||||
|
vm_ram = local.vm.vm_ram
|
||||||
|
vm_vlan = local.vm.vm_vlan
|
||||||
|
}
|
||||||
|
|
||||||
|
locals {
|
||||||
|
vm = {
|
||||||
|
node_name = "zenith"
|
||||||
|
vm_name = "zenith-vm"
|
||||||
|
vm_cpu = 2
|
||||||
|
vm_ram = 2048
|
||||||
|
vm_vlan = 66
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
I could also call the module while iterating over a map of VM definitions:
|
||||||
|
```hcl
|
||||||
|
module "pve_vm" {
|
||||||
|
source = "../../modules/pve_vm"
|
||||||
|
for_each = local.vm_list
|
||||||
|
node_name = each.value.node_name
|
||||||
|
vm_name = each.value.vm_name
|
||||||
|
vm_cpu = each.value.vm_cpu
|
||||||
|
vm_ram = each.value.vm_ram
|
||||||
|
vm_vlan = each.value.vm_vlan
|
||||||
|
}
|
||||||
|
|
||||||
|
locals {
|
||||||
|
vm_list = {
|
||||||
|
zenith = {
|
||||||
|
node_name = "zenith"
|
||||||
|
vm_name = "zenith-vm"
|
||||||
|
vm_cpu = 2
|
||||||
|
vm_ram = 2048
|
||||||
|
vm_vlan = 66
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
While this does not make sense with only one VM, I could use this module syntax, for example, to deploy one VM per node:
|
||||||
|
```hcl
|
||||||
|
module "pve_vm" {
|
||||||
|
source = "../../modules/pve_vm"
|
||||||
|
for_each = local.vm_list
|
||||||
|
node_name = each.value.node_name
|
||||||
|
vm_name = each.value.vm_name
|
||||||
|
vm_cpu = each.value.vm_cpu
|
||||||
|
vm_ram = each.value.vm_ram
|
||||||
|
vm_vlan = each.value.vm_vlan
|
||||||
|
}
|
||||||
|
|
||||||
|
locals {
|
||||||
|
vm_list = {
|
||||||
|
for vm in flatten([
|
||||||
|
for node in data.proxmox_virtual_environment_nodes.pve_nodes.names : {
|
||||||
|
node_name = node
|
||||||
|
vm_name = "${node}-vm"
|
||||||
|
vm_cpu = 2
|
||||||
|
vm_ram = 2048
|
||||||
|
vm_vlan = 66
|
||||||
|
}
|
||||||
|
]) : vm.vm_name => vm
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
data "proxmox_virtual_environment_nodes" "pve_nodes" {}
|
||||||
|
|
||||||
|
output "vm_ip" {
|
||||||
|
value = { for k, v in module.pve_vm : k => v.vm_ip }
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
✅ This would deploy 3 VMs on my cluster, one per node.
|
||||||
|
|
||||||
|
### Multiple VM per Node
|
||||||
|
|
||||||
|
Finally, let’s scale things up by deploying multiple VMs with different configurations per node. We’ll define a set of roles and use a nested loop to generate the desired VM configurations for each Proxmox node:
|
||||||
|
```hcl
|
||||||
|
module "pve_vm" {
|
||||||
|
source = "../../modules/pve_vm"
|
||||||
|
for_each = local.vm_list
|
||||||
|
node_name = each.value.node_name
|
||||||
|
vm_name = each.value.vm_name
|
||||||
|
vm_cpu = each.value.vm_cpu
|
||||||
|
vm_ram = each.value.vm_ram
|
||||||
|
vm_vlan = each.value.vm_vlan
|
||||||
|
}
|
||||||
|
|
||||||
|
locals {
|
||||||
|
vm_attr = {
|
||||||
|
"master" = { ram = 2048, cpu = 2, vlan = 66 }
|
||||||
|
"worker" = { ram = 1024, cpu = 1, vlan = 66 }
|
||||||
|
}
|
||||||
|
|
||||||
|
vm_list = {
|
||||||
|
for vm in flatten([
|
||||||
|
for node in data.proxmox_virtual_environment_nodes.pve_nodes.names : [
|
||||||
|
for role, config in local.vm_attr : {
|
||||||
|
node_name = node
|
||||||
|
vm_name = "${node}-${role}"
|
||||||
|
vm_cpu = config.cpu
|
||||||
|
vm_ram = config.ram
|
||||||
|
vm_vlan = config.vlan
|
||||||
|
}
|
||||||
|
]
|
||||||
|
]) : vm.vm_name => vm
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
data "proxmox_virtual_environment_nodes" "pve_nodes" {}
|
||||||
|
|
||||||
|
output "vm_ip" {
|
||||||
|
value = { for k, v in module.pve_vm : k => v.vm_ip }
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
🚀 After deploying it with a `terraform apply`, I got this:
|
||||||
|
```bash
|
||||||
|
Apply complete! Resources: 6 added, 0 changed, 0 destroyed.
|
||||||
|
|
||||||
|
Outputs:
|
||||||
|
|
||||||
|
vm_ip = {
|
||||||
|
"apex-master" = "192.168.66.167"
|
||||||
|
"apex-worker" = "192.168.66.168"
|
||||||
|
"vertex-master" = "192.168.66.169"
|
||||||
|
"vertex-worker" = "192.168.66.170"
|
||||||
|
"zenith-master" = "192.168.66.166"
|
||||||
|
"zenith-worker" = "192.168.66.172"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
## Conclusion
|
||||||
|
|
||||||
|
We’ve transformed our Proxmox VM deployment into a reusable Terraform module and used it to easily scale our infrastructure across multiple nodes.
|
||||||
|
|
||||||
|
In a future post, I would like to pair Terraform with Ansible to manage the VM deployment, and even explore using different Terraform workspaces to handle several environments.
|
||||||
|
|
||||||
|
Stay tuned!
|
636
content/post/8-create-manual-kubernetes-cluster-kubeadm.fr.md
Normal file
@@ -0,0 +1,636 @@
|
|||||||
|
---
|
||||||
|
slug: create-manual-kubernetes-cluster-kubeadm
|
||||||
|
title: Créer un Cluster Kubernetes Hautement Disponible avec kubeadm sur des VMs
|
||||||
|
description: Guide étape par étape pour créer manuellement un cluster Kubernetes hautement disponible sur des machines virtuelles avec kubeadm.
|
||||||
|
date: 2025-07-18
|
||||||
|
draft: false
|
||||||
|
tags:
|
||||||
|
- kubernetes
|
||||||
|
- kubeadm
|
||||||
|
- high-availability
|
||||||
|
categories:
|
||||||
|
- homelab
|
||||||
|
---
|
||||||
|
|
||||||
|
## Intro
|
||||||
|
|
||||||
|
Dans cet [article précédent]({{< ref "post/7-terraform-create-proxmox-module" >}}), j'expliquais comment déployer des VMs avec un module **Terraform** sur **Proxmox** et j'avais terminé avec 6 VMs, 3 nœuds masters et 3 nœuds workers, en m'appuyant sur un [template cloud-init]({{< ref "post/1-proxmox-cloud-init-vm-template" >}}).
|
||||||
|
|
||||||
|
Maintenant que l'infrastructure est prête, passons à l'étape suivante : **créer manuellement un cluster Kubernetes** hautement disponible dans mon homelab avec `kubeadm`, en utilisant `etcd` empilé (stacked).
|
||||||
|
|
||||||
|
Dans cet article, je vais détailler chaque étape de l'installation d’un cluster Kubernetes. Je n'utiliserai pas d'outil d'automatisation pour configurer les nœuds pour le moment, afin de mieux comprendre les étapes impliquées dans le bootstrap d’un cluster Kubernetes. L'automatisation sera couverte dans de futurs articles.
|
||||||
|
|
||||||
|
---
|
||||||
|
## Qu'est-ce que Kubernetes ?
|
||||||
|
|
||||||
|
Kubernetes est une plateforme open-source qui orchestre des containers sur un ensemble de machines. Elle gère le déploiement, la montée en charge et la santé des applications conteneurisées, ce qui vous permet de vous concentrer sur vos services plutôt que sur l’infrastructure sous-jacente.
|
||||||
|
|
||||||
|
Un cluster Kubernetes est composé de deux types de nœuds : les nœuds control plane (masters) et les workers. Le control plane assure la gestion globale du cluster, il prend les décisions de planification, surveille l’état du système et réagit aux événements. Les workers, eux, exécutent réellement vos applications, dans des containers gérés par Kubernetes.
|
||||||
|
|
||||||
|
Dans cet article, nous allons mettre en place manuellement un cluster Kubernetes avec 3 nœuds control plane et 3 workers. Cette architecture reflète un environnement hautement disponible et proche de la production, même si l’objectif ici est avant tout pédagogique.
|
||||||
|
|
||||||
|
La documentation officielle se trouve [ici](https://kubernetes.io/docs/setup/production-environment/tools/kubeadm/), je vais utiliser la version **v1.32**.
|
||||||
|
|
||||||
|
---
|
||||||
|
## Préparer les Nœuds
|
||||||
|
|
||||||
|
Je vais exécuter les étapes suivantes sur les **6 VMs** (masters et workers).
|
||||||
|
|
||||||
|
### Hostname
|
||||||
|
|
||||||
|
Chaque VM possède un **nom d’hôte unique** et tous les nœuds doivent pouvoir **se résoudre entre eux**.
|
||||||
|
|
||||||
|
Le nom d’hôte est défini à la création de la VM via cloud-init. Mais pour la démonstration, je vais le définir manuellement :
|
||||||
|
```bash
|
||||||
|
sudo hostnamectl set-hostname <hostname>
|
||||||
|
```
|
||||||
|
|
||||||
|
Dans mon infrastructure, les nœuds se résolvent via mon serveur DNS sur le domaine `lab.vezpi.me`. Si vous n’avez pas de DNS, vous pouvez inscrire manuellement les IPs des nœuds dans le fichier `/etc/hosts` :
|
||||||
|
```bash
|
||||||
|
192.168.66.168 apex-worker
|
||||||
|
192.168.66.167 apex-master
|
||||||
|
192.168.66.166 zenith-master
|
||||||
|
192.168.66.170 vertex-worker
|
||||||
|
192.168.66.169 vertex-master
|
||||||
|
192.168.66.172 zenith-worker
|
||||||
|
```
|
||||||
|
|
||||||
|
### Mises à jour Système
|
||||||
|
|
||||||
|
Mes VMs tournent sous **Ubuntu 24.04.2 LTS**. Cloud-init s’occupe des mises à jour après le provisionnement, mais on s’assure quand même que tout est bien à jour et on installe les paquets nécessaires pour ajouter le dépôt Kubernetes :
|
||||||
|
```bash
|
||||||
|
sudo apt update && sudo apt upgrade -y
|
||||||
|
sudo apt install -y apt-transport-https ca-certificates curl gpg
|
||||||
|
```
|
||||||
|
|
||||||
|
### Swap
|
||||||
|
|
||||||
|
Par défaut, `kubelet` ne démarre pas si une **mémoire swap** est détectée sur un nœud. Il faut donc la désactiver ou la rendre tolérable par `kubelet`.
|
||||||
|
|
||||||
|
Mes VMs ne disposent pas de swap, mais voici comment le désactiver si besoin :
|
||||||
|
```bash
|
||||||
|
sudo swapoff -a
|
||||||
|
sudo sed -i '/ swap / s/^/#/' /etc/fstab
|
||||||
|
```
|
||||||
|
|
||||||
|
### Pare-feu
|
||||||
|
|
||||||
|
Dans ce lab, je désactive simplement le pare-feu local (à ne pas faire en production) :
|
||||||
|
```bash
|
||||||
|
sudo systemctl disable --now ufw
|
||||||
|
```
|
||||||
|
|
||||||
|
En production, vous devez autoriser la communication entre les nœuds sur les ports suivants :
|
||||||
|
#### Control Plane
|
||||||
|
|
||||||
|
| Protocole | Direction | Ports | Usage | Utilisé par |
|
||||||
|
| --------- | --------- | --------- | ----------------------- | -------------------- |
|
||||||
|
| TCP | Entrant | 6443 | API server Kubernetes | Tous |
|
||||||
|
| TCP | Entrant | 2379-2380 | API client etcd | kube-apiserver, etcd |
|
||||||
|
| TCP | Entrant | 10250 | API Kubelet | Plan de contrôle |
|
||||||
|
| TCP | Entrant | 10259 | kube-scheduler | Lui-même |
|
||||||
|
| TCP | Entrant | 10257 | kube-controller-manager | Lui-même |
|
||||||
|
#### Worker
|
||||||
|
|
||||||
|
| Protocole | Direction | Ports | Usage | Utilisé par |
|
||||||
|
| --------- | --------- | ----------- | ----------------- | -------------- |
|
||||||
|
| TCP       | Entrant   | 10250       | API Kubelet       | Plan de contrôle |
|
||||||
|
| TCP | Entrant | 10256 | kube-proxy | Load balancers |
|
||||||
|
| TCP | Entrant | 30000-32767 | Services NodePort | Tous |
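
À titre d'illustration, voici une ébauche de règles `ufw` correspondantes (hypothétique et à adapter, le sous-réseau `192.168.66.0/24` est celui de mon lab) :

```bash
# Ébauche : autoriser les ports Kubernetes depuis le réseau des nœuds (192.168.66.0/24, à adapter)
# Sur les nœuds control plane
sudo ufw allow from 192.168.66.0/24 to any port 6443 proto tcp        # API server Kubernetes
sudo ufw allow from 192.168.66.0/24 to any port 2379:2380 proto tcp   # API client etcd
sudo ufw allow from 192.168.66.0/24 to any port 10250 proto tcp       # API Kubelet
sudo ufw allow from 192.168.66.0/24 to any port 10259 proto tcp       # kube-scheduler
sudo ufw allow from 192.168.66.0/24 to any port 10257 proto tcp       # kube-controller-manager

# Sur les nœuds worker
sudo ufw allow from 192.168.66.0/24 to any port 10250 proto tcp       # API Kubelet
sudo ufw allow from 192.168.66.0/24 to any port 10256 proto tcp       # kube-proxy
sudo ufw allow from 192.168.66.0/24 to any port 30000:32767 proto tcp # Services NodePort

sudo ufw enable
```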
|
||||||
|
### Modules Noyau et Paramètres sysctl
|
||||||
|
|
||||||
|
Kubernetes requiert l’activation de deux modules noyau :
|
||||||
|
- **overlay** : pour permettre l’empilement de systèmes de fichiers.
|
||||||
|
- **br_netfilter** : pour activer le filtrage des paquets sur les interfaces bridge.
|
||||||
|
|
||||||
|
Activation des modules :
|
||||||
|
```bash
|
||||||
|
cat <<EOF | sudo tee /etc/modules-load.d/k8s.conf
|
||||||
|
overlay
|
||||||
|
br_netfilter
|
||||||
|
EOF
|
||||||
|
|
||||||
|
sudo modprobe overlay
|
||||||
|
sudo modprobe br_netfilter
|
||||||
|
```
|
||||||
|
|
||||||
|
Appliquer les paramètres noyau nécessaires pour la partie réseau :
|
||||||
|
```bash
|
||||||
|
cat <<EOF | sudo tee /etc/sysctl.d/k8s.conf
|
||||||
|
net.bridge.bridge-nf-call-iptables = 1
|
||||||
|
net.bridge.bridge-nf-call-ip6tables = 1
|
||||||
|
net.ipv4.ip_forward = 1
|
||||||
|
EOF
|
||||||
|
|
||||||
|
sudo sysctl --system
|
||||||
|
```
|
||||||
|
|
||||||
|
### Runtime de Containers
|
||||||
|
|
||||||
|
Chaque nœud du cluster doit disposer d’un **runtime de containers** pour pouvoir exécuter des Pods. J’utilise ici `containerd` :
|
||||||
|
```bash
|
||||||
|
sudo apt install -y containerd
|
||||||
|
```
|
||||||
|
|
||||||
|
Créer la configuration par défaut :
|
||||||
|
```bash
|
||||||
|
sudo mkdir -p /etc/containerd
|
||||||
|
containerd config default | sudo tee /etc/containerd/config.toml > /dev/null
|
||||||
|
```
|
||||||
|
|
||||||
|
Utiliser `systemd` comme pilote de _cgroup_ :
|
||||||
|
```bash
|
||||||
|
sudo sed -i 's/^\(\s*SystemdCgroup\s*=\s*\)false/\1true/' /etc/containerd/config.toml
|
||||||
|
```
|
||||||
|
|
||||||
|
Redémarrer et activer le service `containerd` :
|
||||||
|
```bash
|
||||||
|
sudo systemctl restart containerd
|
||||||
|
sudo systemctl enable containerd
|
||||||
|
```
|
||||||
|
|
||||||
|
### Paquets Kubernetes
|
||||||
|
|
||||||
|
Dernière étape : installer les paquets Kubernetes. On commence par ajouter le dépôt officiel et sa clé de signature.
|
||||||
|
|
||||||
|
Ajouter la clé :
|
||||||
|
```bash
|
||||||
|
curl -fsSL https://pkgs.k8s.io/core:/stable:/v1.32/deb/Release.key | sudo gpg --dearmor -o /etc/apt/keyrings/kubernetes-apt-keyring.gpg
|
||||||
|
```
|
||||||
|
|
||||||
|
Ajouter le dépôt :
|
||||||
|
```bash
|
||||||
|
echo 'deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://pkgs.k8s.io/core:/stable:/v1.32/deb/ /' | sudo tee /etc/apt/sources.list.d/kubernetes.list
|
||||||
|
```
|
||||||
|
|
||||||
|
Installer ensuite les paquets nécessaires :
|
||||||
|
- `kubeadm` : l’outil pour initier un cluster Kubernetes.
|
||||||
|
- `kubelet` : l’agent qui s’exécute sur tous les nœuds et qui gère les pods/containers.
|
||||||
|
- `kubectl` : l’outil en ligne de commande pour interagir avec le cluster.
|
||||||
|
|
||||||
|
Sur les nœuds, on installe `kubelet` et `kubeadm`, puis on les fige :
|
||||||
|
```bash
|
||||||
|
sudo apt-get update
|
||||||
|
sudo apt-get install -y kubelet kubeadm
|
||||||
|
sudo apt-mark hold kubelet kubeadm
|
||||||
|
```
|
||||||
|
|
||||||
|
ℹ️ Je ne gérerai pas le cluster depuis les nœuds eux-mêmes, j’installe `kubectl` sur mon contrôleur LXC à la place :
|
||||||
|
```bash
|
||||||
|
sudo apt-get update
|
||||||
|
sudo apt-get install -y kubectl
|
||||||
|
sudo apt-mark hold kubectl
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
## Initialiser le Cluster
|
||||||
|
|
||||||
|
Une fois tous les nœuds préparés, on peut initialiser le **plan de contrôle** Kubernetes sur le **premier nœud master**.
|
||||||
|
|
||||||
|
### Amorcer le Cluster
|
||||||
|
|
||||||
|
Exécutez la commande suivante pour amorcer le cluster :
|
||||||
|
```bash
|
||||||
|
sudo kubeadm init \
|
||||||
|
--control-plane-endpoint "k8s-lab.lab.vezpi.me:6443" \
|
||||||
|
--upload-certs \
|
||||||
|
--pod-network-cidr=10.10.0.0/16
|
||||||
|
```
|
||||||
|
|
||||||
|
**Explications** :
|
||||||
|
- `--control-plane-endpoint` : Nom DNS pour votre plan de contrôle.
|
||||||
|
- `--upload-certs` : Téléverse les certificats qui doivent être partagés entre tous les nœuds master du cluster.
|
||||||
|
- `--pod-network-cidr` : Sous-réseau à utiliser pour le CNI.
|
||||||
|
|
||||||
|
Cette étape va :
|
||||||
|
- Initialiser la base `etcd` et les composants du plan de contrôle.
|
||||||
|
- Configurer RBAC et les tokens d’amorçage.
|
||||||
|
- Afficher deux commandes `kubeadm join` importantes : une pour les **workers**, l’autre pour les **masters supplémentaires**.
|
||||||
|
|
||||||
|
ℹ️ Le nom DNS `k8s-lab.lab.vezpi.me` est géré dans mon homelab par **Unbound DNS**, cela résout sur mon interface d'**OPNsense** où un service **HAProxy** écoute sur le port 6443 et équilibre la charge entre les 3 nœuds du plan de contrôle.
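
Pour vérifier que ce point d'entrée répond, on peut par exemple lancer ces quelques commandes (simple vérification indicative, le nom DNS est celui de mon lab) :

```bash
# Vérifier que le nom DNS du plan de contrôle se résout (remplacez par le vôtre)
getent hosts k8s-lab.lab.vezpi.me

# Tester la connexion TCP vers HAProxy sur le port 6443
timeout 2 bash -c '</dev/tcp/k8s-lab.lab.vezpi.me/6443' && echo "6443 joignable" || echo "6443 injoignable"
```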
|
||||||
|
|
||||||
|
Vous verrez aussi un message indiquant comment configurer l’accès `kubectl`.
|
||||||
|
|
||||||
|
```plaintext
|
||||||
|
I0718 07:18:29.306814 14724 version.go:261] remote version is much newer: v1.33.3; falling back to: stable-1.32
|
||||||
|
[init] Using Kubernetes version: v1.32.7
|
||||||
|
[preflight] Running pre-flight checks
|
||||||
|
[preflight] Pulling images required for setting up a Kubernetes cluster
|
||||||
|
[preflight] This might take a minute or two, depending on the speed of your internet connection
|
||||||
|
[preflight] You can also perform this action beforehand using 'kubeadm config images pull'
|
||||||
|
W0718 07:18:29.736833 14724 checks.go:846] detected that the sandbox image "registry.k8s.io/pause:3.8" of the container runtime is inconsistent with that used by kubeadm.It is recommended to use "registry.k8s.io/pause:3.10" as the CRI sandbox image.
|
||||||
|
[certs] Using certificateDir folder "/etc/kubernetes/pki"
|
||||||
|
[certs] Generating "ca" certificate and key
|
||||||
|
[certs] Generating "apiserver" certificate and key
|
||||||
|
[certs] apiserver serving cert is signed for DNS names [apex-master k8s-lab.lab.vezpi.me kubernetes kubernetes.default kubernetes.default.svc kubernetes.default.svc.cluster.local] and IPs [10.96.0.1 192.168.66.167]
|
||||||
|
[certs] Generating "apiserver-kubelet-client" certificate and key
|
||||||
|
[certs] Generating "front-proxy-ca" certificate and key
|
||||||
|
[certs] Generating "front-proxy-client" certificate and key
|
||||||
|
[certs] Generating "etcd/ca" certificate and key
|
||||||
|
[certs] Generating "etcd/server" certificate and key
|
||||||
|
[certs] etcd/server serving cert is signed for DNS names [apex-master localhost] and IPs [192.168.66.167 127.0.0.1 ::1]
|
||||||
|
[certs] Generating "etcd/peer" certificate and key
|
||||||
|
[certs] etcd/peer serving cert is signed for DNS names [apex-master localhost] and IPs [192.168.66.167 127.0.0.1 ::1]
|
||||||
|
[certs] Generating "etcd/healthcheck-client" certificate and key
|
||||||
|
[certs] Generating "apiserver-etcd-client" certificate and key
|
||||||
|
[certs] Generating "sa" key and public key
|
||||||
|
[kubeconfig] Using kubeconfig folder "/etc/kubernetes"
|
||||||
|
[kubeconfig] Writing "admin.conf" kubeconfig file
|
||||||
|
[kubeconfig] Writing "super-admin.conf" kubeconfig file
|
||||||
|
[kubeconfig] Writing "kubelet.conf" kubeconfig file
|
||||||
|
[kubeconfig] Writing "controller-manager.conf" kubeconfig file
|
||||||
|
[kubeconfig] Writing "scheduler.conf" kubeconfig file
|
||||||
|
[etcd] Creating static Pod manifest for local etcd in "/etc/kubernetes/manifests"
|
||||||
|
[control-plane] Using manifest folder "/etc/kubernetes/manifests"
|
||||||
|
[control-plane] Creating static Pod manifest for "kube-apiserver"
|
||||||
|
[control-plane] Creating static Pod manifest for "kube-controller-manager"
|
||||||
|
[control-plane] Creating static Pod manifest for "kube-scheduler"
|
||||||
|
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
|
||||||
|
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
|
||||||
|
[kubelet-start] Starting the kubelet
|
||||||
|
[wait-control-plane] Waiting for the kubelet to boot up the control plane as static Pods from directory "/etc/kubernetes/manifests"
|
||||||
|
[kubelet-check] Waiting for a healthy kubelet at http://127.0.0.1:10248/healthz. This can take up to 4m0s
|
||||||
|
[kubelet-check] The kubelet is healthy after 501.894876ms
|
||||||
|
[api-check] Waiting for a healthy API server. This can take up to 4m0s
|
||||||
|
[api-check] The API server is healthy after 9.030595455s
|
||||||
|
[upload-config] Storing the configuration used in ConfigMap "kubeadm-config" in the "kube-system" Namespace
|
||||||
|
[kubelet] Creating a ConfigMap "kubelet-config" in namespace kube-system with the configuration for the kubelets in the cluster
|
||||||
|
[upload-certs] Storing the certificates in Secret "kubeadm-certs" in the "kube-system" Namespace
|
||||||
|
[upload-certs] Using certificate key:
|
||||||
|
70614009469f9fc7a97c392253492c509f1884281f59ccd7725b3200e3271794
|
||||||
|
[mark-control-plane] Marking the node apex-master as control-plane by adding the labels: [node-role.kubernetes.io/control-plane node.kubernetes.io/exclude-from-external-load-balancers]
|
||||||
|
[mark-control-plane] Marking the node apex-master as control-plane by adding the taints [node-role.kubernetes.io/control-plane:NoSchedule]
|
||||||
|
[bootstrap-token] Using token: 8etamd.g8whseg60kg09nu1
|
||||||
|
[bootstrap-token] Configuring bootstrap tokens, cluster-info ConfigMap, RBAC Roles
|
||||||
|
[bootstrap-token] Configured RBAC rules to allow Node Bootstrap tokens to get nodes
|
||||||
|
[bootstrap-token] Configured RBAC rules to allow Node Bootstrap tokens to post CSRs in order for nodes to get long term certificate credentials
|
||||||
|
[bootstrap-token] Configured RBAC rules to allow the csrapprover controller automatically approve CSRs from a Node Bootstrap Token
|
||||||
|
[bootstrap-token] Configured RBAC rules to allow certificate rotation for all node client certificates in the cluster
|
||||||
|
[bootstrap-token] Creating the "cluster-info" ConfigMap in the "kube-public" namespace
|
||||||
|
[kubelet-finalize] Updating "/etc/kubernetes/kubelet.conf" to point to a rotatable kubelet client certificate and key
|
||||||
|
[addons] Applied essential addon: CoreDNS
|
||||||
|
[addons] Applied essential addon: kube-proxy
|
||||||
|
|
||||||
|
Your Kubernetes control-plane has initialized successfully!
|
||||||
|
|
||||||
|
To start using your cluster, you need to run the following as a regular user:
|
||||||
|
|
||||||
|
mkdir -p $HOME/.kube
|
||||||
|
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
|
||||||
|
sudo chown $(id -u):$(id -g) $HOME/.kube/config
|
||||||
|
|
||||||
|
Alternatively, if you are the root user, you can run:
|
||||||
|
|
||||||
|
export KUBECONFIG=/etc/kubernetes/admin.conf
|
||||||
|
|
||||||
|
You should now deploy a pod network to the cluster.
|
||||||
|
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
|
||||||
|
https://kubernetes.io/docs/concepts/cluster-administration/addons/
|
||||||
|
|
||||||
|
You can now join any number of control-plane nodes running the following command on each as root:
|
||||||
|
|
||||||
|
kubeadm join k8s-lab.lab.vezpi.me:6443 --token 8etamd.g8whseg60kg09nu1 \
|
||||||
|
--discovery-token-ca-cert-hash sha256:65c4da3121f57d2e67ea6c1c1349544c9e295d78790b199b5c3be908ffe5ed6c \
|
||||||
|
--control-plane --certificate-key 70614009469f9fc7a97c392253492c509f1884281f59ccd7725b3200e3271794
|
||||||
|
|
||||||
|
Please note that the certificate-key gives access to cluster sensitive data, keep it secret!
|
||||||
|
As a safeguard, uploaded-certs will be deleted in two hours; If necessary, you can use
|
||||||
|
"kubeadm init phase upload-certs --upload-certs" to reload certs afterward.
|
||||||
|
|
||||||
|
Then you can join any number of worker nodes by running the following on each as root:
|
||||||
|
|
||||||
|
kubeadm join k8s-lab.lab.vezpi.me:6443 --token 8etamd.g8whseg60kg09nu1 \
|
||||||
|
--discovery-token-ca-cert-hash sha256:65c4da3121f57d2e67ea6c1c1349544c9e295d78790b199b5c3be908ffe5ed6c
|
||||||
|
```
|
||||||
|
|
||||||
|
### Configurer `kubectl`
|
||||||
|
|
||||||
|
Si vous préférez gérer votre cluster depuis le nœud master, vous pouvez simplement copier-coller depuis la sortie de la commande `kubeadm init` :
|
||||||
|
```bash
|
||||||
|
mkdir -p $HOME/.kube
|
||||||
|
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
|
||||||
|
sudo chown $(id -u):$(id -g) $HOME/.kube/config
|
||||||
|
```
|
||||||
|
|
||||||
|
Si vous préférez contrôler le cluster depuis autre part, dans mon cas depuis mon bastion LXC :
|
||||||
|
```bash
|
||||||
|
mkdir -p $HOME/.kube
|
||||||
|
rsync --rsync-path="sudo rsync" <master-node>:/etc/kubernetes/admin.conf $HOME/.kube/config
|
||||||
|
```
|
||||||
|
|
||||||
|
Vérifiez l'accès :
|
||||||
|
```bash
|
||||||
|
kubectl get nodes
|
||||||
|
```
|
||||||
|
|
||||||
|
ℹ️ Vous devriez voir seulement le premier master listé (dans l'état `NotReady` jusqu'à ce que le CNI soit déployé).
|
||||||
|
|
||||||
|
### Installer le Plugin CNI Cilium
|
||||||
|
|
||||||
|
Depuis la [documentation Cilium](https://docs.cilium.io/en/stable/gettingstarted/k8s-install-default/), il y a 2 manières principales d'installer le CNI : utiliser la **CLI Cilium** ou **Helm**. Pour ce lab, je vais utiliser l'outil CLI.
|
||||||
|
|
||||||
|
#### Installer la CLI Cilium
|
||||||
|
|
||||||
|
La CLI Cilium peut être utilisée pour installer Cilium, inspecter l'état de l'installation Cilium et activer/désactiver diverses fonctionnalités (ex : `clustermesh`, `Hubble`) :
|
||||||
|
```bash
|
||||||
|
CILIUM_CLI_VERSION=$(curl -s https://raw.githubusercontent.com/cilium/cilium-cli/main/stable.txt)
|
||||||
|
curl -L --fail --remote-name-all https://github.com/cilium/cilium-cli/releases/download/${CILIUM_CLI_VERSION}/cilium-linux-amd64.tar.gz{,.sha256sum}
|
||||||
|
sha256sum --check cilium-linux-amd64.tar.gz.sha256sum
|
||||||
|
sudo tar xzvfC cilium-linux-amd64.tar.gz /usr/local/bin
|
||||||
|
rm cilium-linux-amd64.tar.gz{,.sha256sum}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Installer Cilium
|
||||||
|
|
||||||
|
Installer Cilium dans le cluster Kubernetes pointé par le contexte `kubectl` :
|
||||||
|
```bash
|
||||||
|
cilium install
|
||||||
|
```
|
||||||
|
```plaintext
|
||||||
|
__ Using Cilium version 1.17.5
|
||||||
|
__ Auto-detected cluster name: kubernetes
|
||||||
|
__ Auto-detected kube-proxy has been installed
|
||||||
|
```
|
||||||
|
#### Valider l'Installation
|
||||||
|
|
||||||
|
Pour valider que Cilium a été installé correctement :
|
||||||
|
```bash
|
||||||
|
cilium status --wait
|
||||||
|
```
|
||||||
|
```plaintext
|
||||||
|
/__\
|
||||||
|
/__\__/__\ Cilium: OK
|
||||||
|
\__/__\__/ Operator: OK
|
||||||
|
/__\__/__\ Envoy DaemonSet: OK
|
||||||
|
\__/__\__/ Hubble Relay: disabled
|
||||||
|
\__/ ClusterMesh: disabled
|
||||||
|
|
||||||
|
DaemonSet cilium Desired: 1, Ready: 1/1, Available: 1/1
|
||||||
|
DaemonSet cilium-envoy Desired: 1, Ready: 1/1, Available: 1/1
|
||||||
|
Deployment cilium-operator Desired: 1, Ready: 1/1, Available: 1/1
|
||||||
|
Containers: cilium Running: 1
|
||||||
|
cilium-envoy Running: 1
|
||||||
|
cilium-operator Running: 1
|
||||||
|
clustermesh-apiserver
|
||||||
|
hubble-relay
|
||||||
|
Cluster Pods: 0/2 managed by Cilium
|
||||||
|
Helm chart version: 1.17.5
|
||||||
|
Image versions cilium quay.io/cilium/cilium:v1.17.5@sha256:baf8541723ee0b72d6c489c741c81a6fdc5228940d66cb76ef5ea2ce3c639ea6: 1
|
||||||
|
cilium-envoy quay.io/cilium/cilium-envoy:v1.32.6-1749271279-0864395884b263913eac200ee2048fd985f8e626@sha256:9f69e290a7ea3d4edf9192acd81694089af048ae0d8a67fb63bd62dc1d72203e: 1
|
||||||
|
cilium-operator quay.io/cilium/operator-generic:v1.17.5@sha256:f954c97eeb1b47ed67d08cc8fb4108fb829f869373cbb3e698a7f8ef1085b09e: 1
|
||||||
|
```
|
||||||
|
|
||||||
|
Une fois installé, le nœud master doit passer au statut `Ready` :
|
||||||
|
```plaintext
|
||||||
|
NAME STATUS ROLES AGE VERSION
|
||||||
|
apex-master Ready control-plane 99m v1.32.7
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Ajouter les Nœuds Supplémentaires
|
||||||
|
|
||||||
|
Après avoir initialisé le premier nœud du control plane, vous pouvez maintenant **ajouter les autres nœuds** au cluster.
|
||||||
|
|
||||||
|
Il existe deux types de commandes `join` :
|
||||||
|
- Une pour rejoindre les **nœuds du control plane (masters)**
|
||||||
|
- Une pour rejoindre les **nœuds workers**
|
||||||
|
|
||||||
|
Ces commandes sont affichées à la fin de la commande `kubeadm init`. Si vous ne les avez pas copiées, il est possible de les **régénérer**.
|
||||||
|
|
||||||
|
⚠️ Les certificats et la clé de déchiffrement **expirent au bout de deux heures**.
|
||||||
|
|
||||||
|
### Ajouter des Masters
|
||||||
|
|
||||||
|
Vous pouvez maintenant ajouter d'autres nœuds du control plane en exécutant la commande fournie par `kubeadm init` :
|
||||||
|
```bash
|
||||||
|
sudo kubeadm join <control-plane-endpoint> --token <token> --discovery-token-ca-cert-hash <discovery-token-ca-cert-hash> --control-plane --certificate-key <certificate-key>
|
||||||
|
```
|
||||||
|
```plaintext
|
||||||
|
[preflight] Running pre-flight checks
|
||||||
|
[preflight] Reading configuration from the "kubeadm-config" ConfigMap in namespace "kube-system"...
|
||||||
|
[preflight] Use 'kubeadm init phase upload-config --config your-config.yaml' to re-upload it.
|
||||||
|
[preflight] Running pre-flight checks before initializing the new control plane instance
|
||||||
|
[preflight] Pulling images required for setting up a Kubernetes cluster
|
||||||
|
[preflight] This might take a minute or two, depending on the speed of your internet connection
|
||||||
|
[preflight] You can also perform this action beforehand using 'kubeadm config images pull'
|
||||||
|
W0718 09:27:32.248290 12043 checks.go:846] detected that the sandbox image "registry.k8s.io/pause:3.8" of the container runtime is inconsistent with that used by kubeadm.It is recommended to use "registry.k8s.io/pause:3.10" as the CRI sandbox image.
|
||||||
|
[download-certs] Downloading the certificates in Secret "kubeadm-certs" in the "kube-system" Namespace
|
||||||
|
[download-certs] Saving the certificates to the folder: "/etc/kubernetes/pki"
|
||||||
|
[certs] Using certificateDir folder "/etc/kubernetes/pki"
|
||||||
|
[certs] Generating "etcd/server" certificate and key
|
||||||
|
[certs] etcd/server serving cert is signed for DNS names [localhost vertex-master] and IPs [192.168.66.169 127.0.0.1 ::1]
|
||||||
|
[certs] Generating "etcd/peer" certificate and key
|
||||||
|
[certs] etcd/peer serving cert is signed for DNS names [localhost vertex-master] and IPs [192.168.66.169 127.0.0.1 ::1]
|
||||||
|
[certs] Generating "apiserver-etcd-client" certificate and key
|
||||||
|
[certs] Generating "etcd/healthcheck-client" certificate and key
|
||||||
|
[certs] Generating "apiserver" certificate and key
|
||||||
|
[certs] apiserver serving cert is signed for DNS names [k8s-lab.lab.vezpi.me kubernetes kubernetes.default kubernetes.default.svc kubernetes.default.svc.cluster.local vertex-master] and IPs [10.96.0.1 192.168.66.169]
|
||||||
|
[certs] Generating "apiserver-kubelet-client" certificate and key
|
||||||
|
[certs] Generating "front-proxy-client" certificate and key
|
||||||
|
[certs] Valid certificates and keys now exist in "/etc/kubernetes/pki"
|
||||||
|
[certs] Using the existing "sa" key
|
||||||
|
[kubeconfig] Generating kubeconfig files
|
||||||
|
[kubeconfig] Using kubeconfig folder "/etc/kubernetes"
|
||||||
|
[kubeconfig] Writing "admin.conf" kubeconfig file
|
||||||
|
[kubeconfig] Writing "controller-manager.conf" kubeconfig file
|
||||||
|
[kubeconfig] Writing "scheduler.conf" kubeconfig file
|
||||||
|
[control-plane] Using manifest folder "/etc/kubernetes/manifests"
|
||||||
|
[control-plane] Creating static Pod manifest for "kube-apiserver"
|
||||||
|
[control-plane] Creating static Pod manifest for "kube-controller-manager"
|
||||||
|
[control-plane] Creating static Pod manifest for "kube-scheduler"
|
||||||
|
[check-etcd] Checking that the etcd cluster is healthy
|
||||||
|
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
|
||||||
|
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
|
||||||
|
[kubelet-start] Starting the kubelet
|
||||||
|
[kubelet-check] Waiting for a healthy kubelet at http://127.0.0.1:10248/healthz. This can take up to 4m0s
|
||||||
|
[kubelet-check] The kubelet is healthy after 501.761616ms
|
||||||
|
[kubelet-start] Waiting for the kubelet to perform the TLS Bootstrap
|
||||||
|
[etcd] Announced new etcd member joining to the existing etcd cluster
|
||||||
|
[etcd] Creating static Pod manifest for "etcd"
|
||||||
|
{"level":"warn","ts":"2025-07-18T09:27:36.040077Z","logger":"etcd-client","caller":"v3@v3.5.16/retry_interceptor.go:63","msg":"retrying of unary invoker failed","target":"etcd-endpoints://0xc00037ab40/192.168.66.167:2379","attempt":0,"error":"rpc error: code = FailedPrecondition desc = etcdserver: can only promote a learner member which is in sync with leader"}
|
||||||
|
[...]
|
||||||
|
{"level":"warn","ts":"2025-07-18T09:27:44.976805Z","logger":"etcd-client","caller":"v3@v3.5.16/retry_interceptor.go:63","msg":"retrying of unary invoker failed","target":"etcd-endpoints://0xc00037ab40/192.168.66.167:2379","attempt":0,"error":"rpc error: code = FailedPrecondition desc = etcdserver: can only promote a learner member which is in sync with leader"}
|
||||||
|
[etcd] Waiting for the new etcd member to join the cluster. This can take up to 40s
|
||||||
|
[mark-control-plane] Marking the node vertex-master as control-plane by adding the labels: [node-role.kubernetes.io/control-plane node.kubernetes.io/exclude-from-external-load-balancers]
|
||||||
|
[mark-control-plane] Marking the node vertex-master as control-plane by adding the taints [node-role.kubernetes.io/control-plane:NoSchedule]
|
||||||
|
|
||||||
|
This node has joined the cluster and a new control plane instance was created:
|
||||||
|
|
||||||
|
* Certificate signing request was sent to apiserver and approval was received.
|
||||||
|
* The Kubelet was informed of the new secure connection details.
|
||||||
|
* Control plane label and taint were applied to the new node.
|
||||||
|
* The Kubernetes control plane instances scaled up.
|
||||||
|
* A new etcd member was added to the local/stacked etcd cluster.
|
||||||
|
|
||||||
|
To start administering your cluster from this node, you need to run the following as a regular user:
|
||||||
|
|
||||||
|
mkdir -p $HOME/.kube
|
||||||
|
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
|
||||||
|
sudo chown $(id -u):$(id -g) $HOME/.kube/config
|
||||||
|
|
||||||
|
Run 'kubectl get nodes' to see this node join the cluster.
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Regénérer les Certificats
|
||||||
|
|
||||||
|
Si les certificats ont expiré, vous verrez un message d’erreur lors du `kubeadm join` :
|
||||||
|
```plaintext
|
||||||
|
[download-certs] Downloading the certificates in Secret "kubeadm-certs" in the "kube-system" Namespace
|
||||||
|
error execution phase control-plane-prepare/download-certs: error downloading certs: error downloading the secret: Secret "kubeadm-certs" was not found in the "kube-system" Namespace. This Secret might have expired. Please, run `kubeadm init phase upload-certs --upload-certs` on a control plane to generate a new one
|
||||||
|
```
|
||||||
|
|
||||||
|
Dans ce cas, vous pouvez **re-téléverser les certificats** et générer une nouvelle clé de chiffrement depuis un nœud déjà membre du cluster :
|
||||||
|
```bash
|
||||||
|
sudo kubeadm init phase upload-certs --upload-certs
|
||||||
|
```
|
||||||
|
```plaintext
|
||||||
|
I0718 09:26:12.448472 18624 version.go:261] remote version is much newer: v1.33.3; falling back to: stable-1.32
|
||||||
|
[upload-certs] Storing the certificates in Secret "kubeadm-certs" in the "kube-system" Namespace
|
||||||
|
[upload-certs] Using certificate key:
|
||||||
|
7531149107ebc3caf4990f94d19824aecf39d93b84ee1b9c86aee84c04e76656
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Générer un Token
|
||||||
|
|
||||||
|
Associé au certificat, vous aurez besoin d'un **nouveau token**. Cette commande affichera directement la commande complète `join` pour un master :
|
||||||
|
```bash
|
||||||
|
sudo kubeadm token create --print-join-command --certificate-key <certificate-key>
|
||||||
|
```
|
||||||
|
|
||||||
|
Utilisez cette commande sur les nœuds à ajouter au cluster Kubernetes comme master.
|
||||||
|
|
||||||
|
### Ajouter des Workers
|
||||||
|
|
||||||
|
Vous pouvez rejoindre n'importe quel nombre de nœuds workers avec la commande suivante :
|
||||||
|
```bash
|
||||||
|
sudo kubeadm join k8s-lab.lab.vezpi.me:6443 --token 8etamd.g8whseg60kg09nu1 \
|
||||||
|
--discovery-token-ca-cert-hash sha256:65c4da3121f57d2e67ea6c1c1349544c9e295d78790b199b5c3be908ffe5ed6c
|
||||||
|
```
|
||||||
|
```plaintext
|
||||||
|
[preflight] Running pre-flight checks
|
||||||
|
[preflight] Reading configuration from the "kubeadm-config" ConfigMap in namespace "kube-system"...
|
||||||
|
[preflight] Use 'kubeadm init phase upload-config --config your-config.yaml' to re-upload it.
|
||||||
|
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
|
||||||
|
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
|
||||||
|
[kubelet-start] Starting the kubelet
|
||||||
|
[kubelet-check] Waiting for a healthy kubelet at http://127.0.0.1:10248/healthz. This can take up to 4m0s
|
||||||
|
[kubelet-check] The kubelet is healthy after 506.731798ms
|
||||||
|
[kubelet-start] Waiting for the kubelet to perform the TLS Bootstrap
|
||||||
|
|
||||||
|
This node has joined the cluster:
|
||||||
|
* Certificate signing request was sent to apiserver and a response was received.
|
||||||
|
* The Kubelet was informed of the new secure connection details.
|
||||||
|
|
||||||
|
Run 'kubectl get nodes' on the control-plane to see this node join the cluster.
|
||||||
|
```
|
||||||
|
|
||||||
|
Encore une fois, si vous avez perdu l’output initial de `kubeadm init`, vous pouvez régénérer une nouvelle commande complète :
|
||||||
|
```bash
|
||||||
|
sudo kubeadm token create --print-join-command
|
||||||
|
```
|
||||||
|
|
||||||
|
Utilisez cette commande sur les nœuds à ajouter comme workers.
|
||||||
|
|
||||||
|
### Vérifier le Cluster
|
||||||
|
|
||||||
|
Depuis votre contrôleur, vous pouvez vérifier que tous les nœuds ont bien rejoint le cluster et sont dans l’état `Ready` :
|
||||||
|
```bash
|
||||||
|
kubectl get node
|
||||||
|
```
|
||||||
|
```plaintext
|
||||||
|
NAME STATUS ROLES AGE VERSION
|
||||||
|
apex-master Ready control-plane 154m v1.32.7
|
||||||
|
apex-worker Ready <none> 5m14s v1.32.7
|
||||||
|
vertex-master Ready control-plane 26m v1.32.7
|
||||||
|
vertex-worker Ready <none> 3m39s v1.32.7
|
||||||
|
zenith-master Ready control-plane 23m v1.32.7
|
||||||
|
zenith-worker Ready <none> 3m26s v1.32.7
|
||||||
|
```
|
||||||
|
|
||||||
|
Pour valider que le cluster a une bonne connectivité réseau :
|
||||||
|
```bash
|
||||||
|
cilium connectivity test
|
||||||
|
```
|
||||||
|
```plaintext
|
||||||
|
__ Monitor aggregation detected, will skip some flow validation steps
|
||||||
|
[kubernetes] Creating namespace cilium-test-1 for connectivity check...
|
||||||
|
__ [kubernetes] Deploying echo-same-node service...
|
||||||
|
__ [kubernetes] Deploying DNS test server configmap...
|
||||||
|
__ [kubernetes] Deploying same-node deployment...
|
||||||
|
__ [kubernetes] Deploying client deployment...
|
||||||
|
__ [kubernetes] Deploying client2 deployment...
|
||||||
|
__ [kubernetes] Deploying client3 deployment...
|
||||||
|
__ [kubernetes] Deploying echo-other-node service...
|
||||||
|
__ [kubernetes] Deploying other-node deployment...
|
||||||
|
__ [host-netns] Deploying kubernetes daemonset...
|
||||||
|
__ [host-netns-non-cilium] Deploying kubernetes daemonset...
|
||||||
|
__ Skipping tests that require a node Without Cilium
|
||||||
|
[kubernetes] Waiting for deployment cilium-test-1/client to become ready...
|
||||||
|
__ [kubernetes] Waiting for deployment cilium-test-1/client2 to become ready...
|
||||||
|
__ [kubernetes] Waiting for deployment cilium-test-1/echo-same-node to become ready...
|
||||||
|
__ [kubernetes] Waiting for deployment cilium-test-1/client3 to become ready...
|
||||||
|
__ [kubernetes] Waiting for deployment cilium-test-1/echo-other-node to become ready...
|
||||||
|
__ [kubernetes] Waiting for pod cilium-test-1/client2-66475877c6-gpdkz to reach DNS server on cilium-test-1/echo-same-node-6c98489c8d-547mc pod...
|
||||||
|
__ [kubernetes] Waiting for pod cilium-test-1/client3-795488bf5-xrlbp to reach DNS server on cilium-test-1/echo-same-node-6c98489c8d-547mc pod...
|
||||||
|
__ [kubernetes] Waiting for pod cilium-test-1/client-645b68dcf7-ps276 to reach DNS server on cilium-test-1/echo-same-node-6c98489c8d-547mc pod...
|
||||||
|
__ [kubernetes] Waiting for pod cilium-test-1/client2-66475877c6-gpdkz to reach DNS server on cilium-test-1/echo-other-node-6d774d44c4-gzkmd pod...
|
||||||
|
__ [kubernetes] Waiting for pod cilium-test-1/client3-795488bf5-xrlbp to reach DNS server on cilium-test-1/echo-other-node-6d774d44c4-gzkmd pod...
|
||||||
|
__ [kubernetes] Waiting for pod cilium-test-1/client-645b68dcf7-ps276 to reach DNS server on cilium-test-1/echo-other-node-6d774d44c4-gzkmd pod...
|
||||||
|
__ [kubernetes] Waiting for pod cilium-test-1/client2-66475877c6-gpdkz to reach default/kubernetes service...
|
||||||
|
__ [kubernetes] Waiting for pod cilium-test-1/client3-795488bf5-xrlbp to reach default/kubernetes service...
|
||||||
|
__ [kubernetes] Waiting for pod cilium-test-1/client-645b68dcf7-ps276 to reach default/kubernetes service...
|
||||||
|
__ [kubernetes] Waiting for Service cilium-test-1/echo-other-node to become ready...
|
||||||
|
__ [kubernetes] Waiting for Service cilium-test-1/echo-other-node to be synchronized by Cilium pod kube-system/cilium-6824w
|
||||||
|
__ [kubernetes] Waiting for Service cilium-test-1/echo-other-node to be synchronized by Cilium pod kube-system/cilium-jc4fx
|
||||||
|
__ [kubernetes] Waiting for Service cilium-test-1/echo-same-node to become ready...
|
||||||
|
__ [kubernetes] Waiting for Service cilium-test-1/echo-same-node to be synchronized by Cilium pod kube-system/cilium-6824w
|
||||||
|
__ [kubernetes] Waiting for Service cilium-test-1/echo-same-node to be synchronized by Cilium pod kube-system/cilium-jc4fx
|
||||||
|
__ [kubernetes] Waiting for NodePort 192.168.66.166:32391 (cilium-test-1/echo-other-node) to become ready...
|
||||||
|
__ [kubernetes] Waiting for NodePort 192.168.66.166:32055 (cilium-test-1/echo-same-node) to become ready...
|
||||||
|
__ [kubernetes] Waiting for NodePort 192.168.66.172:32391 (cilium-test-1/echo-other-node) to become ready...
|
||||||
|
__ [kubernetes] Waiting for NodePort 192.168.66.172:32055 (cilium-test-1/echo-same-node) to become ready...
|
||||||
|
__ [kubernetes] Waiting for NodePort 192.168.66.167:32391 (cilium-test-1/echo-other-node) to become ready...
|
||||||
|
__ [kubernetes] Waiting for NodePort 192.168.66.167:32055 (cilium-test-1/echo-same-node) to become ready...
|
||||||
|
__ [kubernetes] Waiting for NodePort 192.168.66.168:32391 (cilium-test-1/echo-other-node) to become ready...
|
||||||
|
__ [kubernetes] Waiting for NodePort 192.168.66.168:32055 (cilium-test-1/echo-same-node) to become ready...
|
||||||
|
__ [kubernetes] Waiting for NodePort 192.168.66.169:32391 (cilium-test-1/echo-other-node) to become ready...
|
||||||
|
__ [kubernetes] Waiting for NodePort 192.168.66.169:32055 (cilium-test-1/echo-same-node) to become ready...
|
||||||
|
__ [kubernetes] Waiting for NodePort 192.168.66.170:32391 (cilium-test-1/echo-other-node) to become ready...
|
||||||
|
__ [kubernetes] Waiting for NodePort 192.168.66.170:32055 (cilium-test-1/echo-same-node) to become ready...
|
||||||
|
__ [kubernetes] Waiting for DaemonSet cilium-test-1/host-netns-non-cilium to become ready...
|
||||||
|
__ [kubernetes] Waiting for DaemonSet cilium-test-1/host-netns to become ready...
|
||||||
|
__ Skipping IPCache check
|
||||||
|
Enabling Hubble telescope...
|
||||||
|
__ Unable to contact Hubble Relay, disabling Hubble telescope and flow validation: rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing: dial tcp [::1]:4245: connect: connection refused"
|
||||||
|
Expose Relay locally with:
|
||||||
|
cilium hubble enable
|
||||||
|
cilium hubble port-forward&
|
||||||
|
__ Cilium version: 1.17.5
|
||||||
|
[cilium-test-1] Running 123 tests ...
|
||||||
|
[=] [cilium-test-1] Test [no-policies] [1/123]
|
||||||
|
[...]
|
||||||
|
[=] [cilium-test-1] Test [check-log-errors] [123/123]
|
||||||
|
.................................................
|
||||||
|
__ [cilium-test-1] All 73 tests (739 actions) successful, 50 tests skipped, 1 scenarios skipped.
|
||||||
|
```
|
||||||
|
|
||||||
|
⌛ Ce test de connectivité peut prendre jusqu’à **30 minutes**.
|
||||||
|
|
||||||
|
---
|
||||||
|
## Conclusion
|
||||||
|
|
||||||
|
🚀 Notre cluster Kubernetes hautement disponible est prêt !
|
||||||
|
|
||||||
|
Dans cet article, nous avons vu comment **créer manuellement un cluster Kubernetes** dans mon homelab à l’aide de `kubeadm`, sur un ensemble de 6 machines Ubuntu (3 masters et 3 workers) préalablement déployées avec Terraform sur Proxmox.
|
||||||
|
|
||||||
|
Nous avons suivi les étapes suivantes :
|
||||||
|
- Préparation des nœuds avec les outils, modules noyau et runtime nécessaires
|
||||||
|
- Installation des paquets Kubernetes
|
||||||
|
- Initialisation du cluster depuis le premier nœud master
|
||||||
|
- Ajout des autres nœuds du plan de contrôle et des workers
|
||||||
|
- Vérification de l’état et du bon fonctionnement du cluster
|
||||||
|
|
||||||
|
Cette approche manuelle permet de mieux comprendre comment un cluster Kubernetes est construit en interne. C’est une excellente base avant de passer à l’automatisation dans les prochains articles, en utilisant des outils comme Ansible.
|
||||||
|
|
||||||
|
Restez connectés, la suite sera axée sur l’automatisation de tout ça !
|
content/post/8-create-manual-kubernetes-cluster-kubeadm.md
@@ -0,0 +1,635 @@
|
|||||||
|
---
|
||||||
|
slug: create-manual-kubernetes-cluster-kubeadm
|
||||||
|
title: Create a Highly Available Kubernetes Cluster with kubeadm on VMs
|
||||||
|
description: Step-by-step guide to manually build a highly available Kubernetes cluster on virtual machines using kubeadm.
|
||||||
|
date: 2025-07-18
|
||||||
|
draft: false
|
||||||
|
tags:
|
||||||
|
- kubernetes
|
||||||
|
- kubeadm
|
||||||
|
- high-availability
|
||||||
|
categories:
|
||||||
|
- homelab
|
||||||
|
---
|
||||||
|
|
||||||
|
## Intro
|
||||||
|
|
||||||
|
In this [previous article]({{< ref "post/7-terraform-create-proxmox-module" >}}), I explained how to deploy VMs using a **Terraform** module with **Proxmox** and ended up with 6 VMs, 3 masters and 3 workers nodes, based on [cloud-init template]({{< ref "post/1-proxmox-cloud-init-vm-template" >}}).
|
||||||
|
|
||||||
|
Now that the infrastructure is ready, let’s move on to the next step: **manually building a Kubernetes cluster** in my homelab using `kubeadm`, highly available using stacked `etcd`.
|
||||||
|
|
||||||
|
In this post, I’ll walk through each step of the installation process of a Kubernetes cluster. I will not rely on automation tools to configure the nodes for now, to better understand what are the steps involved in a Kubernetes cluster bootstrapping. Automation will be covered in future posts.
|
||||||
|
|
||||||
|
---
|
||||||
|
## What is Kubernetes
|
||||||
|
|
||||||
|
Kubernetes is an open-source platform for orchestrating containers across a group of machines. It handles the deployment, scaling, and health of containerized applications, allowing you to focus on building your services rather than managing infrastructure details.
|
||||||
|
|
||||||
|
A Kubernetes cluster is made up of two main types of nodes: control plane (masters) nodes and worker nodes. The control plane is responsible for the overall management of the cluster, it makes decisions about scheduling, monitoring, and responding to changes in the system. The worker nodes are where your applications actually run, inside containers managed by Kubernetes.
|
||||||
|
|
||||||
|
In this post, we’ll manually set up a Kubernetes cluster with 3 control plane nodes (masters) and 3 workers. This structure reflects a highly available and production-like setup, even though the goal here is mainly to learn and understand how the components fit together.
|
||||||
|
|
||||||
|
The official documentation can be found [here](https://kubernetes.io/docs/setup/production-environment/tools/kubeadm/), I will use the version **v1.32**.
|
||||||
|
|
||||||
|
---
|
||||||
|
## Prepare the Nodes
|
||||||
|
|
||||||
|
I will perform the following steps on all 6 VMs (masters and workers).
|
||||||
|
|
||||||
|
### Hostname
|
||||||
|
|
||||||
|
Each VM has a unique **hostname** and all nodes must **resolve** each other.
|
||||||
|
|
||||||
|
The hostname is set upon VM creation with cloud-init. But for demonstration purposes, I'll set it manually:
|
||||||
|
```bash
|
||||||
|
sudo hostnamectl set-hostname <hostname>
|
||||||
|
```
|
||||||
|
|
||||||
|
On my infrastructure, the nodes resolve each other's hostnames through my DNS server for that domain (`lab.vezpi.me`). In case you don't have a DNS server, you can hardcode the node IPs in each `/etc/hosts` file:
|
||||||
|
```bash
|
||||||
|
192.168.66.168 apex-worker
|
||||||
|
192.168.66.167 apex-master
|
||||||
|
192.168.66.166 zenith-master
|
||||||
|
192.168.66.170 vertex-worker
|
||||||
|
192.168.66.169 vertex-master
|
||||||
|
192.168.66.172 zenith-worker
|
||||||
|
```
|
||||||
|
|
||||||
|
### OS Updates
|
||||||
|
|
||||||
|
My VMs are running **Ubuntu 24.04.2 LTS**. Cloud-init handles the updates after provisioning in that case, but let's make sure everything is up to date and install the packages needed to add the Kubernetes repository:
|
||||||
|
```bash
|
||||||
|
sudo apt update && sudo apt upgrade -y
|
||||||
|
sudo apt install -y apt-transport-https ca-certificates curl gpg
|
||||||
|
```
|
||||||
|
|
||||||
|
### Swap
|
||||||
|
|
||||||
|
The default behavior of a `kubelet` is to fail to start if **swap memory** is detected on a node. This means that swap should either be disabled or tolerated by `kubelet`.
|
||||||
|
|
||||||
|
My VMs are not using swap, but here is how to disable it:
|
||||||
|
```bash
|
||||||
|
sudo swapoff -a
|
||||||
|
sudo sed -i '/ swap / s/^/#/' /etc/fstab
|
||||||
|
```
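If you would rather keep swap and make `kubelet` tolerate it instead, here is a minimal sketch (an assumption on my part, not what I do in this lab): pass `--fail-swap-on=false` through `KUBELET_EXTRA_ARGS`, which the Debian/Ubuntu packages read from `/etc/default/kubelet`. This only makes sense once `kubelet` is installed later in this post:

```bash
# Sketch only: let kubelet start even when swap is detected
echo 'KUBELET_EXTRA_ARGS=--fail-swap-on=false' | sudo tee /etc/default/kubelet
sudo systemctl restart kubelet
```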
|
||||||
|
|
||||||
|
### Firewall
|
||||||
|
|
||||||
|
For this lab, I will just disable the local firewall (don't do that in production):
|
||||||
|
```bash
|
||||||
|
sudo systemctl disable --now ufw
|
||||||
|
```
|
||||||
|
|
||||||
|
For production, you want to allow the nodes to talk to each other on these ports (a `ufw` sketch follows the tables below):
|
||||||
|
#### Control plane
|
||||||
|
|Protocol|Direction|Port Range|Purpose|Used By|
|
||||||
|
|---|---|---|---|---|
|
||||||
|
|TCP|Inbound|6443|Kubernetes API server|All|
|
||||||
|
|TCP|Inbound|2379-2380|etcd server client API|kube-apiserver, etcd|
|
||||||
|
|TCP|Inbound|10250|Kubelet API|Self, Control plane|
|
||||||
|
|TCP|Inbound|10259|kube-scheduler|Self|
|
||||||
|
|TCP|Inbound|10257|kube-controller-manager|Self|
|
||||||
|
|
||||||
|
#### Worker
|
||||||
|
| Protocol | Direction | Port Range | Purpose | Used By |
|
||||||
|
| -------- | --------- | ----------- | ------------------ | -------------------- |
|
||||||
|
| TCP | Inbound | 10250 | Kubelet API | Self, Control plane |
|
||||||
|
| TCP | Inbound | 10256 | kube-proxy | Self, Load balancers |
|
||||||
|
| TCP | Inbound | 30000-32767 | NodePort Services† | All |
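As an illustration, here is a rough `ufw` sketch for a control plane node, assuming the nodes all sit on the `192.168.66.0/24` subnet used in this lab; adjust the source range and use the worker table for the worker nodes:

```bash
# Control plane ports from the table above, restricted to the node subnet
sudo ufw allow proto tcp from 192.168.66.0/24 to any port 6443        # Kubernetes API server
sudo ufw allow proto tcp from 192.168.66.0/24 to any port 2379:2380   # etcd server client API
sudo ufw allow proto tcp from 192.168.66.0/24 to any port 10250       # Kubelet API
sudo ufw allow proto tcp from 192.168.66.0/24 to any port 10257       # kube-controller-manager
sudo ufw allow proto tcp from 192.168.66.0/24 to any port 10259       # kube-scheduler
sudo ufw enable
```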
|
||||||
|
|
||||||
|
### Kernel Modules and Settings
|
||||||
|
|
||||||
|
Kubernetes needs 2 kernel modules:
|
||||||
|
- **overlay**: for facilitating the layering of one filesystem on top of another
|
||||||
|
- **br_netfilter**: for enabling bridge network connections
|
||||||
|
|
||||||
|
Let's enable them:
|
||||||
|
```bash
|
||||||
|
cat <<EOF | sudo tee /etc/modules-load.d/k8s.conf
|
||||||
|
overlay
|
||||||
|
br_netfilter
|
||||||
|
EOF
|
||||||
|
|
||||||
|
sudo modprobe overlay
|
||||||
|
sudo modprobe br_netfilter
|
||||||
|
```
|
||||||
|
|
||||||
|
Some kernel settings related to network are also needed:
|
||||||
|
```bash
|
||||||
|
cat <<EOF | sudo tee /etc/sysctl.d/k8s.conf
|
||||||
|
net.bridge.bridge-nf-call-iptables = 1
|
||||||
|
net.bridge.bridge-nf-call-ip6tables = 1
|
||||||
|
net.ipv4.ip_forward = 1
|
||||||
|
EOF
|
||||||
|
|
||||||
|
sudo sysctl --system
|
||||||
|
```
|
||||||
|
|
||||||
|
### Container Runtime
|
||||||
|
|
||||||
|
You need to install a **container runtime** into each node in the cluster so that Pods can run there. I will use `containerd`:
|
||||||
|
```bash
|
||||||
|
sudo apt install -y containerd
|
||||||
|
```
|
||||||
|
|
||||||
|
Create the default configuration:
|
||||||
|
```bash
|
||||||
|
sudo mkdir -p /etc/containerd
|
||||||
|
containerd config default | sudo tee /etc/containerd/config.toml > /dev/null
|
||||||
|
```
|
||||||
|
|
||||||
|
Enable `systemd` *cgroup* driver:
|
||||||
|
```bash
|
||||||
|
sudo sed -i 's/^\(\s*SystemdCgroup\s*=\s*\)false/\1true/' /etc/containerd/config.toml
|
||||||
|
```
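To double-check the change before restarting the service:

```bash
# Should show SystemdCgroup = true under the runc runtime options
grep -n 'SystemdCgroup' /etc/containerd/config.toml
```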
|
||||||
|
|
||||||
|
Restart and enable the `containerd` service:
|
||||||
|
```bash
|
||||||
|
sudo systemctl restart containerd
|
||||||
|
sudo systemctl enable containerd
|
||||||
|
```
|
||||||
|
|
||||||
|
### Kubernetes Packages
|
||||||
|
|
||||||
|
Last step: install the Kubernetes packages. I start by adding the repository and its signing key.
|
||||||
|
|
||||||
|
Add the key:
|
||||||
|
```bash
|
||||||
|
curl -fsSL https://pkgs.k8s.io/core:/stable:/v1.32/deb/Release.key | sudo gpg --dearmor -o /etc/apt/keyrings/kubernetes-apt-keyring.gpg
|
||||||
|
```
|
||||||
|
|
||||||
|
Add the repository:
|
||||||
|
```bash
|
||||||
|
echo 'deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://pkgs.k8s.io/core:/stable:/v1.32/deb/ /' | sudo tee /etc/apt/sources.list.d/kubernetes.list
|
||||||
|
```
|
||||||
|
|
||||||
|
Finally I can install the needed packages:
|
||||||
|
- `kubeadm`: the command to bootstrap the cluster.
|
||||||
|
- `kubelet`: the component that runs on all of the machines in your cluster and does things like starting pods and containers.
|
||||||
|
- `kubectl`: the command line util to talk to your cluster.
|
||||||
|
|
||||||
|
On the nodes, update the `apt` package index, install `kubelet` and `kubeadm`, and pin their version:
|
||||||
|
```bash
|
||||||
|
sudo apt-get update
|
||||||
|
sudo apt-get install -y kubelet kubeadm
|
||||||
|
sudo apt-mark hold kubelet kubeadm
|
||||||
|
```
|
||||||
|
|
||||||
|
ℹ️ I will not manage the cluster from my nodes, so I install `kubectl` on my LXC controller instead:
|
||||||
|
```bash
|
||||||
|
sudo apt-get update
|
||||||
|
sudo apt-get install -y kubectl
|
||||||
|
sudo apt-mark hold kubectl
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
## Initialize the Cluster
|
||||||
|
|
||||||
|
Once all nodes are prepared, it’s time to initialize the Kubernetes control plane on the **first master node**.
|
||||||
|
|
||||||
|
### Bootstrap the Cluster
|
||||||
|
|
||||||
|
Run the following command to bootstrap the cluster:
|
||||||
|
```bash
|
||||||
|
sudo kubeadm init \
|
||||||
|
--control-plane-endpoint "k8s-lab.lab.vezpi.me:6443" \
|
||||||
|
--upload-certs \
|
||||||
|
--pod-network-cidr=10.10.0.0/16
|
||||||
|
```
|
||||||
|
|
||||||
|
**Explanation**:
|
||||||
|
- `--control-plane-endpoint`: DNS name for your control plane.
|
||||||
|
- `--upload-certs`: Upload the certificates that should be shared across all masters of the cluster.
|
||||||
|
- `--pod-network-cidr`: Subnet for the CNI.
|
||||||
|
|
||||||
|
This step will:
|
||||||
|
- Initialize the `etcd` database and control plane components.
|
||||||
|
- Set up RBAC and bootstrap tokens.
|
||||||
|
- Output two important `kubeadm join` commands: one for **workers**, and one for **additional control-plane nodes**.
|
||||||
|
|
||||||
|
ℹ️ The DNS name `k8s-lab.lab.vezpi.me` is handled in my homelab by **Unbound DNS**; it resolves to my **OPNsense** interface, where a **HAProxy** service listens on port 6443 and load balances across the 3 control plane nodes.
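On OPNsense this is configured through the HAProxy plugin GUI, but conceptually it boils down to a TCP frontend and backend roughly equivalent to this `haproxy.cfg` fragment (a sketch, reusing the control plane IPs from the `/etc/hosts` example above):

```plaintext
frontend k8s-api
    bind *:6443
    mode tcp
    default_backend k8s-control-plane

backend k8s-control-plane
    mode tcp
    balance roundrobin
    option tcp-check
    server apex-master   192.168.66.167:6443 check
    server vertex-master 192.168.66.169:6443 check
    server zenith-master 192.168.66.166:6443 check
```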
|
||||||
|
|
||||||
|
You’ll also see a message instructing you to set up your `kubectl` access.
|
||||||
|
|
||||||
|
```plaintext
|
||||||
|
I0718 07:18:29.306814 14724 version.go:261] remote version is much newer: v1.33.3; falling back to: stable-1.32
|
||||||
|
[init] Using Kubernetes version: v1.32.7
|
||||||
|
[preflight] Running pre-flight checks
|
||||||
|
[preflight] Pulling images required for setting up a Kubernetes cluster
|
||||||
|
[preflight] This might take a minute or two, depending on the speed of your internet connection
|
||||||
|
[preflight] You can also perform this action beforehand using 'kubeadm config images pull'
|
||||||
|
W0718 07:18:29.736833 14724 checks.go:846] detected that the sandbox image "registry.k8s.io/pause:3.8" of the container runtime is inconsistent with that used by kubeadm.It is recommended to use "registry.k8s.io/pause:3.10" as the CRI sandbox image.
|
||||||
|
[certs] Using certificateDir folder "/etc/kubernetes/pki"
|
||||||
|
[certs] Generating "ca" certificate and key
|
||||||
|
[certs] Generating "apiserver" certificate and key
|
||||||
|
[certs] apiserver serving cert is signed for DNS names [apex-master k8s-lab.lab.vezpi.me kubernetes kubernetes.default kubernetes.default.svc kubernetes.default.svc.cluster.local] and IPs [10.96.0.1 192.168.66.167]
|
||||||
|
[certs] Generating "apiserver-kubelet-client" certificate and key
|
||||||
|
[certs] Generating "front-proxy-ca" certificate and key
|
||||||
|
[certs] Generating "front-proxy-client" certificate and key
|
||||||
|
[certs] Generating "etcd/ca" certificate and key
|
||||||
|
[certs] Generating "etcd/server" certificate and key
|
||||||
|
[certs] etcd/server serving cert is signed for DNS names [apex-master localhost] and IPs [192.168.66.167 127.0.0.1 ::1]
|
||||||
|
[certs] Generating "etcd/peer" certificate and key
|
||||||
|
[certs] etcd/peer serving cert is signed for DNS names [apex-master localhost] and IPs [192.168.66.167 127.0.0.1 ::1]
|
||||||
|
[certs] Generating "etcd/healthcheck-client" certificate and key
|
||||||
|
[certs] Generating "apiserver-etcd-client" certificate and key
|
||||||
|
[certs] Generating "sa" key and public key
|
||||||
|
[kubeconfig] Using kubeconfig folder "/etc/kubernetes"
|
||||||
|
[kubeconfig] Writing "admin.conf" kubeconfig file
|
||||||
|
[kubeconfig] Writing "super-admin.conf" kubeconfig file
|
||||||
|
[kubeconfig] Writing "kubelet.conf" kubeconfig file
|
||||||
|
[kubeconfig] Writing "controller-manager.conf" kubeconfig file
|
||||||
|
[kubeconfig] Writing "scheduler.conf" kubeconfig file
|
||||||
|
[etcd] Creating static Pod manifest for local etcd in "/etc/kubernetes/manifests"
|
||||||
|
[control-plane] Using manifest folder "/etc/kubernetes/manifests"
|
||||||
|
[control-plane] Creating static Pod manifest for "kube-apiserver"
|
||||||
|
[control-plane] Creating static Pod manifest for "kube-controller-manager"
|
||||||
|
[control-plane] Creating static Pod manifest for "kube-scheduler"
|
||||||
|
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
|
||||||
|
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
|
||||||
|
[kubelet-start] Starting the kubelet
|
||||||
|
[wait-control-plane] Waiting for the kubelet to boot up the control plane as static Pods from directory "/etc/kubernetes/manifests"
|
||||||
|
[kubelet-check] Waiting for a healthy kubelet at http://127.0.0.1:10248/healthz. This can take up to 4m0s
|
||||||
|
[kubelet-check] The kubelet is healthy after 501.894876ms
|
||||||
|
[api-check] Waiting for a healthy API server. This can take up to 4m0s
|
||||||
|
[api-check] The API server is healthy after 9.030595455s
|
||||||
|
[upload-config] Storing the configuration used in ConfigMap "kubeadm-config" in the "kube-system" Namespace
|
||||||
|
[kubelet] Creating a ConfigMap "kubelet-config" in namespace kube-system with the configuration for the kubelets in the cluster
|
||||||
|
[upload-certs] Storing the certificates in Secret "kubeadm-certs" in the "kube-system" Namespace
|
||||||
|
[upload-certs] Using certificate key:
|
||||||
|
70614009469f9fc7a97c392253492c509f1884281f59ccd7725b3200e3271794
|
||||||
|
[mark-control-plane] Marking the node apex-master as control-plane by adding the labels: [node-role.kubernetes.io/control-plane node.kubernetes.io/exclude-from-external-load-balancers]
|
||||||
|
[mark-control-plane] Marking the node apex-master as control-plane by adding the taints [node-role.kubernetes.io/control-plane:NoSchedule]
|
||||||
|
[bootstrap-token] Using token: 8etamd.g8whseg60kg09nu1
|
||||||
|
[bootstrap-token] Configuring bootstrap tokens, cluster-info ConfigMap, RBAC Roles
|
||||||
|
[bootstrap-token] Configured RBAC rules to allow Node Bootstrap tokens to get nodes
|
||||||
|
[bootstrap-token] Configured RBAC rules to allow Node Bootstrap tokens to post CSRs in order for nodes to get long term certificate credentials
|
||||||
|
[bootstrap-token] Configured RBAC rules to allow the csrapprover controller automatically approve CSRs from a Node Bootstrap Token
|
||||||
|
[bootstrap-token] Configured RBAC rules to allow certificate rotation for all node client certificates in the cluster
|
||||||
|
[bootstrap-token] Creating the "cluster-info" ConfigMap in the "kube-public" namespace
|
||||||
|
[kubelet-finalize] Updating "/etc/kubernetes/kubelet.conf" to point to a rotatable kubelet client certificate and key
|
||||||
|
[addons] Applied essential addon: CoreDNS
|
||||||
|
[addons] Applied essential addon: kube-proxy
|
||||||
|
|
||||||
|
Your Kubernetes control-plane has initialized successfully!
|
||||||
|
|
||||||
|
To start using your cluster, you need to run the following as a regular user:
|
||||||
|
|
||||||
|
mkdir -p $HOME/.kube
|
||||||
|
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
|
||||||
|
sudo chown $(id -u):$(id -g) $HOME/.kube/config
|
||||||
|
|
||||||
|
Alternatively, if you are the root user, you can run:
|
||||||
|
|
||||||
|
export KUBECONFIG=/etc/kubernetes/admin.conf
|
||||||
|
|
||||||
|
You should now deploy a pod network to the cluster.
|
||||||
|
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
|
||||||
|
https://kubernetes.io/docs/concepts/cluster-administration/addons/
|
||||||
|
|
||||||
|
You can now join any number of control-plane nodes running the following command on each as root:
|
||||||
|
|
||||||
|
kubeadm join k8s-lab.lab.vezpi.me:6443 --token 8etamd.g8whseg60kg09nu1 \
|
||||||
|
--discovery-token-ca-cert-hash sha256:65c4da3121f57d2e67ea6c1c1349544c9e295d78790b199b5c3be908ffe5ed6c \
|
||||||
|
--control-plane --certificate-key 70614009469f9fc7a97c392253492c509f1884281f59ccd7725b3200e3271794
|
||||||
|
|
||||||
|
Please note that the certificate-key gives access to cluster sensitive data, keep it secret!
|
||||||
|
As a safeguard, uploaded-certs will be deleted in two hours; If necessary, you can use
|
||||||
|
"kubeadm init phase upload-certs --upload-certs" to reload certs afterward.
|
||||||
|
|
||||||
|
Then you can join any number of worker nodes by running the following on each as root:
|
||||||
|
|
||||||
|
kubeadm join k8s-lab.lab.vezpi.me:6443 --token 8etamd.g8whseg60kg09nu1 \
|
||||||
|
--discovery-token-ca-cert-hash sha256:65c4da3121f57d2e67ea6c1c1349544c9e295d78790b199b5c3be908ffe5ed6c
|
||||||
|
```
|
||||||
|
### Configure `kubectl`
|
||||||
|
|
||||||
|
If you want to manage your cluster from your master node, you can simply copy paste from the output of the `kubeadm init` command:
|
||||||
|
```bash
|
||||||
|
mkdir -p $HOME/.kube
|
||||||
|
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
|
||||||
|
sudo chown $(id -u):$(id -g) $HOME/.kube/config
|
||||||
|
```
|
||||||
|
|
||||||
|
If you prefer to control the cluster from elsewhere, in my case from my LXC bastion:
|
||||||
|
```bash
|
||||||
|
mkdir -p $HOME/.kube
|
||||||
|
rsync --rsync-path="sudo rsync" <master-node>:/etc/kubernetes/admin.conf $HOME/.kube/config
|
||||||
|
```
|
||||||
|
|
||||||
|
Verify your access:
|
||||||
|
```bash
|
||||||
|
kubectl get nodes
|
||||||
|
```
|
||||||
|
|
||||||
|
ℹ️ You should see only the first master listed (in `NotReady` state until the CNI is deployed).
|
||||||
|
|
||||||
|
### Install the CNI Plugin Cilium
|
||||||
|
|
||||||
|
From the [Cilium documentation](https://docs.cilium.io/en/stable/gettingstarted/k8s-install-default/), there are 2 common ways to install the CNI: using the **Cilium CLI** or **Helm**. For this lab I will use the CLI tool.
|
||||||
|
|
||||||
|
#### Install the Cilium CLI
|
||||||
|
|
||||||
|
The Cilium CLI can be used to install Cilium, inspect the state of a Cilium installation, and enable/disable various features (e.g. `clustermesh`, `Hubble`). Install it on your controller where `kubectl` is installed:
|
||||||
|
```bash
|
||||||
|
CILIUM_CLI_VERSION=$(curl -s https://raw.githubusercontent.com/cilium/cilium-cli/main/stable.txt)
|
||||||
|
curl -L --fail --remote-name-all https://github.com/cilium/cilium-cli/releases/download/${CILIUM_CLI_VERSION}/cilium-linux-amd64.tar.gz{,.sha256sum}
|
||||||
|
sha256sum --check cilium-linux-amd64.tar.gz.sha256sum
|
||||||
|
sudo tar xzvfC cilium-linux-amd64.tar.gz /usr/local/bin
|
||||||
|
rm cilium-linux-amd64.tar.gz{,.sha256sum}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Install Cilium
|
||||||
|
|
||||||
|
Install Cilium into the Kubernetes cluster pointed to by your current `kubectl` context:
|
||||||
|
```bash
|
||||||
|
cilium install
|
||||||
|
```
|
||||||
|
```plaintext
|
||||||
|
__ Using Cilium version 1.17.5
|
||||||
|
__ Auto-detected cluster name: kubernetes
|
||||||
|
__ Auto-detected kube-proxy has been installed
|
||||||
|
```
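For reference, the Helm route mentioned earlier would look roughly like this (not used in this lab; repository URL and chart name as documented by Cilium):

```bash
helm repo add cilium https://helm.cilium.io/
helm repo update
# Match the version picked by the CLI above
helm install cilium cilium/cilium --version 1.17.5 --namespace kube-system
```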
|
||||||
|
#### Validate the Installation
|
||||||
|
|
||||||
|
To validate that Cilium has been properly installed:
|
||||||
|
```bash
|
||||||
|
cilium status --wait
|
||||||
|
```
|
||||||
|
```plaintext
|
||||||
|
/__\
|
||||||
|
/__\__/__\ Cilium: OK
|
||||||
|
\__/__\__/ Operator: OK
|
||||||
|
/__\__/__\ Envoy DaemonSet: OK
|
||||||
|
\__/__\__/ Hubble Relay: disabled
|
||||||
|
\__/ ClusterMesh: disabled
|
||||||
|
|
||||||
|
DaemonSet cilium Desired: 1, Ready: 1/1, Available: 1/1
|
||||||
|
DaemonSet cilium-envoy Desired: 1, Ready: 1/1, Available: 1/1
|
||||||
|
Deployment cilium-operator Desired: 1, Ready: 1/1, Available: 1/1
|
||||||
|
Containers: cilium Running: 1
|
||||||
|
cilium-envoy Running: 1
|
||||||
|
cilium-operator Running: 1
|
||||||
|
clustermesh-apiserver
|
||||||
|
hubble-relay
|
||||||
|
Cluster Pods: 0/2 managed by Cilium
|
||||||
|
Helm chart version: 1.17.5
|
||||||
|
Image versions cilium quay.io/cilium/cilium:v1.17.5@sha256:baf8541723ee0b72d6c489c741c81a6fdc5228940d66cb76ef5ea2ce3c639ea6: 1
|
||||||
|
cilium-envoy quay.io/cilium/cilium-envoy:v1.32.6-1749271279-0864395884b263913eac200ee2048fd985f8e626@sha256:9f69e290a7ea3d4edf9192acd81694089af048ae0d8a67fb63bd62dc1d72203e: 1
|
||||||
|
cilium-operator quay.io/cilium/operator-generic:v1.17.5@sha256:f954c97eeb1b47ed67d08cc8fb4108fb829f869373cbb3e698a7f8ef1085b09e: 1
|
||||||
|
```
|
||||||
|
|
||||||
|
Once installed, the master node should transition to `Ready` status:
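This is the same `kubectl get nodes` check as earlier, repeated here so the output below is easy to reproduce:

```bash
kubectl get nodes
```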
|
||||||
|
```plaintext
|
||||||
|
NAME STATUS ROLES AGE VERSION
|
||||||
|
apex-master Ready control-plane 99m v1.32.7
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
## Join Additional Nodes
|
||||||
|
|
||||||
|
After initializing the first control plane node, you can now join the remaining nodes to the cluster.
|
||||||
|
|
||||||
|
There are two types of join commands:
|
||||||
|
- One for joining **control-plane (master) nodes**
|
||||||
|
- One for joining **worker nodes**
|
||||||
|
|
||||||
|
These commands were displayed at the end of the `kubeadm init` output. If you didn’t copy them, you can regenerate them.
|
||||||
|
|
||||||
|
⚠️ The certificates and the decryption key expire after two hours.
|
||||||
|
|
||||||
|
### Additional Masters
|
||||||
|
|
||||||
|
You can now join any number of control plane nodes by running the join command printed by `kubeadm init`:
|
||||||
|
```bash
|
||||||
|
sudo kubeadm join <control-plane-endpoint> --token <token> --discovery-token-ca-cert-hash <discovery-token-ca-cert-hash> --control-plane --certificate-key <certificate-key>
|
||||||
|
```
|
||||||
|
```plaintext
|
||||||
|
[preflight] Running pre-flight checks
|
||||||
|
[preflight] Reading configuration from the "kubeadm-config" ConfigMap in namespace "kube-system"...
|
||||||
|
[preflight] Use 'kubeadm init phase upload-config --config your-config.yaml' to re-upload it.
|
||||||
|
[preflight] Running pre-flight checks before initializing the new control plane instance
|
||||||
|
[preflight] Pulling images required for setting up a Kubernetes cluster
|
||||||
|
[preflight] This might take a minute or two, depending on the speed of your internet connection
|
||||||
|
[preflight] You can also perform this action beforehand using 'kubeadm config images pull'
|
||||||
|
W0718 09:27:32.248290 12043 checks.go:846] detected that the sandbox image "registry.k8s.io/pause:3.8" of the container runtime is inconsistent with that used by kubeadm.It is recommended to use "registry.k8s.io/pause:3.10" as the CRI sandbox image.
|
||||||
|
[download-certs] Downloading the certificates in Secret "kubeadm-certs" in the "kube-system" Namespace
|
||||||
|
[download-certs] Saving the certificates to the folder: "/etc/kubernetes/pki"
|
||||||
|
[certs] Using certificateDir folder "/etc/kubernetes/pki"
|
||||||
|
[certs] Generating "etcd/server" certificate and key
|
||||||
|
[certs] etcd/server serving cert is signed for DNS names [localhost vertex-master] and IPs [192.168.66.169 127.0.0.1 ::1]
|
||||||
|
[certs] Generating "etcd/peer" certificate and key
|
||||||
|
[certs] etcd/peer serving cert is signed for DNS names [localhost vertex-master] and IPs [192.168.66.169 127.0.0.1 ::1]
|
||||||
|
[certs] Generating "apiserver-etcd-client" certificate and key
|
||||||
|
[certs] Generating "etcd/healthcheck-client" certificate and key
|
||||||
|
[certs] Generating "apiserver" certificate and key
|
||||||
|
[certs] apiserver serving cert is signed for DNS names [k8s-lab.lab.vezpi.me kubernetes kubernetes.default kubernetes.default.svc kubernetes.default.svc.cluster.local vertex-master] and IPs [10.96.0.1 192.168.66.169]
|
||||||
|
[certs] Generating "apiserver-kubelet-client" certificate and key
|
||||||
|
[certs] Generating "front-proxy-client" certificate and key
|
||||||
|
[certs] Valid certificates and keys now exist in "/etc/kubernetes/pki"
|
||||||
|
[certs] Using the existing "sa" key
|
||||||
|
[kubeconfig] Generating kubeconfig files
|
||||||
|
[kubeconfig] Using kubeconfig folder "/etc/kubernetes"
|
||||||
|
[kubeconfig] Writing "admin.conf" kubeconfig file
|
||||||
|
[kubeconfig] Writing "controller-manager.conf" kubeconfig file
|
||||||
|
[kubeconfig] Writing "scheduler.conf" kubeconfig file
|
||||||
|
[control-plane] Using manifest folder "/etc/kubernetes/manifests"
|
||||||
|
[control-plane] Creating static Pod manifest for "kube-apiserver"
|
||||||
|
[control-plane] Creating static Pod manifest for "kube-controller-manager"
|
||||||
|
[control-plane] Creating static Pod manifest for "kube-scheduler"
|
||||||
|
[check-etcd] Checking that the etcd cluster is healthy
|
||||||
|
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
|
||||||
|
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
|
||||||
|
[kubelet-start] Starting the kubelet
|
||||||
|
[kubelet-check] Waiting for a healthy kubelet at http://127.0.0.1:10248/healthz. This can take up to 4m0s
|
||||||
|
[kubelet-check] The kubelet is healthy after 501.761616ms
|
||||||
|
[kubelet-start] Waiting for the kubelet to perform the TLS Bootstrap
|
||||||
|
[etcd] Announced new etcd member joining to the existing etcd cluster
|
||||||
|
[etcd] Creating static Pod manifest for "etcd"
|
||||||
|
{"level":"warn","ts":"2025-07-18T09:27:36.040077Z","logger":"etcd-client","caller":"v3@v3.5.16/retry_interceptor.go:63","msg":"retrying of unary invoker failed","target":"etcd-endpoints://0xc00037ab40/192.168.66.167:2379","attempt":0,"error":"rpc error: code = FailedPrecondition desc = etcdserver: can only promote a learner member which is in sync with leader"}
|
||||||
|
[...]
|
||||||
|
{"level":"warn","ts":"2025-07-18T09:27:44.976805Z","logger":"etcd-client","caller":"v3@v3.5.16/retry_interceptor.go:63","msg":"retrying of unary invoker failed","target":"etcd-endpoints://0xc00037ab40/192.168.66.167:2379","attempt":0,"error":"rpc error: code = FailedPrecondition desc = etcdserver: can only promote a learner member which is in sync with leader"}
|
||||||
|
[etcd] Waiting for the new etcd member to join the cluster. This can take up to 40s
|
||||||
|
[mark-control-plane] Marking the node vertex-master as control-plane by adding the labels: [node-role.kubernetes.io/control-plane node.kubernetes.io/exclude-from-external-load-balancers]
|
||||||
|
[mark-control-plane] Marking the node vertex-master as control-plane by adding the taints [node-role.kubernetes.io/control-plane:NoSchedule]
|
||||||
|
|
||||||
|
This node has joined the cluster and a new control plane instance was created:
|
||||||
|
|
||||||
|
* Certificate signing request was sent to apiserver and approval was received.
|
||||||
|
* The Kubelet was informed of the new secure connection details.
|
||||||
|
* Control plane label and taint were applied to the new node.
|
||||||
|
* The Kubernetes control plane instances scaled up.
|
||||||
|
* A new etcd member was added to the local/stacked etcd cluster.
|
||||||
|
|
||||||
|
To start administering your cluster from this node, you need to run the following as a regular user:
|
||||||
|
|
||||||
|
mkdir -p $HOME/.kube
|
||||||
|
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
|
||||||
|
sudo chown $(id -u):$(id -g) $HOME/.kube/config
|
||||||
|
|
||||||
|
Run 'kubectl get nodes' to see this node join the cluster.
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Regenerate Certificates
|
||||||
|
|
||||||
|
If the certificates have expired, you will see an error like this when running `kubeadm join`:
|
||||||
|
```plaintext
|
||||||
|
[download-certs] Downloading the certificates in Secret "kubeadm-certs" in the "kube-system" Namespace
|
||||||
|
error execution phase control-plane-prepare/download-certs: error downloading certs: error downloading the secret: Secret "kubeadm-certs" was not found in the "kube-system" Namespace. This Secret might have expired. Please, run `kubeadm init phase upload-certs --upload-certs` on a control plane to generate a new one
|
||||||
|
```
|
||||||
|
|
||||||
|
If so, re-upload the certificates and generate a new decryption key by running the following command on a control plane node that has already joined the cluster:
|
||||||
|
```bash
|
||||||
|
sudo kubeadm init phase upload-certs --upload-certs
|
||||||
|
```
|
||||||
|
```plaintext
|
||||||
|
I0718 09:26:12.448472 18624 version.go:261] remote version is much newer: v1.33.3; falling back to: stable-1.32
|
||||||
|
[upload-certs] Storing the certificates in Secret "kubeadm-certs" in the "kube-system" Namespace
|
||||||
|
[upload-certs] Using certificate key:
|
||||||
|
7531149107ebc3caf4990f94d19824aecf39d93b84ee1b9c86aee84c04e76656
|
||||||
|
```
|
||||||
|
#### Generate Token
|
||||||
|
|
||||||
|
Paired with the certificate key, you'll also need a new **token**. This command prints the complete control plane `join` command:
|
||||||
|
```bash
|
||||||
|
sudo kubeadm token create --print-join-command --certificate-key <certificate-key>
|
||||||
|
```
|
||||||
|
|
||||||
|
Run the printed command on the nodes you want to add to the Kubernetes cluster as masters.
|
||||||
|
|
||||||
|
### Join Workers
|
||||||
|
|
||||||
|
You can join any number of worker nodes by running the following:
|
||||||
|
```bash
|
||||||
|
sudo kubeadm join k8s-lab.lab.vezpi.me:6443 --token 8etamd.g8whseg60kg09nu1 \
|
||||||
|
--discovery-token-ca-cert-hash sha256:65c4da3121f57d2e67ea6c1c1349544c9e295d78790b199b5c3be908ffe5ed6c
|
||||||
|
```
|
||||||
|
```plaintext
|
||||||
|
[preflight] Running pre-flight checks
|
||||||
|
[preflight] Reading configuration from the "kubeadm-config" ConfigMap in namespace "kube-system"...
|
||||||
|
[preflight] Use 'kubeadm init phase upload-config --config your-config.yaml' to re-upload it.
|
||||||
|
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
|
||||||
|
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
|
||||||
|
[kubelet-start] Starting the kubelet
|
||||||
|
[kubelet-check] Waiting for a healthy kubelet at http://127.0.0.1:10248/healthz. This can take up to 4m0s
|
||||||
|
[kubelet-check] The kubelet is healthy after 506.731798ms
|
||||||
|
[kubelet-start] Waiting for the kubelet to perform the TLS Bootstrap
|
||||||
|
|
||||||
|
This node has joined the cluster:
|
||||||
|
* Certificate signing request was sent to apiserver and a response was received.
|
||||||
|
* The Kubelet was informed of the new secure connection details.
|
||||||
|
|
||||||
|
Run 'kubectl get nodes' on the control-plane to see this node join the cluster.
|
||||||
|
```
|
||||||
|
|
||||||
|
Again, if you missed the output of `kubeadm init`, you can generate a new token along with the full `join` command:
|
||||||
|
```bash
|
||||||
|
sudo kubeadm token create --print-join-command
|
||||||
|
```
|
||||||
|
|
||||||
|
Run the printed command on the nodes you want to add to the Kubernetes cluster as workers.
|
||||||
|
|
||||||
|
### Verify Cluster
|
||||||
|
|
||||||
|
From your controller, verify that all the nodes have joined the cluster and are in the `Ready` state:
|
||||||
|
```bash
|
||||||
|
kubectl get node
|
||||||
|
```
|
||||||
|
```plaintext
|
||||||
|
NAME STATUS ROLES AGE VERSION
|
||||||
|
apex-master Ready control-plane 154m v1.32.7
|
||||||
|
apex-worker Ready <none> 5m14s v1.32.7
|
||||||
|
vertex-master Ready control-plane 26m v1.32.7
|
||||||
|
vertex-worker Ready <none> 3m39s v1.32.7
|
||||||
|
zenith-master Ready control-plane 23m v1.32.7
|
||||||
|
zenith-worker Ready <none> 3m26s v1.32.7
|
||||||
|
```
|
||||||
|
|
||||||
|
To validate that your cluster has proper network connectivity:
|
||||||
|
```bash
|
||||||
|
cilium connectivity test
|
||||||
|
```
|
||||||
|
```plaintext
|
||||||
|
__ Monitor aggregation detected, will skip some flow validation steps
|
||||||
|
[kubernetes] Creating namespace cilium-test-1 for connectivity check...
|
||||||
|
__ [kubernetes] Deploying echo-same-node service...
|
||||||
|
__ [kubernetes] Deploying DNS test server configmap...
|
||||||
|
__ [kubernetes] Deploying same-node deployment...
|
||||||
|
__ [kubernetes] Deploying client deployment...
|
||||||
|
__ [kubernetes] Deploying client2 deployment...
|
||||||
|
__ [kubernetes] Deploying client3 deployment...
|
||||||
|
__ [kubernetes] Deploying echo-other-node service...
|
||||||
|
__ [kubernetes] Deploying other-node deployment...
|
||||||
|
__ [host-netns] Deploying kubernetes daemonset...
|
||||||
|
__ [host-netns-non-cilium] Deploying kubernetes daemonset...
|
||||||
|
__ Skipping tests that require a node Without Cilium
|
||||||
|
[kubernetes] Waiting for deployment cilium-test-1/client to become ready...
|
||||||
|
__ [kubernetes] Waiting for deployment cilium-test-1/client2 to become ready...
|
||||||
|
__ [kubernetes] Waiting for deployment cilium-test-1/echo-same-node to become ready...
|
||||||
|
__ [kubernetes] Waiting for deployment cilium-test-1/client3 to become ready...
|
||||||
|
__ [kubernetes] Waiting for deployment cilium-test-1/echo-other-node to become ready...
|
||||||
|
__ [kubernetes] Waiting for pod cilium-test-1/client2-66475877c6-gpdkz to reach DNS server on cilium-test-1/echo-same-node-6c98489c8d-547mc pod...
|
||||||
|
__ [kubernetes] Waiting for pod cilium-test-1/client3-795488bf5-xrlbp to reach DNS server on cilium-test-1/echo-same-node-6c98489c8d-547mc pod...
|
||||||
|
__ [kubernetes] Waiting for pod cilium-test-1/client-645b68dcf7-ps276 to reach DNS server on cilium-test-1/echo-same-node-6c98489c8d-547mc pod...
|
||||||
|
__ [kubernetes] Waiting for pod cilium-test-1/client2-66475877c6-gpdkz to reach DNS server on cilium-test-1/echo-other-node-6d774d44c4-gzkmd pod...
|
||||||
|
__ [kubernetes] Waiting for pod cilium-test-1/client3-795488bf5-xrlbp to reach DNS server on cilium-test-1/echo-other-node-6d774d44c4-gzkmd pod...
|
||||||
|
__ [kubernetes] Waiting for pod cilium-test-1/client-645b68dcf7-ps276 to reach DNS server on cilium-test-1/echo-other-node-6d774d44c4-gzkmd pod...
|
||||||
|
__ [kubernetes] Waiting for pod cilium-test-1/client2-66475877c6-gpdkz to reach default/kubernetes service...
|
||||||
|
__ [kubernetes] Waiting for pod cilium-test-1/client3-795488bf5-xrlbp to reach default/kubernetes service...
|
||||||
|
__ [kubernetes] Waiting for pod cilium-test-1/client-645b68dcf7-ps276 to reach default/kubernetes service...
|
||||||
|
__ [kubernetes] Waiting for Service cilium-test-1/echo-other-node to become ready...
|
||||||
|
__ [kubernetes] Waiting for Service cilium-test-1/echo-other-node to be synchronized by Cilium pod kube-system/cilium-6824w
|
||||||
|
__ [kubernetes] Waiting for Service cilium-test-1/echo-other-node to be synchronized by Cilium pod kube-system/cilium-jc4fx
|
||||||
|
__ [kubernetes] Waiting for Service cilium-test-1/echo-same-node to become ready...
|
||||||
|
__ [kubernetes] Waiting for Service cilium-test-1/echo-same-node to be synchronized by Cilium pod kube-system/cilium-6824w
|
||||||
|
__ [kubernetes] Waiting for Service cilium-test-1/echo-same-node to be synchronized by Cilium pod kube-system/cilium-jc4fx
|
||||||
|
__ [kubernetes] Waiting for NodePort 192.168.66.166:32391 (cilium-test-1/echo-other-node) to become ready...
|
||||||
|
__ [kubernetes] Waiting for NodePort 192.168.66.166:32055 (cilium-test-1/echo-same-node) to become ready...
|
||||||
|
__ [kubernetes] Waiting for NodePort 192.168.66.172:32391 (cilium-test-1/echo-other-node) to become ready...
|
||||||
|
__ [kubernetes] Waiting for NodePort 192.168.66.172:32055 (cilium-test-1/echo-same-node) to become ready...
|
||||||
|
__ [kubernetes] Waiting for NodePort 192.168.66.167:32391 (cilium-test-1/echo-other-node) to become ready...
|
||||||
|
__ [kubernetes] Waiting for NodePort 192.168.66.167:32055 (cilium-test-1/echo-same-node) to become ready...
|
||||||
|
__ [kubernetes] Waiting for NodePort 192.168.66.168:32391 (cilium-test-1/echo-other-node) to become ready...
|
||||||
|
__ [kubernetes] Waiting for NodePort 192.168.66.168:32055 (cilium-test-1/echo-same-node) to become ready...
|
||||||
|
__ [kubernetes] Waiting for NodePort 192.168.66.169:32391 (cilium-test-1/echo-other-node) to become ready...
|
||||||
|
__ [kubernetes] Waiting for NodePort 192.168.66.169:32055 (cilium-test-1/echo-same-node) to become ready...
|
||||||
|
__ [kubernetes] Waiting for NodePort 192.168.66.170:32391 (cilium-test-1/echo-other-node) to become ready...
|
||||||
|
__ [kubernetes] Waiting for NodePort 192.168.66.170:32055 (cilium-test-1/echo-same-node) to become ready...
|
||||||
|
__ [kubernetes] Waiting for DaemonSet cilium-test-1/host-netns-non-cilium to become ready...
|
||||||
|
__ [kubernetes] Waiting for DaemonSet cilium-test-1/host-netns to become ready...
|
||||||
|
__ Skipping IPCache check
|
||||||
|
Enabling Hubble telescope...
|
||||||
|
__ Unable to contact Hubble Relay, disabling Hubble telescope and flow validation: rpc error: code = Unavailable desc = connection error: desc = "transport: Error while dialing: dial tcp [::1]:4245: connect: connection refused"
|
||||||
|
Expose Relay locally with:
|
||||||
|
cilium hubble enable
|
||||||
|
cilium hubble port-forward&
|
||||||
|
__ Cilium version: 1.17.5
|
||||||
|
[cilium-test-1] Running 123 tests ...
|
||||||
|
[=] [cilium-test-1] Test [no-policies] [1/123]
|
||||||
|
[...]
|
||||||
|
[=] [cilium-test-1] Test [check-log-errors] [123/123]
|
||||||
|
.................................................
|
||||||
|
__ [cilium-test-1] All 73 tests (739 actions) successful, 50 tests skipped, 1 scenarios skipped.
|
||||||
|
```
|
||||||
|
|
||||||
|
⌛ This connectivity test could take up to 30 minutes.
|
||||||
|
|
||||||
|
---
|
||||||
|
## Conclusion
|
||||||
|
|
||||||
|
🚀 Our highly available Kubernetes cluster is ready!
|
||||||
|
|
||||||
|
In this post, we walked through the **manual creation of a Kubernetes cluster** in my homelab using `kubeadm`, on top of 6 Ubuntu VMs (3 masters and 3 workers) previously provisioned with Terraform on Proxmox.
|
||||||
|
|
||||||
|
We went step by step:
|
||||||
|
- Preparing the nodes with the required tools, kernel modules, and container runtime
|
||||||
|
- Installing the Kubernetes packages
|
||||||
|
- Bootstrapping the cluster from the first master node
|
||||||
|
- Joining additional control-plane and worker nodes
|
||||||
|
- Verifying that the cluster is healthy and ready
|
||||||
|
|
||||||
|
This manual approach helps to demystify how Kubernetes clusters are built behind the scenes. It’s a solid foundation before automating the process in future posts using tools like Ansible.
|
||||||
|
|
||||||
|
Stay tuned, next time we’ll look into automating all of this!
|
||||||
|
|
||||||
|
|
@@ -0,0 +1,634 @@
|
|||||||
|
---
|
||||||
|
slug: expose-kubernetes-pods-externally-ingress-tls
|
||||||
|
title: Exposer des Pods Kubernetes en externe avec Ingress et TLS
|
||||||
|
description: Découvrez comment exposer des pods Kubernetes en externe avec Services, Ingress et TLS grâce à BGP, NGINX et Cert-Manager dans un homelab.
|
||||||
|
date: 2025-08-19
|
||||||
|
draft: false
|
||||||
|
tags:
|
||||||
|
- kubernetes
|
||||||
|
- helm
|
||||||
|
- bgp
|
||||||
|
- opnsense
|
||||||
|
- cilium
|
||||||
|
- nginx-ingress-controller
|
||||||
|
- cert-manager
|
||||||
|
categories:
|
||||||
|
- homelab
|
||||||
|
---
|
||||||
|
|
||||||
|
## Intro
|
||||||
|
|
||||||
|
Après avoir construit mon propre cluster Kubernetes dans mon homelab avec `kubeadm` dans [cet article]({{< ref "post/8-create-manual-kubernetes-cluster-kubeadm" >}}), mon prochain défi est d’exposer un pod simple à l’extérieur, accessible via une URL et sécurisé avec un certificat TLS validé par Let’s Encrypt.
|
||||||
|
|
||||||
|
Pour y parvenir, j’ai besoin de configurer plusieurs composants :
|
||||||
|
- **Service** : Expose le pod à l’intérieur du cluster et fournit un point d’accès.
|
||||||
|
- **Ingress** : Définit des règles de routage pour exposer des services HTTP(S) à l’extérieur.
|
||||||
|
- **Ingress Controller** : Surveille les ressources Ingress et gère réellement le routage du trafic.
|
||||||
|
- **Certificats TLS** : Sécurisent le trafic en HTTPS grâce à des certificats délivrés par Let’s Encrypt.
|
||||||
|
|
||||||
|
Cet article vous guide pas à pas pour comprendre comment fonctionne l’accès externe dans Kubernetes dans un environnement homelab.
|
||||||
|
|
||||||
|
C'est parti.
|
||||||
|
|
||||||
|
---
|
||||||
|
## Helm
|
||||||
|
|
||||||
|
J’utilise **Helm**, le gestionnaire de paquets de facto pour Kubernetes, afin d’installer des composants externes comme l’Ingress Controller ou cert-manager.
|
||||||
|
|
||||||
|
### Pourquoi Helm
|
||||||
|
|
||||||
|
Helm simplifie le déploiement et la gestion des applications Kubernetes. Au lieu d’écrire et de maintenir de longs manifestes YAML, Helm permet d’installer des applications en une seule commande, en s’appuyant sur des charts versionnés et configurables.
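Pour donner une idée du workflow (les noms de dépôt, de chart et de release ci-dessous sont de simples exemples), une utilisation typique de Helm ressemble à ceci :

```bash
# Add a chart repository and refresh the local index
helm repo add example https://charts.example.org
helm repo update

# Install a release from a chart, overriding values on the command line
helm install my-release example/some-chart \
  --namespace demo \
  --create-namespace \
  --set replicaCount=2

# Upgrade, roll back or remove the release later on
helm upgrade my-release example/some-chart --set replicaCount=3
helm rollback my-release 1
helm uninstall my-release --namespace demo
```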
|
||||||
|
|
||||||
|
### Installer Helm
|
||||||
|
|
||||||
|
J’installe Helm sur mon hôte bastion LXC, qui dispose déjà d’un accès au cluster Kubernetes :
|
||||||
|
```bash
|
||||||
|
curl https://baltocdn.com/helm/signing.asc | gpg --dearmor | sudo tee /usr/share/keyrings/helm.gpg > /dev/null
|
||||||
|
echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/helm.gpg] https://baltocdn.com/helm/stable/debian/ all main" | sudo tee /etc/apt/sources.list.d/helm-stable-debian.list
|
||||||
|
sudo apt update
|
||||||
|
sudo apt install helm
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
## Services Kubernetes
|
||||||
|
|
||||||
|
Avant de pouvoir exposer un pod à l’extérieur, il faut d’abord le rendre accessible à l’intérieur du cluster. C’est là qu’interviennent les **Services Kubernetes**.
|
||||||
|
|
||||||
|
Les Services agissent comme un pont entre les pods et le réseau, garantissant que les applications restent accessibles même si les pods sont réordonnés ou redéployés.
|
||||||
|
|
||||||
|
Il existe plusieurs types de Services Kubernetes, chacun avec un objectif différent :
|
||||||
|
- **ClusterIP** expose le Service sur une IP interne au cluster, uniquement accessible depuis l’intérieur.
|
||||||
|
- **NodePort** expose le Service sur un port statique de l’IP de chaque nœud, accessible depuis l’extérieur du cluster.
|
||||||
|
- **LoadBalancer** expose le Service sur une IP externe, généralement via une intégration cloud (ou via BGP dans un homelab).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Exposer un Service `LoadBalancer` avec BGP
|
||||||
|
|
||||||
|
Au départ, j’ai envisagé d’utiliser **MetalLB** pour exposer les adresses IP des services sur mon réseau local. C’est ce que j’utilisais auparavant quand je dépendais de la box de mon FAI comme routeur principal. Mais après avoir lu cet article, [Use Cilium BGP integration with OPNsense](https://devopstales.github.io/kubernetes/cilium-opnsense-bgp/), je réalise que je peux obtenir le même résultat (voire mieux) en utilisant **BGP** avec mon routeur **OPNsense** et **Cilium**, mon CNI.
|
||||||
|
|
||||||
|
### Qu’est-ce que BGP ?
|
||||||
|
|
||||||
|
BGP (_Border Gateway Protocol_) est un protocole de routage utilisé pour échanger des routes entre systèmes. Dans un homelab Kubernetes, BGP permet à tes nœuds Kubernetes d’annoncer directement leurs IPs à ton routeur ou firewall. Ton routeur sait alors exactement comment atteindre les adresses IP gérées par ton cluster.
|
||||||
|
|
||||||
|
Au lieu que MetalLB gère l’allocation d’IP et les réponses ARP, tes nœuds disent directement à ton routeur : « Hé, c’est moi qui possède l’adresse 192.168.1.240 ».
|
||||||
|
|
||||||
|
### L’approche MetalLB classique
|
||||||
|
|
||||||
|
Sans BGP, MetalLB en mode Layer 2 fonctionne comme ceci :
|
||||||
|
- Il assigne une adresse IP `LoadBalancer` (par exemple `192.168.1.240`) depuis un pool.
|
||||||
|
- Un nœud répond aux requêtes ARP pour cette IP sur ton LAN.
|
||||||
|
|
||||||
|
Oui, MetalLB peut aussi fonctionner avec BGP, mais pourquoi l’utiliser si mon CNI (Cilium) le gère déjà nativement ?
|
||||||
|
|
||||||
|
### BGP avec Cilium
|
||||||
|
|
||||||
|
Avec Cilium + BGP, tu obtiens :
|
||||||
|
- L’agent Cilium du nœud annonce les IPs `LoadBalancer` via BGP.
|
||||||
|
- Ton routeur apprend ces routes et les envoie au bon nœud.
|
||||||
|
- Plus besoin de MetalLB.
|
||||||
|
|
||||||
|
### Configuration BGP
|
||||||
|
|
||||||
|
BGP est désactivé par défaut, aussi bien sur OPNsense que sur Cilium. Activons-le des deux côtés.
|
||||||
|
|
||||||
|
#### Sur OPNsense
|
||||||
|
|
||||||
|
D’après la [documentation officielle OPNsense](https://docs.opnsense.org/manual/dynamic_routing.html#bgp-section), l’activation de BGP nécessite d’installer un plugin.
|
||||||
|
|
||||||
|
Va dans `System` > `Firmware` > `Plugins` et installe le plugin **os-frr** :
|
||||||
|

|
||||||
|
Installer le plugin `os-frr` dans OPNsense
|
||||||
|
|
||||||
|
Une fois installé, active le plugin dans `Routing` > `General` :
|
||||||
|

|
||||||
|
Activer le routage dans OPNsense
|
||||||
|
|
||||||
|
Ensuite, rends-toi dans la section **BGP**. Dans l’onglet **General** :
|
||||||
|
- Coche la case pour activer BGP.
|
||||||
|
- Définis ton **ASN BGP**. J’ai choisi `64512`, le premier ASN privé de la plage réservée (voir [ASN table](https://en.wikipedia.org/wiki/Autonomous_system_\(Internet\)#ASN_Table)) :
|
||||||
|

|
||||||
|
|
||||||
|
Ajoute ensuite tes voisins BGP. Je ne fais le peering qu’avec mes **nœuds workers** (puisque seuls eux hébergent des workloads). Pour chaque voisin :
|
||||||
|
- Mets l’IP du nœud dans `Peer-IP`.
|
||||||
|
- Utilise `64513` comme **Remote AS** (celui de Cilium).
|
||||||
|
- Configure `Update-Source Interface` sur `Lab`.
|
||||||
|
- Coche `Next-Hop-Self`.
|
||||||
|

|
||||||
|
|
||||||
|
Voici la liste de mes voisins une fois configurés :
|
||||||
|

|
||||||
|
Liste des voisins BGP
|
||||||
|
|
||||||
|
N’oublie pas la règle firewall pour autoriser BGP (port `179/TCP`) depuis le VLAN **Lab** vers le firewall :
|
||||||
|

|
||||||
|
Autoriser TCP/179 de Lab vers OPNsense
|
||||||
|
|
||||||
|
#### Dans Cilium
|
||||||
|
|
||||||
|
J’ai déjà Cilium installé et je n’ai pas trouvé comment activer BGP avec la CLI, donc je l’ai simplement réinstallé avec l’option BGP :
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cilium uninstall
|
||||||
|
cilium install --set bgpControlPlane.enabled=true
|
||||||
|
```
|
||||||
|
|
||||||
|
Je configure uniquement les **nœuds workers** pour établir le peering BGP en les labellisant avec un `nodeSelector` :
|
||||||
|
```bash
|
||||||
|
kubectl label node apex-worker node-role.kubernetes.io/worker=""
|
||||||
|
kubectl label node vertex-worker node-role.kubernetes.io/worker=""
|
||||||
|
kubectl label node zenith-worker node-role.kubernetes.io/worker=""
|
||||||
|
```
|
||||||
|
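Les labels apparaissent ensuite dans la colonne `ROLES` de `kubectl get nodes` :

```bash
kubectl get nodes
```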
```plaintext
|
||||||
|
NAME STATUS ROLES AGE VERSION
|
||||||
|
apex-master Ready control-plane 5d4h v1.32.7
|
||||||
|
apex-worker Ready worker 5d1h v1.32.7
|
||||||
|
vertex-master Ready control-plane 5d1h v1.32.7
|
||||||
|
vertex-worker Ready worker 5d1h v1.32.7
|
||||||
|
zenith-master Ready control-plane 5d1h v1.32.7
|
||||||
|
zenith-worker Ready worker 5d1h v1.32.7
|
||||||
|
```
|
||||||
|
|
||||||
|
Pour la configuration BGP complète, j’ai besoin de :
|
||||||
|
- **CiliumBGPClusterConfig** : paramètres BGP pour le cluster Cilium, incluant son ASN local et son pair.
|
||||||
|
- **CiliumBGPPeerConfig** : définit les timers, le redémarrage gracieux et les routes annoncées.
|
||||||
|
- **CiliumBGPAdvertisement** : indique quels services Kubernetes annoncer via BGP.
|
||||||
|
- **CiliumLoadBalancerIPPool** : définit la plage d’IPs attribuées aux services `LoadBalancer`.
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
---
|
||||||
|
apiVersion: cilium.io/v2alpha1
|
||||||
|
kind: CiliumBGPClusterConfig
|
||||||
|
metadata:
|
||||||
|
name: bgp-cluster
|
||||||
|
spec:
|
||||||
|
nodeSelector:
|
||||||
|
matchLabels:
|
||||||
|
node-role.kubernetes.io/worker: "" # Only for worker nodes
|
||||||
|
bgpInstances:
|
||||||
|
- name: "cilium-bgp-cluster"
|
||||||
|
localASN: 64513 # Cilium ASN
|
||||||
|
peers:
|
||||||
|
- name: "pfSense-peer"
|
||||||
|
peerASN: 64512 # OPNsense ASN
|
||||||
|
peerAddress: 192.168.66.1 # OPNsense IP
|
||||||
|
peerConfigRef:
|
||||||
|
name: "bgp-peer"
|
||||||
|
---
|
||||||
|
apiVersion: cilium.io/v2alpha1
|
||||||
|
kind: CiliumBGPPeerConfig
|
||||||
|
metadata:
|
||||||
|
name: bgp-peer
|
||||||
|
spec:
|
||||||
|
timers:
|
||||||
|
holdTimeSeconds: 9
|
||||||
|
keepAliveTimeSeconds: 3
|
||||||
|
gracefulRestart:
|
||||||
|
enabled: true
|
||||||
|
restartTimeSeconds: 15
|
||||||
|
families:
|
||||||
|
- afi: ipv4
|
||||||
|
safi: unicast
|
||||||
|
advertisements:
|
||||||
|
matchLabels:
|
||||||
|
advertise: "bgp"
|
||||||
|
---
|
||||||
|
apiVersion: cilium.io/v2alpha1
|
||||||
|
kind: CiliumBGPAdvertisement
|
||||||
|
metadata:
|
||||||
|
name: bgp-advertisement
|
||||||
|
labels:
|
||||||
|
advertise: bgp
|
||||||
|
spec:
|
||||||
|
advertisements:
|
||||||
|
- advertisementType: "Service"
|
||||||
|
service:
|
||||||
|
addresses:
|
||||||
|
- LoadBalancerIP
|
||||||
|
selector:
|
||||||
|
matchExpressions:
|
||||||
|
- { key: somekey, operator: NotIn, values: [ never-used-value ] }
|
||||||
|
---
|
||||||
|
apiVersion: "cilium.io/v2alpha1"
|
||||||
|
kind: CiliumLoadBalancerIPPool
|
||||||
|
metadata:
|
||||||
|
name: "dmz"
|
||||||
|
spec:
|
||||||
|
blocks:
|
||||||
|
- start: "192.168.55.20" # LB Range Start IP
|
||||||
|
stop: "192.168.55.250" # LB Range End IP
|
||||||
|
```
|
||||||
|
|
||||||
|
Applique la configuration :
|
||||||
|
```bash
|
||||||
|
kubectl apply -f bgp.yaml
|
||||||
|
|
||||||
|
ciliumbgpclusterconfig.cilium.io/bgp-cluster created
|
||||||
|
ciliumbgppeerconfig.cilium.io/bgp-peer created
|
||||||
|
ciliumbgpadvertisement.cilium.io/bgp-advertisement created
|
||||||
|
ciliumloadbalancerippool.cilium.io/dmz created
|
||||||
|
```
|
||||||
|
|
||||||
|
Si tout fonctionne, tu devrais voir les sessions BGP **établies** avec tes workers :
|
||||||
|
```bash
|
||||||
|
cilium bgp peers
|
||||||
|
|
||||||
|
Node Local AS Peer AS Peer Address Session State Uptime Family Received Advertised
|
||||||
|
apex-worker 64513 64512 192.168.66.1 established 6m30s ipv4/unicast 1 2
|
||||||
|
vertex-worker 64513 64512 192.168.66.1 established 7m9s ipv4/unicast 1 2
|
||||||
|
zenith-worker 64513 64512 192.168.66.1 established 6m13s ipv4/unicast 1 2
|
||||||
|
```
|
||||||
|
|
||||||
|
### Déployer un Service `LoadBalancer` avec BGP
|
||||||
|
|
||||||
|
Validons rapidement que la configuration fonctionne en déployant un `Deployment` de test et un `Service` de type `LoadBalancer` :
|
||||||
|
```yaml
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: test-lb
|
||||||
|
spec:
|
||||||
|
type: LoadBalancer
|
||||||
|
ports:
|
||||||
|
- port: 80
|
||||||
|
targetPort: 80
|
||||||
|
protocol: TCP
|
||||||
|
name: http
|
||||||
|
selector:
|
||||||
|
svc: test-lb
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: nginx
|
||||||
|
spec:
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
svc: test-lb
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
svc: test-lb
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: web
|
||||||
|
image: nginx
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
ports:
|
||||||
|
- containerPort: 80
|
||||||
|
readinessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /
|
||||||
|
port: 80
|
||||||
|
```
|
||||||
|
|
||||||
|
Vérifions si le service obtient une IP externe :
|
||||||
|
```bash
|
||||||
|
kubectl get services test-lb
|
||||||
|
|
||||||
|
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
|
||||||
|
test-lb LoadBalancer 10.100.167.198 192.168.55.20 80:31350/TCP 169m
|
||||||
|
```
|
||||||
|
|
||||||
|
Le service a récupéré la première IP du pool défini : `192.168.55.20`.
|
||||||
|
|
||||||
|
Depuis n’importe quel appareil du LAN, on peut tester l’accès sur le port 80 :
|
||||||
|

|
||||||
|
|
||||||
|
✅ Notre pod est joignable via une IP `LoadBalancer` routée en BGP. Première étape réussie !
|
||||||
|
|
||||||
|
---
|
||||||
|
## Kubernetes Ingress
|
||||||
|
|
||||||
|
Nous avons réussi à exposer un pod en externe en utilisant un service `LoadBalancer` et une adresse IP attribuée via BGP. Cette approche fonctionne très bien pour les tests, mais elle ne passe pas à l’échelle.
|
||||||
|
|
||||||
|
Imagine avoir 10, 20 ou 50 services différents. Est-ce que je voudrais vraiment allouer 50 adresses IP et encombrer mon firewall ainsi que mes tables de routage avec 50 entrées BGP ? Certainement pas.
|
||||||
|
|
||||||
|
C’est là qu’intervient **Ingress**.
|
||||||
|
|
||||||
|
### Qu’est-ce qu’un Kubernetes Ingress ?
|
||||||
|
|
||||||
|
Un Kubernetes **Ingress** est un objet API qui gère **l’accès externe aux services** d’un cluster, généralement en HTTP et HTTPS, le tout via un point d’entrée unique.
|
||||||
|
|
||||||
|
Au lieu d’attribuer une IP par service, on définit des règles de routage basées sur :
|
||||||
|
- **Des noms d’hôtes** (`app1.vezpi.me`, `blog.vezpi.me`, etc.)
|
||||||
|
- **Des chemins** (`/grafana`, `/metrics`, etc.)
|
||||||
|
|
||||||
|
|
||||||
|
Avec Ingress, je peux exposer plusieurs services via la même IP et le même port (souvent 443 pour HTTPS), et Kubernetes saura comment router la requête vers le bon service backend.
|
||||||
|
|
||||||
|
Voici un exemple simple d’`Ingress`, qui route le trafic de `test.vezpi.me` vers le service `test-lb` sur le port 80 :
|
||||||
|
```yaml
|
||||||
|
---
|
||||||
|
apiVersion: networking.k8s.io/v1
|
||||||
|
kind: Ingress
|
||||||
|
metadata:
|
||||||
|
name: test-ingress
|
||||||
|
spec:
|
||||||
|
rules:
|
||||||
|
- host: test.vezpi.me
|
||||||
|
http:
|
||||||
|
paths:
|
||||||
|
- path: /
|
||||||
|
pathType: Prefix
|
||||||
|
backend:
|
||||||
|
service:
|
||||||
|
name: test-lb
|
||||||
|
port:
|
||||||
|
number: 80
|
||||||
|
```
|
||||||
|
|
||||||
|
### Ingress Controller
|
||||||
|
|
||||||
|
Un Ingress, en soi, n’est qu’un ensemble de règles de routage. Il ne traite pas réellement le trafic. Pour le rendre fonctionnel, il faut un **Ingress Controller**, qui va :
|
||||||
|
- Surveiller l’API Kubernetes pour détecter les ressources `Ingress`.
|
||||||
|
- Ouvrir les ports HTTP(S) via un service `LoadBalancer` ou `NodePort`.
|
||||||
|
- Router le trafic vers le bon `Service` selon les règles de l’Ingress.
|
||||||
|
|
||||||
|
Parmi les contrôleurs populaires, on retrouve NGINX, Traefik, HAProxy, et d’autres encore. Comme je cherchais quelque chose de simple, stable et largement adopté, j’ai choisi le **NGINX Ingress Controller**.
|
||||||
|
|
||||||
|
### Installer NGINX Ingress Controller
|
||||||
|
|
||||||
|
J’utilise Helm pour installer le contrôleur, et je définis `controller.ingressClassResource.default=true` pour que tous mes futurs ingress l’utilisent par défaut :
|
||||||
|
```bash
|
||||||
|
helm install ingress-nginx \
|
||||||
|
--repo=https://kubernetes.github.io/ingress-nginx \
|
||||||
|
--namespace=ingress-nginx \
|
||||||
|
--create-namespace ingress-nginx \
|
||||||
|
--set controller.ingressClassResource.default=true \
|
||||||
|
--set controller.config.strict-validate-path-type=false
|
||||||
|
```
|
||||||
|
|
||||||
|
Le contrôleur est déployé et expose un service `LoadBalancer`. Dans mon cas, il récupère la deuxième adresse IP disponible dans la plage BGP :
|
||||||
|
```bash
|
||||||
|
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE SELECTOR
|
||||||
|
ingress-nginx-controller LoadBalancer 10.106.236.13 192.168.55.21 80:31195/TCP,443:30974/TCP 75s app.kubernetes.io/component=controller,app.kubernetes.io/instance=ingress-nginx,app.kubernetes.io/name=ingress-nginx
|
||||||
|
```
|
||||||
|
|
||||||
|
### Réserver une IP statique pour le contrôleur
|
||||||
|
|
||||||
|
Je veux m’assurer que l’Ingress Controller reçoive toujours la même adresse IP. Pour cela, j’ai créé deux pools d’IP Cilium distincts :
|
||||||
|
- Un réservé pour l’Ingress Controller avec une seule IP.
|
||||||
|
- Un pour tout le reste.
|
||||||
|
```yaml
|
||||||
|
---
|
||||||
|
# Pool for Ingress Controller
|
||||||
|
apiVersion: cilium.io/v2alpha1
|
||||||
|
kind: CiliumLoadBalancerIPPool
|
||||||
|
metadata:
|
||||||
|
name: ingress-nginx
|
||||||
|
spec:
|
||||||
|
blocks:
|
||||||
|
- cidr: 192.168.55.55/32
|
||||||
|
serviceSelector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: ingress-nginx
|
||||||
|
app.kubernetes.io/component: controller
|
||||||
|
---
|
||||||
|
# Default pool for other services
|
||||||
|
apiVersion: cilium.io/v2alpha1
|
||||||
|
kind: CiliumLoadBalancerIPPool
|
||||||
|
metadata:
|
||||||
|
name: default
|
||||||
|
spec:
|
||||||
|
blocks:
|
||||||
|
- start: 192.168.55.100
|
||||||
|
stop: 192.168.55.250
|
||||||
|
serviceSelector:
|
||||||
|
matchExpressions:
|
||||||
|
- key: app.kubernetes.io/name
|
||||||
|
operator: NotIn
|
||||||
|
values:
|
||||||
|
- ingress-nginx
|
||||||
|
```
|
||||||
|
|
||||||
|
Après avoir remplacé le pool partagé par ces deux pools, l’Ingress Controller reçoit bien l’IP dédiée `192.168.55.55`, et le service `test-lb` obtient `192.168.55.100` comme prévu :
|
||||||
|
```bash
|
||||||
|
NAMESPACE NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
|
||||||
|
default test-lb LoadBalancer 10.100.167.198 192.168.55.100 80:31350/TCP 6h34m
|
||||||
|
ingress-nginx ingress-nginx-controller LoadBalancer 10.106.236.13 192.168.55.55 80:31195/TCP,443:30974/TCP 24m
|
||||||
|
```
|
||||||
|
### Associer un Service à un Ingress
|
||||||
|
|
||||||
|
Maintenant, connectons un service à ce contrôleur.
|
||||||
|
|
||||||
|
Je commence par mettre à jour le service `LoadBalancer` d’origine pour le convertir en `ClusterIP` (puisque c’est désormais l’Ingress Controller qui l’exposera en externe) :
|
||||||
|
```yaml
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: test-lb
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- port: 80
|
||||||
|
targetPort: 80
|
||||||
|
protocol: TCP
|
||||||
|
name: http
|
||||||
|
selector:
|
||||||
|
svc: test-lb
|
||||||
|
---
|
||||||
|
apiVersion: networking.k8s.io/v1
|
||||||
|
kind: Ingress
|
||||||
|
metadata:
|
||||||
|
name: test-ingress
|
||||||
|
spec:
|
||||||
|
rules:
|
||||||
|
- host: test.vezpi.me
|
||||||
|
http:
|
||||||
|
paths:
|
||||||
|
- path: /
|
||||||
|
pathType: Prefix
|
||||||
|
backend:
|
||||||
|
service:
|
||||||
|
name: test-lb
|
||||||
|
port:
|
||||||
|
number: 80
|
||||||
|
```
|
||||||
|
|
||||||
|
Ensuite, j’applique le manifeste `Ingress` pour exposer le service en HTTP.
|
||||||
|
|
||||||
|
Comme j’utilise le plugin **Caddy** dans OPNsense, j’ai encore besoin d’un routage local de type Layer 4 pour rediriger le trafic de `test.vezpi.me` vers l’adresse IP de l’Ingress Controller (`192.168.55.55`). Je crée donc une nouvelle règle dans le plugin Caddy.
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
Puis je teste l’accès dans le navigateur :
|
||||||
|

|
||||||
|
Test d’un Ingress en HTTP
|
||||||
|
|
||||||
|
✅ Mon pod est désormais accessible via son URL HTTP en utilisant un Ingress. Deuxième étape complétée !
|
||||||
|
|
||||||
|
---
|
||||||
|
## Connexion sécurisée avec TLS
|
||||||
|
|
||||||
|
Exposer des services en HTTP simple est suffisant pour des tests, mais en pratique nous voulons presque toujours utiliser **HTTPS**. Les certificats TLS chiffrent le trafic et garantissent l’authenticité ainsi que la confiance pour les utilisateurs.
|
||||||
|
|
||||||
|
### Cert-Manager
|
||||||
|
|
||||||
|
Pour automatiser la gestion des certificats dans Kubernetes, nous utilisons **Cert-Manager**. Il peut demander, renouveler et gérer les certificats TLS sans intervention manuelle.
|
||||||
|
|
||||||
|
#### Installer Cert-Manager
|
||||||
|
|
||||||
|
Nous le déployons avec Helm dans le cluster :
|
||||||
|
```bash
|
||||||
|
helm repo add jetstack https://charts.jetstack.io
|
||||||
|
helm repo update
|
||||||
|
helm install cert-manager jetstack/cert-manager \
|
||||||
|
--namespace cert-manager \
|
||||||
|
--create-namespace \
|
||||||
|
--set crds.enabled=true
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Configurer Cert-Manager
|
||||||
|
|
||||||
|
Ensuite, nous configurons un **ClusterIssuer** pour Let’s Encrypt. Cette ressource indique à Cert-Manager comment demander des certificats :
|
||||||
|
```yaml
|
||||||
|
---
|
||||||
|
apiVersion: cert-manager.io/v1
|
||||||
|
kind: ClusterIssuer
|
||||||
|
metadata:
|
||||||
|
name: letsencrypt-staging
|
||||||
|
spec:
|
||||||
|
acme:
|
||||||
|
server: https://acme-staging-v02.api.letsencrypt.org/directory
|
||||||
|
email: <email>
|
||||||
|
privateKeySecretRef:
|
||||||
|
name: letsencrypt-staging-key
|
||||||
|
solvers:
|
||||||
|
- http01:
|
||||||
|
ingress:
|
||||||
|
ingressClassName: nginx
|
||||||
|
```
|
||||||
|
|
||||||
|
ℹ️ Ici, je définis le serveur **staging** de Let’s Encrypt ACME pour les tests. Les certificats de staging ne sont pas reconnus par les navigateurs, mais ils évitent d’atteindre les limites strictes de Let’s Encrypt lors du développement.
|
||||||
|
|
||||||
|
Appliquez-le :
|
||||||
|
```bash
|
||||||
|
kubectl apply -f clusterissuer.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
Vérifiez si votre `ClusterIssuer` est `Ready` :
|
||||||
|
```bash
|
||||||
|
kubectl get clusterissuers.cert-manager.io
|
||||||
|
NAME READY AGE
|
||||||
|
letsencrypt-staging True 14m
|
||||||
|
```
|
||||||
|
|
||||||
|
S’il ne devient pas `Ready`, utilisez `kubectl describe` sur la ressource pour le diagnostiquer.
|
||||||
|
|
||||||
|
### Ajouter TLS dans un Ingress
|
||||||
|
|
||||||
|
Nous pouvons maintenant sécuriser notre service avec TLS en ajoutant une section `tls` dans la spécification `Ingress` et en référençant le `ClusterIssuer` :
|
||||||
|
```yaml
|
||||||
|
---
|
||||||
|
apiVersion: networking.k8s.io/v1
|
||||||
|
kind: Ingress
|
||||||
|
metadata:
|
||||||
|
name: test-ingress-https
|
||||||
|
annotations:
|
||||||
|
nginx.ingress.kubernetes.io/rewrite-target: /
|
||||||
|
cert-manager.io/cluster-issuer: letsencrypt-staging
|
||||||
|
spec:
|
||||||
|
tls:
|
||||||
|
- hosts:
|
||||||
|
- test.vezpi.me
|
||||||
|
secretName: test-vezpi-me-tls
|
||||||
|
rules:
|
||||||
|
- host: test.vezpi.me
|
||||||
|
http:
|
||||||
|
paths:
|
||||||
|
- path: /
|
||||||
|
pathType: Prefix
|
||||||
|
backend:
|
||||||
|
service:
|
||||||
|
name: test-lb
|
||||||
|
port:
|
||||||
|
number: 80
|
||||||
|
```
|
||||||
|
|
||||||
|
En arrière-plan, Cert-Manager suit ce flux pour émettre le certificat :
|
||||||
|
- Détecte l’`Ingress` avec `tls` et le `ClusterIssuer`.
|
||||||
|
- Crée un CRD **Certificate** décrivant le certificat souhaité + l’emplacement du Secret.
|
||||||
|
- Crée un CRD **Order** pour représenter une tentative d’émission avec Let’s Encrypt.
|
||||||
|
- Crée un CRD **Challenge** (par ex. validation HTTP-01).
|
||||||
|
- Met en place un Ingress/Pod temporaire pour résoudre le challenge.
|
||||||
|
- Crée un CRD **CertificateRequest** et envoie le CSR à Let’s Encrypt.
|
||||||
|
- Reçoit le certificat signé et le stocke dans un Secret Kubernetes.
|
||||||
|
- L’Ingress utilise automatiquement ce Secret pour servir en HTTPS.
|
||||||
|
|
||||||
|
✅ Une fois ce processus terminé, votre Ingress est sécurisé avec un certificat TLS.
|
||||||
|

|
||||||
|
|
||||||
|
### Passer aux certificats de production
|
||||||
|
|
||||||
|
Une fois que le staging fonctionne, nous pouvons passer au serveur **production** ACME pour obtenir un certificat Let’s Encrypt reconnu :
|
||||||
|
```yaml
|
||||||
|
---
|
||||||
|
apiVersion: cert-manager.io/v1
|
||||||
|
kind: ClusterIssuer
|
||||||
|
metadata:
|
||||||
|
name: letsencrypt
|
||||||
|
spec:
|
||||||
|
acme:
|
||||||
|
server: https://acme-v02.api.letsencrypt.org/directory
|
||||||
|
email: <email>
|
||||||
|
privateKeySecretRef:
|
||||||
|
name: letsencrypt-key
|
||||||
|
solvers:
|
||||||
|
- http01:
|
||||||
|
ingress:
|
||||||
|
ingressClassName: nginx
|
||||||
|
```
|
||||||
|
|
||||||
|
Mettez à jour l’`Ingress` pour pointer vers le nouveau `ClusterIssuer` :
|
||||||
|
```yaml
|
||||||
|
---
|
||||||
|
apiVersion: networking.k8s.io/v1
|
||||||
|
kind: Ingress
|
||||||
|
metadata:
|
||||||
|
name: test-ingress-https
|
||||||
|
annotations:
|
||||||
|
cert-manager.io/cluster-issuer: letsencrypt
|
||||||
|
spec:
|
||||||
|
tls:
|
||||||
|
- hosts:
|
||||||
|
- test.vezpi.me
|
||||||
|
secretName: test-vezpi-me-tls
|
||||||
|
rules:
|
||||||
|
- host: test.vezpi.me
|
||||||
|
http:
|
||||||
|
paths:
|
||||||
|
- path: /
|
||||||
|
pathType: Prefix
|
||||||
|
backend:
|
||||||
|
service:
|
||||||
|
name: test-lb
|
||||||
|
port:
|
||||||
|
number: 80
|
||||||
|
```
|
||||||
|
|
||||||
|
Comme le certificat de staging est encore stocké dans le Secret, je le supprime pour forcer une nouvelle demande en production :
|
||||||
|
```bash
|
||||||
|
kubectl delete secret test-vezpi-me-tls
|
||||||
|
```
|
||||||
|
|
||||||
|
🎉 Mon `Ingress` est désormais sécurisé avec un certificat TLS valide délivré par Let’s Encrypt. Les requêtes vers `https://test.vezpi.me` sont chiffrées de bout en bout et routées par le NGINX Ingress Controller jusqu’à mon pod `nginx` :
|
||||||
|

|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
## Conclusion
|
||||||
|
|
||||||
|
Dans ce parcours, je suis parti des bases, en exposant un simple pod avec un service `LoadBalancer`, puis j’ai construit étape par étape une configuration prête pour la production :
|
||||||
|
- Compréhension des **Services Kubernetes** et de leurs différents types.
|
||||||
|
- Utilisation du **BGP avec Cilium** et OPNsense pour attribuer des IP externes directement depuis mon réseau.
|
||||||
|
- Introduction des **Ingress** pour mieux passer à l’échelle, en exposant plusieurs services via un point d’entrée unique.
|
||||||
|
- Installation du **NGINX Ingress Controller** pour gérer le routage.
|
||||||
|
- Automatisation de la gestion des certificats avec **Cert-Manager**, afin de sécuriser mes services avec des certificats TLS Let’s Encrypt.
|
||||||
|
|
||||||
|
🚀 Résultat : mon pod est maintenant accessible via une véritable URL, sécurisé en HTTPS, comme n’importe quelle application web moderne.
|
||||||
|
|
||||||
|
C’est une étape importante dans mon aventure Kubernetes en homelab. Dans le prochain article, je souhaite explorer le stockage persistant et connecter mon cluster Kubernetes à mon setup **Ceph** sous **Proxmox**.
|
||||||
|
|
||||||
|
A la prochaine !
|
630
content/post/9-expose-kubernetes-pods-externally-ingress-tls.md
Normal file
@@ -0,0 +1,630 @@
|
|||||||
|
---
|
||||||
|
slug: expose-kubernetes-pods-externally-ingress-tls
|
||||||
|
title: Exposing Kubernetes Pods externally with Ingress and TLS
|
||||||
|
description: Learn how to expose Kubernetes pods externally with Services, Ingress, and TLS using BGP, NGINX, and Cert-Manager in a homelab setup.
|
||||||
|
date: 2025-08-19
|
||||||
|
draft: false
|
||||||
|
tags:
|
||||||
|
- kubernetes
|
||||||
|
- helm
|
||||||
|
- bgp
|
||||||
|
- opnsense
|
||||||
|
- cilium
|
||||||
|
- nginx-ingress-controller
|
||||||
|
- cert-manager
|
||||||
|
categories:
|
||||||
|
- homelab
|
||||||
|
---
|
||||||
|
|
||||||
|
## Intro
|
||||||
|
|
||||||
|
After building my own Kubernetes cluster in my homelab using `kubeadm` in [that post]({{< ref "post/8-create-manual-kubernetes-cluster-kubeadm" >}}), my next challenge is to expose a simple pod externally, reachable via a URL and secured with a TLS certificate issued by Let's Encrypt.
|
||||||
|
|
||||||
|
To achieve this, I need to configure several components:
|
||||||
|
- **Service**: Expose the pod inside the cluster and provide an access point.
|
||||||
|
- **Ingress**: Define routing rules to expose HTTP(S) services externally.
|
||||||
|
- **Ingress Controller**: Watches Ingress resources and handles the actual traffic routing.
|
||||||
|
- **TLS Certificates**: Secure traffic with HTTPS using certificates from Let’s Encrypt.
|
||||||
|
|
||||||
|
This post guides you through each step to understand how external access works in Kubernetes in a homelab environment.
|
||||||
|
|
||||||
|
Let’s dive in.
|
||||||
|
|
||||||
|
---
|
||||||
|
## Helm
|
||||||
|
|
||||||
|
I use **Helm**, the de facto package manager for Kubernetes, to install external components like the Ingress controller or cert-manager.
|
||||||
|
|
||||||
|
### Why Helm
|
||||||
|
|
||||||
|
Helm simplifies the deployment and management of Kubernetes applications. Instead of writing and maintaining large YAML manifests, Helm lets you install applications with a single command, using versioned and configurable charts.
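To give an idea of the workflow (the repository, chart and release names below are just examples), typical Helm usage boils down to a handful of commands:

```bash
# Add a chart repository and refresh the local index
helm repo add example https://charts.example.org
helm repo update

# Install a release from a chart, overriding values on the command line
helm install my-release example/some-chart \
  --namespace demo \
  --create-namespace \
  --set replicaCount=2

# Upgrade, roll back or remove the release later on
helm upgrade my-release example/some-chart --set replicaCount=3
helm rollback my-release 1
helm uninstall my-release --namespace demo
```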
|
||||||
|
|
||||||
|
### Install Helm
|
||||||
|
|
||||||
|
I install Helm on my LXC bastion host, which already has access to the Kubernetes cluster:
|
||||||
|
```bash
|
||||||
|
curl https://baltocdn.com/helm/signing.asc | gpg --dearmor | sudo tee /usr/share/keyrings/helm.gpg > /dev/null
|
||||||
|
echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/helm.gpg] https://baltocdn.com/helm/stable/debian/ all main" | sudo tee /etc/apt/sources.list.d/helm-stable-debian.list
|
||||||
|
sudo apt update
|
||||||
|
sudo apt install helm
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
## Kubernetes Services
|
||||||
|
|
||||||
|
Before we can expose a pod externally, we need a way to make it reachable inside the cluster. That’s where Kubernetes Services come in.
|
||||||
|
|
||||||
|
Services act as the bridge between pods and the network, making sure applications remain reachable even as pods are rescheduled.
|
||||||
|
|
||||||
|
There are several types of Kubernetes Services, each serving a different purpose:
|
||||||
|
- **ClusterIP** exposes the Service on a cluster-internal IP, only accessible inside the cluster.
|
||||||
|
- **NodePort** exposes the Service on a static port on each node’s IP, accessible from outside the cluster.
|
||||||
|
- **LoadBalancer** exposes the Service on an external IP, typically using cloud integrations (or BGP in a homelab).
|
||||||
|
|
||||||
|
---
|
||||||
|
## Expose a `LoadBalancer` Service with BGP
|
||||||
|
|
||||||
|
Initially, I considered using **MetalLB** to expose service IPs to my home network. That’s what I used in the past when relying on my ISP box as the main router. But after reading this post, [Use Cilium BGP integration with OPNsense](https://devopstales.github.io/kubernetes/cilium-opnsense-bgp/), I realized I could achieve the same (or even better) using BGP with my **OPNsense** router and **Cilium**, my CNI.
|
||||||
|
### What Is BGP?
|
||||||
|
|
||||||
|
BGP (Border Gateway Protocol) is a routing protocol used to exchange network routes between systems. In the Kubernetes homelab context, BGP allows your Kubernetes nodes to advertise IPs directly to your network router or firewall. Your router then knows how to reach the IPs managed by your cluster.
|
||||||
|
|
||||||
|
So instead of MetalLB managing IP allocation and ARP replies, your nodes directly tell your router: "Hey, I own 192.168.1.240".
|
||||||
|
### Legacy MetalLB Approach
|
||||||
|
|
||||||
|
Without BGP, MetalLB in Layer 2 mode works like this:
|
||||||
|
- Assigns a `LoadBalancer` IP (e.g., `192.168.1.240`) from a pool.
|
||||||
|
- One node responds to ARP for that IP on your LAN.
|
||||||
|
|
||||||
|
Yes, MetalLB can also work with BGP, but why use it when my CNI (Cilium) already handles BGP out of the box?
|
||||||
|
### BGP with Cilium
|
||||||
|
|
||||||
|
With Cilium + BGP, you get:
|
||||||
|
- Cilium’s agent on the node advertises LoadBalancer IPs over BGP.
|
||||||
|
- Your router learns that IP and routes to the correct node.
|
||||||
|
- No need for MetalLB.
|
||||||
|
|
||||||
|
### BGP Setup
|
||||||
|
|
||||||
|
BGP is disabled by default on both OPNsense and Cilium. Let’s enable it on both ends.
|
||||||
|
|
||||||
|
#### On OPNsense
|
||||||
|
|
||||||
|
According to the [official OPNsense documentation](https://docs.opnsense.org/manual/dynamic_routing.html#bgp-section), enabling BGP requires installing a plugin.
|
||||||
|
|
||||||
|
Head to `System` > `Firmware` > `Plugins` and install the `os-frr` plugin:
|
||||||
|

|
||||||
|
Install `os-frr` plugin in OPNsense
|
||||||
|
|
||||||
|
Once installed, enable the plugin under `Routing` > `General`:
|
||||||
|

|
||||||
|
Enable routing in OPNsense
|
||||||
|
|
||||||
|
Then navigate to the `BGP` section. In the **General** tab:
|
||||||
|
- Tick the box to enable BGP.
|
||||||
|
- Set your **BGP ASN**. I used `64512`, the first private ASN from the reserved range (see [ASN table](https://en.wikipedia.org/wiki/Autonomous_system_\(Internet\)#ASN_Table)):
|
||||||
|

|
||||||
|
General BGP configuration in OPNsense
|
||||||
|
|
||||||
|
Now create your BGP neighbors. I’m only peering with my **worker nodes** (since only they run workloads). For each neighbor:
|
||||||
|
- Set the node’s IP in `Peer-IP`
|
||||||
|
- Use `64513` as the **Remote AS** (Cilium’s ASN)
|
||||||
|
- Set `Update-Source Interface` to `Lab`
|
||||||
|
- Tick `Next-Hop-Self`:
|
||||||
|

|
||||||
|
BGP neighbor configuration in OPNsense
|
||||||
|
|
||||||
|
Here’s how my neighbors list looks once complete:
|
||||||
|

|
||||||
|
BGP neighbor list
|
||||||
|
|
||||||
|
Don’t forget to create a firewall rule allowing BGP (port `179/TCP`) from the **Lab** VLAN to the firewall:
|
||||||
|

|
||||||
|
Allow TCP/179 from Lab to OPNsense
|
||||||
|
|
||||||
|
#### In Cilium
|
||||||
|
|
||||||
|
I already have Cilium installed and couldn’t find a way to enable BGP with the CLI, so I simply reinstalled it with the BGP option:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cilium uninstall
|
||||||
|
cilium install --set bgpControlPlane.enabled=true
|
||||||
|
```
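To confirm the option actually made it into the agent configuration, something like this should show the BGP control plane flag set to true (the exact key name may vary between Cilium versions):

```bash
# Look for the BGP control plane flag in the running Cilium configuration
cilium config view | grep -i bgp
```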
|
||||||
|
|
||||||
|
I configure only worker nodes to establish BGP peering by labeling them for the `nodeSelector`:
|
||||||
|
```bash
|
||||||
|
kubectl label node apex-worker node-role.kubernetes.io/worker=""
|
||||||
|
kubectl label node vertex-worker node-role.kubernetes.io/worker=""
|
||||||
|
kubectl label node zenith-worker node-role.kubernetes.io/worker=""
|
||||||
|
```
|
||||||
|
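The labels then show up in the `ROLES` column of `kubectl get nodes`:

```bash
kubectl get nodes
```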
```plaintext
|
||||||
|
NAME STATUS ROLES AGE VERSION
|
||||||
|
apex-master Ready control-plane 5d4h v1.32.7
|
||||||
|
apex-worker Ready worker 5d1h v1.32.7
|
||||||
|
vertex-master Ready control-plane 5d1h v1.32.7
|
||||||
|
vertex-worker Ready worker 5d1h v1.32.7
|
||||||
|
zenith-master Ready control-plane 5d1h v1.32.7
|
||||||
|
zenith-worker Ready worker 5d1h v1.32.7
|
||||||
|
```
|
||||||
|
|
||||||
|
For the entire BGP configuration, I need:
|
||||||
|
- **CiliumBGPClusterConfig**: BGP settings for the Cilium cluster, including its local ASN and its peer.
|
||||||
|
- **CiliumBGPPeerConfig**: Sets BGP timers, graceful restart, and route advertisement settings.
|
||||||
|
- **CiliumBGPAdvertisement**: Defines which Kubernetes services should be advertised via BGP.
|
||||||
|
- **CiliumLoadBalancerIPPool**: Configures the range of IPs assigned to Kubernetes LoadBalancer services.
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
---
|
||||||
|
apiVersion: cilium.io/v2alpha1
|
||||||
|
kind: CiliumBGPClusterConfig
|
||||||
|
metadata:
|
||||||
|
name: bgp-cluster
|
||||||
|
spec:
|
||||||
|
nodeSelector:
|
||||||
|
matchLabels:
|
||||||
|
node-role.kubernetes.io/worker: "" # Only for worker nodes
|
||||||
|
bgpInstances:
|
||||||
|
- name: "cilium-bgp-cluster"
|
||||||
|
localASN: 64513 # Cilium ASN
|
||||||
|
peers:
|
||||||
|
- name: "pfSense-peer"
|
||||||
|
peerASN: 64512 # OPNsense ASN
|
||||||
|
peerAddress: 192.168.66.1 # OPNsense IP
|
||||||
|
peerConfigRef:
|
||||||
|
name: "bgp-peer"
|
||||||
|
---
|
||||||
|
apiVersion: cilium.io/v2alpha1
|
||||||
|
kind: CiliumBGPPeerConfig
|
||||||
|
metadata:
|
||||||
|
name: bgp-peer
|
||||||
|
spec:
|
||||||
|
timers:
|
||||||
|
holdTimeSeconds: 9
|
||||||
|
keepAliveTimeSeconds: 3
|
||||||
|
gracefulRestart:
|
||||||
|
enabled: true
|
||||||
|
restartTimeSeconds: 15
|
||||||
|
families:
|
||||||
|
- afi: ipv4
|
||||||
|
safi: unicast
|
||||||
|
advertisements:
|
||||||
|
matchLabels:
|
||||||
|
advertise: "bgp"
|
||||||
|
---
|
||||||
|
apiVersion: cilium.io/v2alpha1
|
||||||
|
kind: CiliumBGPAdvertisement
|
||||||
|
metadata:
|
||||||
|
name: bgp-advertisement
|
||||||
|
labels:
|
||||||
|
advertise: bgp
|
||||||
|
spec:
|
||||||
|
advertisements:
|
||||||
|
- advertisementType: "Service"
|
||||||
|
service:
|
||||||
|
addresses:
|
||||||
|
- LoadBalancerIP
|
||||||
|
selector:
|
||||||
|
matchExpressions:
|
||||||
|
- { key: somekey, operator: NotIn, values: [ never-used-value ] }
|
||||||
|
---
|
||||||
|
apiVersion: "cilium.io/v2alpha1"
|
||||||
|
kind: CiliumLoadBalancerIPPool
|
||||||
|
metadata:
|
||||||
|
name: "dmz"
|
||||||
|
spec:
|
||||||
|
blocks:
|
||||||
|
- start: "192.168.55.20" # LB Range Start IP
|
||||||
|
stop: "192.168.55.250" # LB Range End IP
|
||||||
|
```
|
||||||
|
|
||||||
|
Apply it:
|
||||||
|
```bash
|
||||||
|
kubectl apply -f bgp.yaml
|
||||||
|
|
||||||
|
ciliumbgpclusterconfig.cilium.io/bgp-cluster created
|
||||||
|
ciliumbgppeerconfig.cilium.io/bgp-peer created
|
||||||
|
ciliumbgpadvertisement.cilium.io/bgp-advertisement created
|
||||||
|
ciliumloadbalancerippool.cilium.io/dmz created
|
||||||
|
```
|
||||||
|
|
||||||
|
If everything works, you should see the BGP sessions **established** with your workers:
|
||||||
|
```bash
|
||||||
|
cilium bgp peers
|
||||||
|
|
||||||
|
Node Local AS Peer AS Peer Address Session State Uptime Family Received Advertised
|
||||||
|
apex-worker 64513 64512 192.168.66.1 established 6m30s ipv4/unicast 1 2
|
||||||
|
vertex-worker 64513 64512 192.168.66.1 established 7m9s ipv4/unicast 1 2
|
||||||
|
zenith-worker 64513 64512 192.168.66.1 established 6m13s ipv4/unicast 1 2
|
||||||
|
```
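On the router side, the same information can be checked from an OPNsense shell with FRR's `vtysh` (a quick sketch, assuming the default FRR setup shipped with the `os-frr` plugin):

```bash
# BGP session summary and the routes learned from the workers
vtysh -c "show ip bgp summary"
vtysh -c "show ip bgp"
```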
|
||||||
|
|
||||||
|
### Deploying a `LoadBalancer` Service with BGP
|
||||||
|
|
||||||
|
Let’s quickly validate that the setup works by deploying a test `Deployment` and `LoadBalancer` `Service`:
|
||||||
|
```yaml
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: test-lb
|
||||||
|
spec:
|
||||||
|
type: LoadBalancer
|
||||||
|
ports:
|
||||||
|
- port: 80
|
||||||
|
targetPort: 80
|
||||||
|
protocol: TCP
|
||||||
|
name: http
|
||||||
|
selector:
|
||||||
|
svc: test-lb
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: nginx
|
||||||
|
spec:
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
svc: test-lb
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
svc: test-lb
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: web
|
||||||
|
image: nginx
|
||||||
|
imagePullPolicy: IfNotPresent
|
||||||
|
ports:
|
||||||
|
- containerPort: 80
|
||||||
|
readinessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /
|
||||||
|
port: 80
|
||||||
|
```
|
||||||
|
|
||||||
|
Check if it gets an external IP:
|
||||||
|
```bash
|
||||||
|
kubectl get services test-lb
|
||||||
|
|
||||||
|
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
|
||||||
|
test-lb LoadBalancer 10.100.167.198 192.168.55.20 80:31350/TCP 169m
|
||||||
|
```
|
||||||
|
|
||||||
|
The service got the first IP from our defined pool: `192.168.55.20`.
|
||||||
|
|
||||||
|
Now from any device on the LAN, try to reach that IP on port 80:
|
||||||
|

|
||||||
|
|
||||||
|
✅ Our pod is reachable through a BGP-routed `LoadBalancer` IP, first step successful!
|
||||||
|
|
||||||
|
---
|
||||||
|
## Kubernetes Ingress
|
||||||
|
|
||||||
|
We managed to expose a pod externally using a `LoadBalancer` service and a BGP-assigned IP address. This approach works great for testing, but it doesn't scale well.
|
||||||
|
|
||||||
|
Imagine having 10, 20, or 50 different services: would I really want to allocate 50 IP addresses and clutter my firewall and routing tables with 50 BGP entries? Definitely not.
|
||||||
|
|
||||||
|
That’s where **Ingress** kicks in.
|
||||||
|
|
||||||
|
### What Is a Kubernetes Ingress?
|
||||||
|
|
||||||
|
A Kubernetes **Ingress** is an API object that manages **external access to services** in a cluster, typically HTTP and HTTPS, all through a single entry point.
|
||||||
|
|
||||||
|
Instead of assigning one IP per service, you define routing rules based on:
|
||||||
|
- **Hostnames** (`app1.vezpi.me`, `blog.vezpi.me`, etc.)
|
||||||
|
- **Paths** (`/grafana`, `/metrics`, etc.)
|
||||||
|
|
||||||
|
With Ingress, I can expose multiple services over the same IP and port (usually 443 for HTTPS), and Kubernetes will know how to route the request to the right backend service.
|
||||||
|
|
||||||
|
Here is an example of a simple `Ingress`, routing traffic for `test.vezpi.me` to the `test-lb` service on port 80:
|
||||||
|
```yaml
|
||||||
|
---
|
||||||
|
apiVersion: networking.k8s.io/v1
|
||||||
|
kind: Ingress
|
||||||
|
metadata:
|
||||||
|
name: test-ingress
|
||||||
|
spec:
|
||||||
|
rules:
|
||||||
|
- host: test.vezpi.me
|
||||||
|
http:
|
||||||
|
paths:
|
||||||
|
- path: /
|
||||||
|
pathType: Prefix
|
||||||
|
backend:
|
||||||
|
service:
|
||||||
|
name: test-lb
|
||||||
|
port:
|
||||||
|
number: 80
|
||||||
|
```
|
||||||
|
|
||||||
|
### Ingress Controller
|
||||||
|
|
||||||
|
On its own, an Ingress is just a set of routing rules. It doesn’t actually handle traffic. To bring it to life, I need an **Ingress Controller**, which:
|
||||||
|
- Watches the Kubernetes API for `Ingress` resources.
|
||||||
|
- Opens HTTP(S) ports on a `LoadBalancer` or `NodePort` service.
|
||||||
|
- Routes traffic to the correct `Service` based on the `Ingress` rules.
|
||||||
|
|
||||||
|
Popular controllers include NGINX, Traefik, HAProxy, and more. Since I was looking for something simple, stable, and widely adopted, I picked the **NGINX Ingress Controller**.
|
||||||
|
|
||||||
|
### Install NGINX Ingress Controller
|
||||||
|
|
||||||
|
I use Helm to install the controller, and I set `controller.ingressClassResource.default=true` so that all my future ingresses use it by default:
|
||||||
|
```bash
|
||||||
|
helm install ingress-nginx \
|
||||||
|
--repo=https://kubernetes.github.io/ingress-nginx \
|
||||||
|
--namespace=ingress-nginx \
|
||||||
|
--create-namespace ingress-nginx \
|
||||||
|
--set controller.ingressClassResource.default=true \
|
||||||
|
--set controller.config.strict-validate-path-type=false
|
||||||
|
```
|
||||||
|
|
||||||
|
The controller is deployed and exposes a `LoadBalancer` service. In my setup, it picks the second available IP in the BGP range:
|
||||||
|
```bash
|
||||||
|
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE SELECTOR
|
||||||
|
ingress-nginx-controller LoadBalancer 10.106.236.13 192.168.55.21 80:31195/TCP,443:30974/TCP 75s app.kubernetes.io/component=controller,app.kubernetes.io/instance=ingress-nginx,app.kubernetes.io/name=ingress-nginx
|
||||||
|
```
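To double-check that this class is registered as the cluster default, the standard `ingressclass.kubernetes.io/is-default-class` annotation should be set to `true`. A quick sketch, assuming the class created by the chart keeps its default name `nginx`:

```bash
kubectl get ingressclass
kubectl get ingressclass nginx \
  -o jsonpath='{.metadata.annotations.ingressclass\.kubernetes\.io/is-default-class}'
```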
|
||||||
|
|
||||||
|
### Reserving a Static IP for the Controller
|
||||||
|
|
||||||
|
I want to make sure the Ingress Controller always receives the same IP address. To do this, I created two separate Cilium IP pools:
|
||||||
|
- One dedicated for the Ingress Controller with a single IP.
|
||||||
|
- One for everything else.
|
||||||
|
```yaml
|
||||||
|
---
|
||||||
|
# Pool for Ingress Controller
|
||||||
|
apiVersion: cilium.io/v2alpha1
|
||||||
|
kind: CiliumLoadBalancerIPPool
|
||||||
|
metadata:
|
||||||
|
name: ingress-nginx
|
||||||
|
spec:
|
||||||
|
blocks:
|
||||||
|
- cidr: 192.168.55.55/32
|
||||||
|
serviceSelector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: ingress-nginx
|
||||||
|
app.kubernetes.io/component: controller
|
||||||
|
---
|
||||||
|
# Default pool for other services
|
||||||
|
apiVersion: cilium.io/v2alpha1
|
||||||
|
kind: CiliumLoadBalancerIPPool
|
||||||
|
metadata:
|
||||||
|
name: default
|
||||||
|
spec:
|
||||||
|
blocks:
|
||||||
|
- start: 192.168.55.100
|
||||||
|
stop: 192.168.55.250
|
||||||
|
serviceSelector:
|
||||||
|
matchExpressions:
|
||||||
|
- key: app.kubernetes.io/name
|
||||||
|
operator: NotIn
|
||||||
|
values:
|
||||||
|
- ingress-nginx
|
||||||
|
```
|
||||||
|
|
||||||
|
After replacing the previous shared pool with these two, the Ingress Controller gets the desired IP `192.168.55.55`, and the `test-lb` service picks `192.168.55.100` as expected:
|
||||||
|
```bash
|
||||||
|
NAMESPACE NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
|
||||||
|
default test-lb LoadBalancer 10.100.167.198 192.168.55.100 80:31350/TCP 6h34m
|
||||||
|
ingress-nginx ingress-nginx-controller LoadBalancer 10.106.236.13 192.168.55.55 80:31195/TCP,443:30974/TCP 24m
|
||||||
|
```
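The allocation can also be verified on the Cilium side: the pools should report no conflicts and the expected number of available IPs (a quick sketch):

```bash
kubectl get ciliumloadbalancerippools.cilium.io
```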
|
||||||
|
|
||||||
|
### Associate a Service to an Ingress
|
||||||
|
|
||||||
|
Now let’s wire up a service to this controller.
|
||||||
|
|
||||||
|
First, I update the original `LoadBalancer` service and convert it into a `ClusterIP` (since the Ingress Controller will now expose it externally):
|
||||||
|
```yaml
|
||||||
|
---
|
||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: test-lb
|
||||||
|
spec:
|
||||||
|
ports:
|
||||||
|
- port: 80
|
||||||
|
targetPort: 80
|
||||||
|
protocol: TCP
|
||||||
|
name: http
|
||||||
|
selector:
|
||||||
|
svc: test-lb
|
||||||
|
---
|
||||||
|
apiVersion: networking.k8s.io/v1
|
||||||
|
kind: Ingress
|
||||||
|
metadata:
|
||||||
|
name: test-ingress
|
||||||
|
spec:
|
||||||
|
rules:
|
||||||
|
- host: test.vezpi.me
|
||||||
|
http:
|
||||||
|
paths:
|
||||||
|
- path: /
|
||||||
|
pathType: Prefix
|
||||||
|
backend:
|
||||||
|
service:
|
||||||
|
name: test-lb
|
||||||
|
port:
|
||||||
|
number: 80
|
||||||
|
```
|
||||||
|
|
||||||
|
Then I apply the `Ingress` manifest as shown earlier to expose the service over HTTP.
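Before touching the DNS or Caddy side, the wiring can be tested straight against the controller IP by overriding name resolution in curl (`192.168.55.55` being the IP reserved above):

```bash
# The Ingress should reference the nginx class and the test-lb backend
kubectl get ingress test-ingress

# Send the request to the controller while keeping the expected Host header
curl --resolve test.vezpi.me:80:192.168.55.55 -I http://test.vezpi.me/
```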
|
||||||
|
|
||||||
|
Since I'm using the Caddy plugin on OPNsense, I still need a local Layer 4 route to forward traffic for `test.vezpi.me` to the NGINX Ingress Controller IP (`192.168.55.55`). I simply create a new rule in the Caddy plugin.
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
Now let’s test it in the browser:
|
||||||
|

|
||||||
|
Test Ingress on HTTP
|
||||||
|
|
||||||
|
✅ Our pod is now reachable on its HTTP URL using an Ingress. Second step complete!
|
||||||
|
|
||||||
|
---
|
||||||
|
## Secure Connection with TLS
|
||||||
|
|
||||||
|
Exposing services over plain HTTP is fine for testing, but in practice we almost always want **HTTPS**. TLS certificates encrypt traffic and provide authenticity and trust to users.
|
||||||
|
|
||||||
|
### Cert-Manager
|
||||||
|
|
||||||
|
To automate certificate management in Kubernetes, we use **Cert-Manager**. It can request, renew, and manage TLS certificates without manual intervention.
|
||||||
|
|
||||||
|
#### Install Cert-Manager
|
||||||
|
|
||||||
|
We deploy it with Helm on the cluster:
|
||||||
|
```bash
|
||||||
|
helm repo add jetstack https://charts.jetstack.io
|
||||||
|
helm repo update
|
||||||
|
helm install cert-manager jetstack/cert-manager \
|
||||||
|
--namespace cert-manager \
|
||||||
|
--create-namespace \
|
||||||
|
--set crds.enabled=true
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Setup Cert-Manager
|
||||||
|
|
||||||
|
Next, we configure a **ClusterIssuer** for Let’s Encrypt. This resource tells Cert-Manager how to request certificates:
|
||||||
|
```yaml
|
||||||
|
---
|
||||||
|
apiVersion: cert-manager.io/v1
|
||||||
|
kind: ClusterIssuer
|
||||||
|
metadata:
|
||||||
|
name: letsencrypt-staging
|
||||||
|
spec:
|
||||||
|
acme:
|
||||||
|
server: https://acme-staging-v02.api.letsencrypt.org/directory
|
||||||
|
email: <email>
|
||||||
|
privateKeySecretRef:
|
||||||
|
name: letsencrypt-staging-key
|
||||||
|
solvers:
|
||||||
|
- http01:
|
||||||
|
ingress:
|
||||||
|
ingressClassName: nginx
|
||||||
|
```
|
||||||
|
|
||||||
|
ℹ️ Here I define the **staging** Let’s Encrypt ACME server for testing purposes. Staging certificates are not trusted by browsers, but they prevent hitting Let’s Encrypt’s strict rate limits during development.
|
||||||
|
|
||||||
|
Apply it:
|
||||||
|
```bash
|
||||||
|
kubectl apply -f clusterissuer.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
Verify if your `ClusterIssuer` is `Ready`:
|
||||||
|
```bash
|
||||||
|
kubectl get clusterissuers.cert-manager.io
|
||||||
|
NAME READY AGE
|
||||||
|
letsencrypt-staging True 14m
|
||||||
|
```
|
||||||
|
|
||||||
|
If it doesn’t become `Ready`, use `kubectl describe` on the resource to troubleshoot.
|
||||||
|
|
||||||
|
### Add TLS in an Ingress
|
||||||
|
|
||||||
|
Now we can secure our service with TLS by adding a `tls` section in the `Ingress` spec and referencing the `ClusterIssuer`:
|
||||||
|
```yaml
|
||||||
|
---
|
||||||
|
apiVersion: networking.k8s.io/v1
|
||||||
|
kind: Ingress
|
||||||
|
metadata:
|
||||||
|
name: test-ingress-https
|
||||||
|
annotations:
|
||||||
|
nginx.ingress.kubernetes.io/rewrite-target: /
|
||||||
|
cert-manager.io/cluster-issuer: letsencrypt-staging
|
||||||
|
spec:
|
||||||
|
tls:
|
||||||
|
- hosts:
|
||||||
|
- test.vezpi.me
|
||||||
|
secretName: test-vezpi-me-tls
|
||||||
|
rules:
|
||||||
|
- host: test.vezpi.me
|
||||||
|
http:
|
||||||
|
paths:
|
||||||
|
- path: /
|
||||||
|
pathType: Prefix
|
||||||
|
backend:
|
||||||
|
service:
|
||||||
|
name: test-lb
|
||||||
|
port:
|
||||||
|
number: 80
|
||||||
|
```
|
||||||
|
|
||||||
|
Behind the scenes, Cert-Manager goes through this workflow to issue the certificate:
|
||||||
|
- Detects the `Ingress` with `tls` and the `ClusterIssuer`.
|
||||||
|
- Creates a Certificate CRD that describes the desired cert + Secret storage.
|
||||||
|
- Creates an Order CRD to represent one issuance attempt with Let’s Encrypt.
|
||||||
|
- Creates a Challenge CRD (e.g., HTTP-01 validation).
|
||||||
|
- Provisions a temporary solver Ingress/Pod to solve the challenge.
|
||||||
|
- Creates a CertificateRequest CRD and sends the CSR to Let’s Encrypt.
|
||||||
|
- Receives the signed certificate and stores it in a Kubernetes Secret.
|
||||||
|
- The Ingress automatically uses the Secret to serve HTTPS.
|
||||||
|
|
||||||
|
✅ Once this process completes, your Ingress is secured with a TLS certificate.
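To follow the issuance while it happens, the intermediate resources can be listed directly; these are the standard cert-manager kinds, and the `Certificate` created from the Ingress is normally named after the Secret:

```bash
# Issuance pipeline: Certificate -> CertificateRequest -> Order -> Challenge
kubectl get certificate,certificaterequest,order,challenge

# Detailed status and events for the certificate itself
kubectl describe certificate test-vezpi-me-tls
```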

![Pasted_image_20250903215034.png](img/pasted-image-20250903215034.png)

### Switch to Production Certificates

Once staging works, we can safely switch to the **production** ACME server to get a trusted certificate from Let’s Encrypt:
```yaml
---
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
  name: letsencrypt
spec:
  acme:
    server: https://acme-v02.api.letsencrypt.org/directory
    email: <email>
    privateKeySecretRef:
      name: letsencrypt-key
    solvers:
      - http01:
          ingress:
            ingressClassName: nginx
```
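
Apply it the same way as the staging issuer. The filename below is only an example for wherever you saved this manifest:

```bash
# Filename is an example, use the file you saved the production issuer in
kubectl apply -f clusterissuer-production.yaml

# Both issuers should now report Ready
kubectl get clusterissuers.cert-manager.io
```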

Update the `Ingress` to reference the new `ClusterIssuer`:
```yaml
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: test-ingress-https
  annotations:
    cert-manager.io/cluster-issuer: letsencrypt
spec:
  tls:
    - hosts:
        - test.vezpi.me
      secretName: test-vezpi-me-tls
  rules:
    - host: test.vezpi.me
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: test-lb
                port:
                  number: 80
```

Since the staging certificate is still stored in the Secret, I delete it to trigger a fresh request against production:
```bash
kubectl delete secret test-vezpi-me-tls
```
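
Cert-Manager notices the missing Secret and re-runs the issuance, this time against the production issuer. A quick way to follow it, assuming the `Certificate` keeps the same name as the Secret:

```bash
# Wait for the Certificate to become Ready again
kubectl get certificate test-vezpi-me-tls --watch

# Confirm the Secret has been recreated with the new certificate
kubectl get secret test-vezpi-me-tls
```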

🎉 My `Ingress` is now secured with a valid TLS certificate from Let’s Encrypt. Requests to `https://test.vezpi.me` are served over HTTPS, with TLS terminated by the NGINX Ingress Controller and the traffic routed to my `nginx` pod:
![Pasted_image_20250903215856.png](img/pasted-image-20250903215856.png)


---
## Conclusion

In this journey, I started from the basics, exposing a single pod with a `LoadBalancer` service, and step by step built a production-ready setup:
- Learned about **Kubernetes Services** and their different types.
- Used **BGP with Cilium** and OPNsense to assign external IPs directly from my network.
- Introduced **Ingress** to scale better, exposing multiple services through a single entry point.
- Installed the **NGINX Ingress Controller** to handle routing.
- Automated certificate management with **Cert-Manager**, securing my services with Let’s Encrypt TLS certificates.

🚀 The result: my pod is now reachable at a real URL, secured with HTTPS, just like any modern web application.

This is a huge milestone in my homelab Kubernetes journey. In the next article, I want to explore persistent storage and connect my Kubernetes cluster to my **Ceph** setup on **Proxmox**.

```diff
@@ -1,5 +1,5 @@
 ---
-title: Random post
+title: Playground
 description:
 date: 2025-06-25
 draft: true
@@ -9,4 +9,10 @@ categories:
 
 Hi there, how are you ?
 
-I'm testing
+I'm ==testing==
+
+## Emoji
+
+🚀💡🔧🔁⚙️📝📌✅⚠️🍒❌ℹ️⌛🚨🎉
+
+[post]({{< ref "post/0-template" >}})
```
````diff
@@ -1,43 +0,0 @@
----
-slug:
-title: Template
-description:
-date:
-draft: true
-tags:
-categories:
----
-## Develop a Terraform Module
-
-In the final step of this article, I will show you how you can transform this piece of code in a reusable Terraform module.
-
-### What is a Terraform Module?
-
-Terraform modules are reusable components that let you organize and simplify your infrastructure code by grouping related resources into a single unit. Instead of repeating the same configuration across multiple places, you can define it once in a module and use it wherever needed, just like calling a function in programming.
-
-Modules can be local (within your project) or remote (from the Terraform Registry or a Git repository), making it easy to share and standardize infrastructure patterns across teams or projects. By using modules, you make your code more readable, maintainable, and scalable.
-
-### Terraform Code
-
-We will now transform the Terraform code above by creating our own module called `pve_vm
-
-> 📌 Reminder, you can find all the code I have written in my [Homelab repo](https://git.vezpi.me/Vezpi/Homelab/), the following code is located [here](https://git.vezpi.me/Vezpi/Homelab/src/commit/22f64034175a6a4642a2c7b6656688f16ece5ba1/terraform/projects/simple-vm). Don't forget to match your variables with your environment!
-#### Code Structure
-
-```plaintext
-terraform
-|-- modules
-|   `-- pve_vm
-|       |-- main.tf
-|       |-- provider.tf
-|       `-- variables.tf
-`-- projects
-    `-- simple-vm-with-module
-        |-- credentials.auto.tfvars
-        |-- main.tf
-        |-- provider.tf
-        `-- variables.tf
-```
-
-#### Module
-
````
```diff
@@ -2,7 +2,7 @@
 set -e
 
 # Configuration
-REPO_URL="${REPO_URL:-https://git.vezpi.me/Vezpi/blog.git}"
+REPO_URL="${REPO_URL:-https://git.vezpi.com/Vezpi/blog.git}"
 URL="${URL:-blog.vezpi.com}"
 BRANCH="${BRANCH:-preview}"
 CLONE_DIR="${CLONE_DIR:-/blog}"
@@ -14,6 +14,9 @@ if [ "$BRANCH" = "preview" ]; then
   DRAFTS="--buildDrafts --buildFuture"
 fi
 
+# Clean blog dir
+rm -rf "$CLONE_DIR"
+
 # Clone repo
 echo "- Cloning $REPO_URL (branch: $BRANCH)..."
 git clone --recurse-submodules --branch "$BRANCH" "$REPO_URL" "$CLONE_DIR"
```
`hugo.yaml` (14 lines changed):

```diff
@@ -38,12 +38,6 @@ languages:
         weight: 20
         params:
           icon: "brand-github"
-      - identifier: "gitea"
-        name: "Gitea"
-        url: "https://git.vezpi.me/Vezpi/blog"
-        weight: 30
-        params:
-          icon: "brand-git"
       - identifier: "linkedin"
         name: "LinkedIn"
         url: "https://www.linkedin.com/in/etiennegirault/"
@@ -90,16 +84,10 @@ languages:
         weight: 20
         params:
           icon: "brand-github"
-      - identifier: "gitea"
-        name: "Gitea"
-        url: "https://git.vezpi.me/Vezpi/blog"
-        weight: 30
-        params:
-          icon: "brand-git"
       - identifier: "linkedin"
         name: "LinkedIn"
         url: "https://www.linkedin.com/in/etiennegirault/"
-        weight: 40
+        weight: 30
         params:
           icon: "brand-linkedin"
```
`layouts/index.html` (new file, 30 lines):

```diff
@@ -0,0 +1,30 @@
+{{ define "main" }}
+<header class="homepage-header">
+    {{ with .Title }}
+    <h1 class="article-title">{{ . }}</h1>
+    {{ end }}
+    {{ with .Content }}
+    <div class="article-subtitle">
+        {{ . }}
+    </div>
+    {{ end }}
+</header>
+
+{{ $pages := where .Site.RegularPages "Type" "in" .Site.Params.mainSections }}
+{{ $notHidden := where .Site.RegularPages "Params.hidden" "!=" true }}
+{{ $filtered := ($pages | intersect $notHidden) }}
+{{ $pag := .Paginate ($filtered) }}
+
+<section class="article-list">
+    {{ range $index, $element := $pag.Pages }}
+        {{ partial "article-list/default" . }}
+    {{ end }}
+</section>
+
+{{- partial "pagination.html" . -}}
+{{- partial "footer/footer" . -}}
+{{ end }}
+
+{{ define "right-sidebar" }}
+    {{ partial "sidebar/right.html" (dict "Context" . "Scope" "homepage") }}
+{{ end }}
```
```diff
@@ -1,4 +1,7 @@
-<script defer src="https://analytics.vezpi.me/script.js" data-website-id="e50e5843-1039-4bc8-a3f6-80f60e25ea38"></script>
+<script defer src="https://analytics.vezpi.com/script.js" data-website-id="e50e5843-1039-4bc8-a3f6-80f60e25ea38"></script>
+{{ if or (eq .Kind "taxonomy") (eq .Kind "term") }}
+<meta name="robots" content="noindex,follow">
+{{ end }}
 {{- if .Params.keywords }}
 <meta name="keywords" content="{{ delimit .Params.keywords ", " }}">
 {{- else if .Params.tags }}
```
```diff
@@ -8,7 +8,7 @@
 <header>
     {{ with .Site.Params.sidebar.avatar }}
         {{ if (default true .enabled) }}
-            <figure class="site-avatar">
+            <figure class="site-avatar" style="margin-left: auto; margin-right: auto;">
                 <a href="{{ .Site.BaseURL | relLangURL }}">
                     {{ if not .local }}
                         <img src="{{ .src }}" width="300" height="300" class="site-logo" loading="lazy" alt="Avatar">
@@ -31,12 +31,13 @@
         {{ end }}
     {{ end }}
 
-    <div class="site-meta">
+    <div class="site-meta" style="text-align: center;">
         <h1 class="site-name"><a href="{{ .Site.BaseURL | relLangURL }}">{{ .Site.Title }}</a></h1>
-        <h2 class="site-description">{{ .Site.Params.sidebar.subtitle }}</h2>
+        <h2 class="site-description" style="font-size: 1.3rem;">{{ .Site.Params.sidebar.subtitle }}</h2>
     </div>
 </header>
 
+{{- $page := . -}}
 {{- with .Site.Menus.social -}}
     <ol class="menu-social">
         {{ range . }}
@@ -54,6 +55,16 @@
             </a>
         </li>
         {{ end }}
+        {{- $currentLang := $page.Language.Lang -}}
+        {{- range $page.AllTranslations }}
+        {{- if ne .Language.Lang $currentLang }}
+        <li class="lang-toggle-icon">
+            <a href="{{ .Permalink }}" title="Switch to {{ .Language.Lang }}">
+                {{ partial "helper/icon" (printf "toggle_to_%s" .Language.Lang) }}
+            </a>
+        </li>
+        {{- end }}
+        {{- end }}
     </ol>
 {{- end -}}
 
@@ -76,22 +87,9 @@
 {{ end }}
 <li class="menu-bottom-section">
     <ol class="menu">
-        {{- $currentLang := .Language.Lang -}}
-        {{- range .Site.Home.AllTranslations }}
-        {{- if ne .Language.Lang $currentLang }}
-        <li id="i18n-switch">
-            <a href="{{ .Permalink }}" title="{{ .Language.LanguageName }}">
-                {{ partial "helper/icon" "language" }}
-                {{ .Language.LanguageName }}
-            </a>
-        </li>
-        {{- end }}
-        {{- end }}
-
         {{ if (default false .Site.Params.colorScheme.toggle) }}
         <li id="dark-mode-toggle">
             {{ partial "helper/icon" "moon" }}
-            {{ partial "helper/icon" "brightness-up" }}
             <span>{{ T "darkMode" }}</span>
         </li>
         {{ end }}
```
New binary files added under `static/img/` (screenshots for the Gitea/ntfy, Gotify, Home Assistant, Node-RED, OPNsense and Kubernetes posts); the diff viewer only reports their image dimensions and file sizes, omitted here.