kubernetes-arch-install/worker_node_install.sh

293 lines
7.3 KiB
Bash

#!/usr/bin/env bash
# Strict mode, spelled out with long option names:
#   errtrace - the ERR trap is inherited by functions and subshells
#   errexit  - abort on an unhandled non-zero exit status
#   nounset  - expanding an unset variable is an error
#   pipefail - a pipeline fails if any stage fails
set -o errtrace -o errexit -o nounset -o pipefail
########################################
# Arch Linux RKE2 Worker Node
#
# What this script does:
# - Disables swap
# - Installs required Arch packages
# - Configures kernel modules and sysctl for Kubernetes
# - Configures NetworkManager to ignore CNI interfaces
# - Disables host nftables service to avoid breaking RKE2 service routing
# - Installs RKE2 agent pinned to the same version as the master
# - Optionally joins the worker to the cluster automatically
#
# Optional environment variables:
# RKE2_VERSION=v1.34.5+rke2r1
# SERVER_URL=https://10.28.24.17:9345
# RKE2_TOKEN=your-node-token
# WORKER_NODE_NAME=arch-kubernetes-worker1
# START_RKE2=true
#
# Notes:
# - If SERVER_URL and RKE2_TOKEN are both set, the script will configure
# and start the worker automatically.
# - If they are not set, the script will install everything and stop after
# preparing the node.
########################################
# Settings that may be overridden from the environment. Each one keeps the
# caller's value when it is set and non-empty, otherwise the default applies.
: "${RKE2_VERSION:=v1.34.5+rke2r1}"
: "${SERVER_URL:=}"
: "${RKE2_TOKEN:=}"
: "${WORKER_NODE_NAME:=}"
: "${START_RKE2:=true}"

# Fixed location of the RKE2 agent configuration.
RKE2_CONFIG_DIR=/etc/rancher/rke2
RKE2_CONFIG_FILE="${RKE2_CONFIG_DIR}/config.yaml"
#######################################
# Print an informational banner to stdout: a blank line, then the message
# framed above and below by a rule of '=' characters.
# Arguments: the message words (joined with spaces)
#######################################
log() {
  local -r rule="============================================================"
  printf '\n%s\n[INFO] %s\n%s\n' "${rule}" "$*" "${rule}"
}
#######################################
# Print a warning to stderr, preceded by a blank separator line.
# Both lines go to stderr so that redirecting or capturing stdout never
# picks up stray blank lines from warnings.
# Arguments: the message words (joined with spaces)
#######################################
warn() {
  printf '\n[WARN] %s\n' "$*" >&2
}
#######################################
# Print an error to stderr (preceded by a blank separator line) and
# terminate the script with exit status 1.
# Nothing is written to stdout, so captured output stays clean.
# Arguments: the message words (joined with spaces)
#######################################
die() {
  printf '\n[ERROR] %s\n' "$*" >&2
  exit 1
}
#######################################
# ERR-trap handler: report where the script failed, print commands that
# help diagnose the rke2-agent, then exit with the failing command's status.
# All output goes to stderr so it stays with the warnings it belongs to.
# Globals:   RKE2_CONFIG_FILE (read)
# Arguments: $1 - line number of the failing command (optional)
#######################################
on_error() {
  # Must be the first statement: any other command would overwrite $?.
  local exit_code=$?
  local line_no="${1:-unknown}"

  warn "Script failed on line ${line_no} with exit code ${exit_code}"
  warn "Useful diagnostics:"
  {
    echo " sudo systemctl status rke2-agent -l --no-pager"
    echo " sudo journalctl -u rke2-agent -n 200 --no-pager"
    echo " sudo cat ${RKE2_CONFIG_FILE}"
  } >&2
  exit "${exit_code}"
}
# Route every unhandled command failure through on_error, passing the line
# number at which the failure occurred.
trap 'on_error "${LINENO}"' ERR
#######################################
# Abort via die unless the script is running with effective UID 0.
#######################################
require_root() {
  if (( EUID != 0 )); then
    die "Run this script as root: sudo $0"
  fi
}
#######################################
# Turn swap off now and comment out active swap entries in fstab so it
# stays off after a reboot.
# A timestamped backup of fstab is made only when there is an active swap
# entry to disable, so repeated runs do not pile up identical backups.
# Arguments: $1 - fstab path (optional, defaults to /etc/fstab)
#######################################
disable_swap() {
  local fstab="${1:-/etc/fstab}"
  # Uncommented line with a whitespace-delimited "swap" field.
  # \s relies on GNU grep/sed.
  local -r swap_entry_re='^[^#].+\s+swap\s+'

  log "Disabling swap"
  # Best effort: swapoff may fail when no swap is active or configured.
  swapoff -a || true

  [[ -f "${fstab}" ]] || return 0

  if grep -Eq -- "${swap_entry_re}" "${fstab}"; then
    cp -- "${fstab}" "${fstab}.bak.$(date +%Y%m%d%H%M%S)"
    sed -Ei "/${swap_entry_re}/ s/^/# disabled-by-rke2-worker-script /" "${fstab}"
  fi
}
#######################################
# Refresh the keyring, replace the iptables package with iptables-nft,
# upgrade the system and install the packages this node needs.
#######################################
install_packages() {
  local -ar required_pkgs=(
    bash-completion
    ca-certificates
    cni-plugins
    conntrack-tools
    curl
    ethtool
    gzip
    iproute2
    iptables-nft
    jq
    nfs-utils
    open-iscsi
    openssl
    socat
    tar
    unzip
    wget
  )

  log "Installing required Arch packages"
  pacman -Sy --noconfirm archlinux-keyring

  # Remove the iptables package (ignoring dependency checks) so iptables-nft,
  # installed below, can take its place. Removal is best effort.
  if pacman -Q iptables &>/dev/null; then
    pacman -Rdd --noconfirm iptables || true
  fi

  pacman -Syu --noconfirm
  pacman -S --needed --noconfirm "${required_pkgs[@]}"
}
#######################################
# Persist and load the overlay and br_netfilter kernel modules, then write
# and apply the bridge-netfilter and IPv4-forwarding sysctl settings.
#######################################
configure_kernel() {
  local kmod
  local -ar kmods=(overlay br_netfilter)

  log "Configuring kernel modules and sysctl"

  # One module name per line, loaded at boot via modules-load.d.
  printf '%s\n' "${kmods[@]}" >/etc/modules-load.d/k8s.conf
  for kmod in "${kmods[@]}"; do
    modprobe "${kmod}"
  done

  printf '%s\n' \
    'net.bridge.bridge-nf-call-iptables = 1' \
    'net.bridge.bridge-nf-call-ip6tables = 1' \
    'net.ipv4.ip_forward = 1' \
    >/etc/sysctl.d/90-kubernetes.conf

  # Reload all sysctl configuration files; the per-key output is discarded.
  sysctl --system >/dev/null
}
#######################################
# If NetworkManager is enabled or running, tell it to leave CNI-created
# interfaces unmanaged and restart it. Then disable the nm-cloud-setup
# service and timer when those unit files are installed.
#######################################
configure_networkmanager() {
  local unit

  if systemctl is-enabled NetworkManager >/dev/null 2>&1 \
    || systemctl is-active NetworkManager >/dev/null 2>&1; then
    log "Configuring NetworkManager to ignore CNI interfaces"
    mkdir -p /etc/NetworkManager/conf.d
    printf '%s\n' \
      '[keyfile]' \
      'unmanaged-devices=interface-name:cali*;interface-name:flannel*;interface-name:cni*;interface-name:vxlan.calico;interface-name:kube-ipvs0;interface-name:nodelocaldns;interface-name:tunl*' \
      >/etc/NetworkManager/conf.d/rke2-cni.conf
    systemctl restart NetworkManager
  fi

  for unit in nm-cloud-setup.service nm-cloud-setup.timer; do
    # Ask systemctl for this one unit instead of grepping the full listing.
    # With pipefail, `list-unit-files | grep -q` can fail spuriously when grep
    # exits on the first match and systemctl is killed by SIGPIPE. Here the
    # output is at most one line, so grep never closes the pipe early.
    if systemctl list-unit-files --no-legend --no-pager "${unit}" 2>/dev/null \
      | grep -q .; then
      # Best effort: failing to disable the unit must not abort the install.
      systemctl disable --now "${unit}" || true
    fi
  done
}
#######################################
# Enable and start iscsid, and make sure the host nftables service is
# stopped, disabled and its ruleset flushed. Every step is best effort.
#######################################
enable_support_services() {
  local action

  log "Enabling support services"
  systemctl enable --now iscsid.service || true

  # Do NOT enable nftables.service here.
  # On this Arch + RKE2 setup it can break service routing for cluster IPs.
  for action in stop disable; do
    systemctl "${action}" nftables.service &>/dev/null || true
  done
  nft flush ruleset &>/dev/null || true
}
#######################################
# Run the get.rke2.io installer in agent mode pinned to RKE2_VERSION, and
# drop a profile.d snippet that appends the RKE2 bin directory and
# /usr/local/bin to PATH for login shells.
# Globals: RKE2_VERSION (read), RKE2_CONFIG_DIR (read)
#######################################
install_rke2_agent() {
  local -r installer_url="https://get.rke2.io"
  local -r profile_snippet="/etc/profile.d/rke2-path.sh"

  log "Installing RKE2 agent ${RKE2_VERSION}"
  mkdir -p "${RKE2_CONFIG_DIR}"

  # The installer reads its settings from the environment of the sh process.
  curl -sfL "${installer_url}" \
    | INSTALL_RKE2_TYPE=agent INSTALL_RKE2_VERSION="${RKE2_VERSION}" sh -

  mkdir -p /etc/profile.d
  # $PATH is written literally on purpose; it is expanded at login time.
  # shellcheck disable=SC2016
  printf '%s\n' 'export PATH=$PATH:/var/lib/rancher/rke2/bin:/usr/local/bin' \
    >"${profile_snippet}"
}
#######################################
# Write the RKE2 agent config from SERVER_URL, RKE2_TOKEN and
# WORKER_NODE_NAME (only the ones that are non-empty).
#
# - When none of the three is set and a non-empty config already exists,
#   the file is left as it is, so re-running the script without join
#   settings does not wipe a working configuration.
# - The file is created with mode 0600 before the token is written, so the
#   token is never readable by other users, not even briefly.
#
# Globals: SERVER_URL, RKE2_TOKEN, WORKER_NODE_NAME, RKE2_CONFIG_FILE (read)
#######################################
write_config_if_possible() {
  local -a config_lines=()

  if [[ -n "${SERVER_URL}" ]]; then
    config_lines+=("server: ${SERVER_URL}")
  fi
  if [[ -n "${RKE2_TOKEN}" ]]; then
    config_lines+=("token: ${RKE2_TOKEN}")
  fi
  if [[ -n "${WORKER_NODE_NAME}" ]]; then
    config_lines+=("node-name: ${WORKER_NODE_NAME}")
  fi

  if (( ${#config_lines[@]} == 0 )) && [[ -s "${RKE2_CONFIG_FILE}" ]]; then
    warn "No join settings provided; keeping existing ${RKE2_CONFIG_FILE}"
    chmod 600 "${RKE2_CONFIG_FILE}"
    return 0
  fi

  log "Writing RKE2 agent config"
  (
    # Subshell keeps the restrictive umask from leaking into later steps.
    umask 077
    # Create the file if it is missing, then lock it down BEFORE any secret
    # is written; this also covers a pre-existing file with loose permissions.
    : >>"${RKE2_CONFIG_FILE}"
    chmod 600 "${RKE2_CONFIG_FILE}"
    if (( ${#config_lines[@]} > 0 )); then
      printf '%s\n' "${config_lines[@]}" >"${RKE2_CONFIG_FILE}"
    fi
  )
}
#######################################
# Enable and (re)start rke2-agent, but only when an automatic join is both
# requested (START_RKE2=true) and possible (SERVER_URL and RKE2_TOKEN set).
#
# The unit is enabled only after those checks pass. Otherwise it is left
# untouched, which matches the warning below and the summary's
# "systemctl enable --now rke2-agent" instruction, and keeps an
# unconfigured agent from being started at the next boot.
#
# Globals: START_RKE2, SERVER_URL, RKE2_TOKEN (read)
#######################################
start_agent_if_possible() {
  # Pick up the unit files the installer just dropped in place.
  systemctl daemon-reload

  if [[ "${START_RKE2}" != "true" ]]; then
    warn "START_RKE2=false, leaving rke2-agent disabled from startup execution"
    return 0
  fi

  if [[ -z "${SERVER_URL}" || -z "${RKE2_TOKEN}" ]]; then
    warn "SERVER_URL and/or RKE2_TOKEN not set. Worker is prepared but not joined."
    return 0
  fi

  log "Starting RKE2 agent"
  systemctl enable rke2-agent.service
  systemctl restart rke2-agent.service
}
#######################################
# Poll until rke2-agent.service is active. Does nothing unless an automatic
# join was requested and possible. Prints recent unit logs every 30 seconds
# of waiting and gives up via die after 600 seconds.
# Globals: START_RKE2, SERVER_URL, RKE2_TOKEN (read)
#######################################
wait_for_agent() {
  local -r poll_secs=5 report_every=30 timeout_secs=600
  local elapsed=0

  if [[ "${START_RKE2}" != "true" || -z "${SERVER_URL}" || -z "${RKE2_TOKEN}" ]]; then
    return 0
  fi

  log "Waiting for rke2-agent service"
  while ! systemctl is-active --quiet rke2-agent.service; do
    sleep "${poll_secs}"
    elapsed=$((elapsed + poll_secs))

    # Periodic progress report with a short tail of the unit's journal.
    if (( elapsed % report_every == 0 )); then
      warn "rke2-agent not active yet; recent logs:"
      journalctl -u rke2-agent -n 40 --no-pager || true
    fi

    # Out of patience: dump a longer tail of the journal and abort.
    if (( elapsed >= timeout_secs )); then
      journalctl -u rke2-agent -n 200 --no-pager || true
      die "Timed out waiting for rke2-agent to become active"
    fi
  done
}
#######################################
# Print the final report: the effective settings, followed either by how
# to verify the automatic join or by how to join the node manually.
# Globals: RKE2_VERSION, RKE2_CONFIG_FILE, SERVER_URL, RKE2_TOKEN,
#          WORKER_NODE_NAME, START_RKE2 (read)
#######################################
print_summary() {
  log "Worker node preparation complete"
  printf '%s\n' \
    "RKE2 version: ${RKE2_VERSION}" \
    "Config file: ${RKE2_CONFIG_FILE}" \
    "Server URL: ${SERVER_URL:-<not set>}" \
    "Node name: ${WORKER_NODE_NAME:-<default hostname>}" \
    ""

  # No automatic join happened: explain how to finish the job by hand.
  if [[ -z "${SERVER_URL}" || -z "${RKE2_TOKEN}" || "${START_RKE2}" != "true" ]]; then
    printf '%s\n' \
      "Worker is installed and ready, but not joined yet." \
      "" \
      "To join later, set these in ${RKE2_CONFIG_FILE}:" \
      " server: https://YOUR_MASTER_IP:9345" \
      " token: YOUR_NODE_TOKEN"
    if [[ -n "${WORKER_NODE_NAME}" ]]; then
      printf '%s\n' " node-name: ${WORKER_NODE_NAME}"
    fi
    printf '%s\n' \
      "" \
      "Then run:" \
      " sudo systemctl enable --now rke2-agent"
    return 0
  fi

  printf '%s\n' \
    "Worker attempted automatic join." \
    "Check from the master with:" \
    " /var/lib/rancher/rke2/bin/kubectl get nodes -o wide" \
    "" \
    "Local diagnostics:" \
    " sudo systemctl status rke2-agent --no-pager" \
    " sudo journalctl -u rke2-agent -n 200 --no-pager"
}
#######################################
# Run every provisioning step in order. Command-line arguments are
# accepted but not used.
#######################################
main() {
  local step
  local -ar steps=(
    require_root
    disable_swap
    install_packages
    configure_kernel
    configure_networkmanager
    enable_support_services
    install_rke2_agent
    write_config_if_possible
    start_agent_if_possible
    wait_for_agent
    print_summary
  )

  for step in "${steps[@]}"; do
    "${step}"
  done
}
# Script entry point: run main, forwarding all command-line arguments.
main "$@"