Update worker_node_install.sh

This commit is contained in:
RomanNum3ral 2026-03-27 23:15:54 +00:00
parent 5e8f0e8d87
commit 53402a93ed
1 changed files with 239 additions and 241 deletions

View File

@ -2,26 +2,40 @@
set -Eeuo pipefail set -Eeuo pipefail
######################################## ########################################
# Arch Linux Kubernetes Worker Node # Arch Linux RKE2 Worker Node
# Fully automated worker node preparation #
# + Official Kubernetes binaries pinned to 1.34.x # What this script does:
# Ready for manual kubeadm join # - Disables swap
# - Installs required Arch packages
# - Configures kernel modules and sysctl for Kubernetes
# - Configures NetworkManager to ignore CNI interfaces
# - Disables host nftables service to avoid breaking RKE2 service routing
# - Installs RKE2 agent pinned to the same version as the master
# - Optionally joins the worker to the cluster automatically
#
# Optional environment variables:
# RKE2_VERSION=v1.34.5+rke2r1
# SERVER_URL=https://10.28.24.17:9345
# RKE2_TOKEN=your-node-token
# WORKER_NODE_NAME=arch-kubernetes-worker1
# START_RKE2=true
#
# Notes:
# - If SERVER_URL and RKE2_TOKEN are both set, the script will configure
# and start the worker automatically.
# - If they are not set, the script will install everything and stop after
# preparing the node.
######################################## ########################################
# ---------- Config ---------- RKE2_VERSION="${RKE2_VERSION:-v1.34.5+rke2r1}"
K8S_VERSION="${K8S_VERSION:-v1.34.6}" SERVER_URL="${SERVER_URL:-}"
K8S_SERIES_REGEX='^v1\.34\.[0-9]+$' RKE2_TOKEN="${RKE2_TOKEN:-}"
K8S_ARCH="${K8S_ARCH:-amd64}" WORKER_NODE_NAME="${WORKER_NODE_NAME:-}"
START_RKE2="${START_RKE2:-true}"
# Binary locations RKE2_CONFIG_DIR="/etc/rancher/rke2"
KUBEADM_BIN="/usr/local/bin/kubeadm" RKE2_CONFIG_FILE="${RKE2_CONFIG_DIR}/config.yaml"
KUBECTL_BIN="/usr/local/bin/kubectl"
KUBELET_BIN="/usr/local/bin/kubelet"
# Optional output file for your later manual join command
JOIN_HINT_FILE="${JOIN_HINT_FILE:-/root/kubeadm-join-example.txt}"
# ---------- Logging ----------
log() { log() {
echo echo
echo "============================================================" echo "============================================================"
@ -40,256 +54,240 @@ die() {
exit 1 exit 1
} }
# ---------- Helpers ----------
# Abort unless the shell can resolve the given command.
# Arguments: $1 - command name or path, looked up with `command -v`
# Exits through die() when the lookup fails; otherwise returns 0.
require_cmd() {
  local wanted="$1"

  if ! command -v "${wanted}" >/dev/null 2>&1; then
    die "Required command not found: ${wanted}"
  fi
}
# Compare a downloaded file against the digest stored in a .sha256 file.
# Arguments: $1 - path of the file to verify
#            $2 - path of the file whose first field is the expected digest
# Returns:   0 when the digest matches, non-zero otherwise
_k8s_checksum_matches() {
  local file="$1"
  local sum_file="$2"
  local expected

  expected="$(head -n 1 "${sum_file}")" || return 1
  # Keep only the digest in case the file also carries a file name.
  expected="${expected%%[[:space:]]*}"
  [[ -n "${expected}" ]] || return 1

  # sha256sum --check expects "<digest><two spaces><path>".
  printf '%s  %s\n' "${expected}" "${file}" | sha256sum --check --status
}

# Download one Kubernetes release binary, verify its SHA-256 digest and
# install it as /usr/local/bin/<name> (root:root, mode 0755).
# Globals:   K8S_VERSION, K8S_ARCH (read)
# Arguments: $1 - binary name, e.g. kubeadm, kubectl or kubelet
# Exits through die() on any failure. The temporary download directory is
# removed on every path, including failures.
download_k8s_binary() {
  local name="${1:-}"
  [[ -n "${name}" ]] || die "download_k8s_binary: missing binary name"

  local base_url="https://dl.k8s.io/release/${K8S_VERSION}/bin/linux/${K8S_ARCH}"
  local tmpdir
  tmpdir="$(mktemp -d)" || die "Could not create a temporary directory for ${name}"

  # Record the first failure instead of exiting immediately so the cleanup
  # below always runs before die() terminates the script.
  local failure=""
  if ! curl -fsSL -o "${tmpdir}/${name}" "${base_url}/${name}"; then
    failure="Download failed for ${name} ${K8S_VERSION}"
  elif ! curl -fsSL -o "${tmpdir}/${name}.sha256" "${base_url}/${name}.sha256"; then
    failure="Checksum download failed for ${name} ${K8S_VERSION}"
  elif ! _k8s_checksum_matches "${tmpdir}/${name}" "${tmpdir}/${name}.sha256"; then
    failure="Checksum verification failed for ${name} ${K8S_VERSION}"
  elif ! install -o root -g root -m 0755 "${tmpdir}/${name}" "/usr/local/bin/${name}"; then
    failure="Could not install ${name} into /usr/local/bin"
  fi

  rm -rf -- "${tmpdir:?}"

  if [[ -n "${failure}" ]]; then
    die "${failure}"
  fi
}
# Write the kubelet systemd unit and its kubeadm drop-in.
# Files written:
#   /etc/systemd/system/kubelet.service
#   /etc/systemd/system/kubelet.service.d/10-kubeadm.conf
# Also makes sure /etc/default/kubelet exists.
# This function does not reload systemd or start the unit itself.
install_kubelet_service() {
log "Installing kubelet systemd service"
# Drop-in directory for the kubeadm override written below.
mkdir -p /etc/systemd/system/kubelet.service.d
# Create the optional environment file that the drop-in references.
touch /etc/default/kubelet
# Base unit: runs /usr/local/bin/kubelet, requires containerd.service and
# restarts on exit. The quoted 'EOF' keeps the text literal (no shell expansion).
cat >/etc/systemd/system/kubelet.service <<'EOF'
[Unit]
Description=kubelet: The Kubernetes Node Agent
Documentation=https://kubernetes.io/docs/
After=containerd.service network-online.target
Wants=network-online.target
Requires=containerd.service
[Service]
ExecStart=/usr/local/bin/kubelet
Restart=always
StartLimitInterval=0
RestartSec=10
[Install]
WantedBy=multi-user.target
EOF
# Drop-in: the empty ExecStart= clears the base command, then kubelet is
# re-declared with the kubeconfig/config arguments plus any values supplied by
# the two optional environment files. The $KUBELET_* names are written
# literally for systemd to substitute, not expanded by this script.
cat >/etc/systemd/system/kubelet.service.d/10-kubeadm.conf <<'EOF'
[Service]
Environment="KUBELET_KUBECONFIG_ARGS=--bootstrap-kubeconfig=/etc/kubernetes/bootstrap-kubelet.conf --kubeconfig=/etc/kubernetes/kubelet.conf"
Environment="KUBELET_CONFIG_ARGS=--config=/var/lib/kubelet/config.yaml"
EnvironmentFile=-/var/lib/kubelet/kubeadm-flags.env
EnvironmentFile=-/etc/default/kubelet
ExecStart=
ExecStart=/usr/local/bin/kubelet $KUBELET_KUBECONFIG_ARGS $KUBELET_CONFIG_ARGS $KUBELET_KUBEADM_ARGS $KUBELET_EXTRA_ARGS
EOF
}
# Remove leftovers from an earlier kubeadm-based worker setup.
# Runs `kubeadm reset -f` (failures ignored), deletes the CNI/kubeadm/kubelet
# state paths listed below, and removes the cni0, flannel.1 and kube-ipvs0
# links when they exist. Every step is best-effort.
cleanup_old_k8s_state() {
  local -a stale_paths=(
    /etc/cni/net.d
    /var/lib/cni
    /etc/kubernetes
    /var/lib/kubelet/pki
    /var/lib/kubelet/config.yaml
    /var/lib/kubelet/kubeadm-flags.env
  )
  local iface

  log "Cleaning up any previous Kubernetes worker state"

  # A missing or failing kubeadm must not stop the cleanup.
  kubeadm reset -f &>/dev/null || true

  rm -rf "${stale_paths[@]}"

  # Deleting a link that does not exist is expected and silently ignored.
  for iface in cni0 flannel.1 kube-ipvs0; do
    ip link delete "${iface}" 2>/dev/null || true
  done
}
# ---------- Root check ----------
# Refuse to continue unless the effective user ID is 0.
(( EUID == 0 )) || die "Run this script as root, for example: sudo ./worker_node_prepare.sh"
# ---------- Cleanup on error ----------
on_error() { on_error() {
local exit_code=$? local exit_code=$?
warn "Script failed on line $1 with exit code ${exit_code}" local line_no=$1
warn "Script failed on line ${line_no} with exit code ${exit_code}"
warn "Useful diagnostics:" warn "Useful diagnostics:"
echo " journalctl -u containerd -u kubelet -b --no-pager | tail -n 200" echo " sudo systemctl status rke2-agent -l --no-pager"
echo " systemctl status containerd kubelet --no-pager" echo " sudo journalctl -u rke2-agent -n 200 --no-pager"
echo " sudo cat ${RKE2_CONFIG_FILE}"
exit "${exit_code}" exit "${exit_code}"
} }
trap 'on_error $LINENO' ERR trap 'on_error $LINENO' ERR
# ---------- Version guard ---------- require_root() {
[[ "${K8S_VERSION}" =~ ${K8S_SERIES_REGEX} ]] || die \ [[ "${EUID}" -eq 0 ]] || die "Run this script as root: sudo $0"
"This worker script is intended for Kubernetes 1.34.x to match your master node. Current K8S_VERSION=${K8S_VERSION}" }
# ---------- Step 1: Disable swap ---------- disable_swap() {
log "Disabling swap immediately" log "Disabling swap"
swapoff -a || true
log "Disabling swap persistently in /etc/fstab" swapoff -a || true
if [[ -f /etc/fstab ]]; then
cp /etc/fstab /etc/fstab.bak.$(date +%Y%m%d%H%M%S)
sed -ri '/\sswap\s/s/^/# DISABLED FOR KUBERNETES: /' /etc/fstab
fi
# ---------- Step 2: Update system ---------- if [[ -f /etc/fstab ]]; then
log "Updating package databases and system packages" cp /etc/fstab "/etc/fstab.bak.$(date +%Y%m%d%H%M%S)"
pacman -Syu --noconfirm sed -Ei '/^[^#].+\s+swap\s+/ s/^/# disabled-by-rke2-worker-script /' /etc/fstab
# ---------- Step 3: Resolve iptables conflict automatically ----------
log "Resolving iptables backend for Kubernetes"
if pacman -Q iptables >/dev/null 2>&1; then
log "Removing legacy iptables package so iptables-nft can be installed"
pacman -Rdd --noconfirm iptables || true
fi
# ---------- Step 4: Install required Arch packages ----------
log "Installing runtime and support packages from Arch"
pacman -S --needed --noconfirm \
ca-certificates \
curl \
containerd \
cni-plugins \
crictl \
ethtool \
iptables-nft \
conntrack-tools \
socat \
tar \
gzip \
jq \
openssl
# ---------- Step 5: Remove Arch Kubernetes packages if present ----------
log "Removing Arch-provided kubeadm/kubectl/kubelet if present"
for pkg in kubeadm kubectl kubelet; do
if pacman -Q "${pkg}" >/dev/null 2>&1; then
pacman -Rdd --noconfirm "${pkg}" || true
fi fi
done }
# ---------- Step 6: Install pinned Kubernetes binaries ---------- install_packages() {
log "Installing Kubernetes binaries ${K8S_VERSION}" log "Installing required Arch packages"
download_k8s_binary kubeadm
download_k8s_binary kubectl
download_k8s_binary kubelet
require_cmd "${KUBEADM_BIN}" pacman -Sy --noconfirm archlinux-keyring
require_cmd "${KUBECTL_BIN}"
require_cmd "${KUBELET_BIN}"
# ---------- Step 7: Kernel modules ---------- if pacman -Q iptables >/dev/null 2>&1; then
log "Configuring required kernel modules" pacman -Rdd --noconfirm iptables || true
cat >/etc/modules-load.d/k8s.conf <<'EOF' fi
pacman -Syu --noconfirm
pacman -S --needed --noconfirm \
bash-completion \
ca-certificates \
cni-plugins \
conntrack-tools \
curl \
ethtool \
gzip \
iproute2 \
iptables-nft \
jq \
nfs-utils \
open-iscsi \
openssl \
socat \
tar \
unzip \
wget
}
configure_kernel() {
log "Configuring kernel modules and sysctl"
cat >/etc/modules-load.d/k8s.conf <<'EOF'
overlay overlay
br_netfilter br_netfilter
EOF EOF
modprobe overlay modprobe overlay
modprobe br_netfilter modprobe br_netfilter
# ---------- Step 8: Sysctl ---------- cat >/etc/sysctl.d/90-kubernetes.conf <<'EOF'
log "Configuring Kubernetes sysctl settings"
cat >/etc/sysctl.d/99-kubernetes-cri.conf <<'EOF'
net.bridge.bridge-nf-call-iptables = 1 net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-ip6tables = 1 net.bridge.bridge-nf-call-ip6tables = 1
net.ipv4.ip_forward = 1 net.ipv4.ip_forward = 1
EOF EOF
sysctl --system sysctl --system >/dev/null
}
# ---------- Step 9: Configure containerd ---------- configure_networkmanager() {
log "Configuring containerd" if systemctl is-enabled NetworkManager >/dev/null 2>&1 || systemctl is-active NetworkManager >/dev/null 2>&1; then
mkdir -p /etc/containerd log "Configuring NetworkManager to ignore CNI interfaces"
if [[ ! -f /etc/containerd/config.toml ]]; then mkdir -p /etc/NetworkManager/conf.d
containerd config default >/etc/containerd/config.toml cat >/etc/NetworkManager/conf.d/rke2-cni.conf <<'EOF'
else [keyfile]
cp /etc/containerd/config.toml /etc/containerd/config.toml.bak.$(date +%Y%m%d%H%M%S) unmanaged-devices=interface-name:cali*;interface-name:flannel*;interface-name:cni*;interface-name:vxlan.calico;interface-name:kube-ipvs0;interface-name:nodelocaldns;interface-name:tunl*
fi
sed -ri 's/^\s*SystemdCgroup = false/SystemdCgroup = true/' /etc/containerd/config.toml
# ---------- Step 10: Install kubelet service ----------
install_kubelet_service
# ---------- Step 11: Clean previous worker state ----------
cleanup_old_k8s_state
# ---------- Step 12: Enable services ----------
log "Enabling and starting containerd and kubelet"
systemctl daemon-reload
systemctl enable --now containerd
systemctl enable --now kubelet
# ---------- Step 13: Wait for containerd ----------
log "Waiting for containerd to become active"
for i in {1..20}; do
if systemctl is-active --quiet containerd; then
break
fi
sleep 1
done
systemctl is-active --quiet containerd || die "containerd did not start successfully"
# ---------- Step 14: Verify pinned versions ----------
log "Verifying installed Kubernetes component versions"
KUBEADM_VERSION="$("${KUBEADM_BIN}" version -o short 2>/dev/null || true)"
KUBECTL_VERSION="$("${KUBECTL_BIN}" version --client -o json 2>/dev/null | jq -r '.clientVersion.gitVersion // empty')"
KUBELET_VERSION="$("${KUBELET_BIN}" --version 2>/dev/null | awk '{print $2}')"
[[ "${KUBEADM_VERSION}" =~ ${K8S_SERIES_REGEX} ]] || die "kubeadm version mismatch: ${KUBEADM_VERSION}"
[[ "${KUBECTL_VERSION}" =~ ${K8S_SERIES_REGEX} ]] || die "kubectl version mismatch: ${KUBECTL_VERSION}"
[[ "${KUBELET_VERSION}" =~ ${K8S_SERIES_REGEX} ]] || die "kubelet version mismatch: ${KUBELET_VERSION}"
# ---------- Step 15: Write join hint ----------
log "Writing manual join hint"
cat >"${JOIN_HINT_FILE}" <<'EOF'
Run your worker join command manually, for example:
sudo kubeadm join <CONTROL_PLANE_IP>:6443 --token <TOKEN> \
--discovery-token-ca-cert-hash sha256:<HASH>
EOF EOF
chmod 600 "${JOIN_HINT_FILE}"
# ---------- Final output ---------- systemctl restart NetworkManager
echo fi
echo "Worker node preparation is complete."
echo if systemctl list-unit-files | grep -q '^nm-cloud-setup.service'; then
echo "Pinned Kubernetes version:" systemctl disable --now nm-cloud-setup.service || true
echo " ${K8S_VERSION}" fi
echo if systemctl list-unit-files | grep -q '^nm-cloud-setup.timer'; then
echo "Installed binaries:" systemctl disable --now nm-cloud-setup.timer || true
echo " ${KUBEADM_BIN}" fi
echo " ${KUBECTL_BIN}" }
echo " ${KUBELET_BIN}"
echo enable_support_services() {
echo "Services:" log "Enabling support services"
echo " containerd: $(systemctl is-active containerd || true)"
echo " kubelet: $(systemctl is-active kubelet || true)" systemctl enable --now iscsid.service || true
echo
echo "Next step:" # Do NOT enable nftables.service here.
echo " Run your kubeadm join command manually on this worker." # On this Arch + RKE2 setup it can break service routing for cluster IPs.
echo systemctl stop nftables.service >/dev/null 2>&1 || true
echo "Example hint saved to:" systemctl disable nftables.service >/dev/null 2>&1 || true
echo " ${JOIN_HINT_FILE}" nft flush ruleset >/dev/null 2>&1 || true
echo }
echo "Example:"
echo " sudo kubeadm join <CONTROL_PLANE_IP>:6443 --token <TOKEN> \\" install_rke2_agent() {
echo " --discovery-token-ca-cert-hash sha256:<HASH>" log "Installing RKE2 agent ${RKE2_VERSION}"
echo
mkdir -p "${RKE2_CONFIG_DIR}"
curl -sfL https://get.rke2.io | INSTALL_RKE2_TYPE=agent INSTALL_RKE2_VERSION="${RKE2_VERSION}" sh -
mkdir -p /etc/profile.d
cat >/etc/profile.d/rke2-path.sh <<'EOF'
export PATH=$PATH:/var/lib/rancher/rke2/bin:/usr/local/bin
EOF
}
# Write the RKE2 agent config file from the join settings in the environment.
# Globals:   SERVER_URL, RKE2_TOKEN, WORKER_NODE_NAME, RKE2_CONFIG_FILE (read)
# Behaviour:
#   - Only non-empty settings are emitted (server, token, node-name).
#   - When no setting is supplied and a non-empty config already exists, the
#     existing file is kept, so a re-run without the environment variables
#     does not wipe a working join configuration.
#   - Values are written as single-quoted YAML scalars so characters such as
#     '#' or ': ' cannot change how the value is parsed.
#   - The file is staged with mktemp (created private to the owner) and then
#     renamed into place, so the token is never readable by other users.
# Returns:   0 on success, 1 if the file could not be written
write_config_if_possible() {
  local quote="'"
  local config_dir staged key value escaped
  local content=""

  if [[ -z "${SERVER_URL}${RKE2_TOKEN}${WORKER_NODE_NAME}" && -s "${RKE2_CONFIG_FILE}" ]]; then
    log "No join settings supplied; keeping existing ${RKE2_CONFIG_FILE}"
    return 0
  fi

  log "Writing RKE2 agent config"

  for key in server token node-name; do
    case "${key}" in
      server) value="${SERVER_URL}" ;;
      token) value="${RKE2_TOKEN}" ;;
      node-name) value="${WORKER_NODE_NAME}" ;;
    esac
    if [[ -n "${value}" ]]; then
      # Inside a single-quoted YAML scalar a quote is escaped by doubling it.
      escaped="${value//${quote}/${quote}${quote}}"
      content+="${key}: '${escaped}'"$'\n'
    fi
  done

  config_dir="$(dirname -- "${RKE2_CONFIG_FILE}")"
  mkdir -p -- "${config_dir}"
  staged="$(mktemp "${RKE2_CONFIG_FILE}.XXXXXX")"

  if ! {
    printf '%s' "${content}" >"${staged}" &&
      chmod 600 "${staged}" &&
      mv -f -- "${staged}" "${RKE2_CONFIG_FILE}"
  }; then
    rm -f -- "${staged}"
    return 1
  fi
}
# Enable rke2-agent at boot and, when allowed and configured, start it now.
# Globals: START_RKE2, SERVER_URL, RKE2_TOKEN (read)
# The unit is always enabled. It is (re)started only when START_RKE2 is
# exactly "true" and both SERVER_URL and RKE2_TOKEN are non-empty; otherwise
# a warning explains why the agent was left stopped.
start_agent_if_possible() {
  systemctl daemon-reload
  systemctl enable rke2-agent.service

  if [[ "${START_RKE2}" != "true" ]]; then
    # Report the real value and the real unit state: the unit was just
    # enabled, it is only the immediate start that is skipped.
    warn "START_RKE2=${START_RKE2}: rke2-agent is enabled for boot but was not started now"
    return 0
  fi

  if [[ -z "${SERVER_URL}" || -z "${RKE2_TOKEN}" ]]; then
    warn "SERVER_URL and/or RKE2_TOKEN not set. Worker is prepared but not joined."
    return 0
  fi

  log "Starting RKE2 agent"
  systemctl restart rke2-agent.service
}
# Block until rke2-agent.service reports active.
# Globals: START_RKE2, SERVER_URL, RKE2_TOKEN (read)
# Returns immediately when the agent was not started (START_RKE2 is not
# "true", or SERVER_URL / RKE2_TOKEN is empty). Otherwise polls every
# 5 seconds, prints the last 40 journal lines every 30 seconds, and after
# 600 seconds prints the last 200 lines and exits through die().
wait_for_agent() {
  [[ "${START_RKE2}" == "true" ]] || return 0
  [[ -n "${SERVER_URL}" && -n "${RKE2_TOKEN}" ]] || return 0

  log "Waiting for rke2-agent service"

  local elapsed=0
  while ! systemctl is-active --quiet rke2-agent.service; do
    sleep 5
    elapsed=$((elapsed + 5))

    # Periodic progress report: a non-zero remainder skips the block.
    (( elapsed % 30 )) || {
      warn "rke2-agent not active yet; recent logs:"
      journalctl -u rke2-agent -n 40 --no-pager || true
    }

    # Hard stop once the time budget is used up.
    (( elapsed < 600 )) || {
      journalctl -u rke2-agent -n 200 --no-pager || true
      die "Timed out waiting for rke2-agent to become active"
    }
  done
}
# Print the final report for the operator.
# Globals: RKE2_VERSION, RKE2_CONFIG_FILE, SERVER_URL, RKE2_TOKEN,
#          WORKER_NODE_NAME, START_RKE2 (read)
# Shows the effective settings, then either diagnostics for an attempted
# automatic join or the manual steps needed to join later.
print_summary() {
  local shown_server="${SERVER_URL:-<not set>}"
  local shown_node="${WORKER_NODE_NAME:-<default hostname>}"
  local -a join_hints

  log "Worker node preparation complete"

  printf '%s\n' \
    "RKE2 version: ${RKE2_VERSION}" \
    "Config file: ${RKE2_CONFIG_FILE}" \
    "Server URL: ${shown_server}" \
    "Node name: ${shown_node}" \
    ""

  # An automatic join was attempted only when everything needed was supplied.
  if [[ -n "${SERVER_URL}" && -n "${RKE2_TOKEN}" && "${START_RKE2}" == "true" ]]; then
    printf '%s\n' \
      "Worker attempted automatic join." \
      "Check from the master with:" \
      " /var/lib/rancher/rke2/bin/kubectl get nodes -o wide" \
      "" \
      "Local diagnostics:" \
      " sudo systemctl status rke2-agent --no-pager" \
      " sudo journalctl -u rke2-agent -n 200 --no-pager"
    return 0
  fi

  join_hints=(
    " server: https://YOUR_MASTER_IP:9345"
    " token: YOUR_NODE_TOKEN"
  )
  if [[ -n "${WORKER_NODE_NAME}" ]]; then
    join_hints+=(" node-name: ${WORKER_NODE_NAME}")
  fi

  printf '%s\n' \
    "Worker is installed and ready, but not joined yet." \
    "" \
    "To join later, set these in ${RKE2_CONFIG_FILE}:" \
    "${join_hints[@]}" \
    "" \
    "Then run:" \
    " sudo systemctl enable --now rke2-agent"
}
# Run every preparation step in order, from the root check to the final
# summary. Steps are invoked without arguments, one after another.
main() {
  local step
  local -a steps=(
    require_root
    disable_swap
    install_packages
    configure_kernel
    configure_networkmanager
    enable_support_services
    install_rke2_agent
    write_config_if_possible
    start_agent_if_possible
    wait_for_agent
    print_summary
  )

  for step in "${steps[@]}"; do
    "${step}"
  done
}
# Script entry point: invoke main with the script's command-line arguments.
main "$@"