Reviewer note (text before the first "diff --git" line is not part of the patch):

  * This patch was rebuilt from a copy whose newlines, blank lines and
    indentation had been collapsed. The 2-space indentation and the position
    of blank context lines are reconstructions, so confirm them against the
    real pre-image. `git apply --recount --ignore-whitespace` may help.
  * The removed "kubeadm join" example lines look as if angle-bracket
    placeholders were stripped from them; they are reproduced as found.
  * The added side contains review fixes, so the new blob id on the "index"
    line no longer describes the resulting file.
  * Review fixes on the added side:
      - write_config_if_possible: create the config 0600 before the token is
        written, and keep an existing config when no settings are supplied.
      - start_agent_if_possible: enable the unit only when join settings
        exist; make the START_RKE2 warning describe what actually happened.
      - configure_networkmanager: no `list-unit-files | grep -q` under
        `set -o pipefail`.
      - print_summary: print "(not set)" instead of an empty value.
      - the new file now ends with a newline.

diff --git a/worker_node_install.sh b/worker_node_install.sh
index 2e313c8..f68c094 100644
--- a/worker_node_install.sh
+++ b/worker_node_install.sh
@@ -2,26 +2,40 @@
 set -Eeuo pipefail
 
 ########################################
-# Arch Linux Kubernetes Worker Node
-# Fully automated worker node preparation
-# + Official Kubernetes binaries pinned to 1.34.x
-# Ready for manual kubeadm join
+# Arch Linux RKE2 Worker Node
+#
+# What this script does:
+# - Disables swap
+# - Installs required Arch packages
+# - Configures kernel modules and sysctl for Kubernetes
+# - Configures NetworkManager to ignore CNI interfaces
+# - Disables host nftables service to avoid breaking RKE2 service routing
+# - Installs RKE2 agent pinned to the same version as the master
+# - Optionally joins the worker to the cluster automatically
+#
+# Optional environment variables:
+#   RKE2_VERSION=v1.34.5+rke2r1
+#   SERVER_URL=https://10.28.24.17:9345
+#   RKE2_TOKEN=your-node-token
+#   WORKER_NODE_NAME=arch-kubernetes-worker1
+#   START_RKE2=true
+#
+# Notes:
+# - If SERVER_URL and RKE2_TOKEN are both set, the script will configure
+#   and start the worker automatically.
+# - If they are not set, the script will install everything and stop after
+#   preparing the node.
 ########################################
 
-# ---------- Config ----------
-K8S_VERSION="${K8S_VERSION:-v1.34.6}"
-K8S_SERIES_REGEX='^v1\.34\.[0-9]+$'
-K8S_ARCH="${K8S_ARCH:-amd64}"
+RKE2_VERSION="${RKE2_VERSION:-v1.34.5+rke2r1}"
+SERVER_URL="${SERVER_URL:-}"
+RKE2_TOKEN="${RKE2_TOKEN:-}"
+WORKER_NODE_NAME="${WORKER_NODE_NAME:-}"
+START_RKE2="${START_RKE2:-true}"
 
-# Binary locations
-KUBEADM_BIN="/usr/local/bin/kubeadm"
-KUBECTL_BIN="/usr/local/bin/kubectl"
-KUBELET_BIN="/usr/local/bin/kubelet"
+RKE2_CONFIG_DIR="/etc/rancher/rke2"
+RKE2_CONFIG_FILE="${RKE2_CONFIG_DIR}/config.yaml"
 
-# Optional output file for your later manual join command
-JOIN_HINT_FILE="${JOIN_HINT_FILE:-/root/kubeadm-join-example.txt}"
-
-# ---------- Logging ----------
 log() {
   echo
   echo "============================================================"
@@ -40,256 +54,251 @@ die() {
   exit 1
 }
 
-# ---------- Helpers ----------
-require_cmd() {
-  command -v "$1" >/dev/null 2>&1 || die "Required command not found: $1"
-}
-
-download_k8s_binary() {
-  local name="$1"
-  local tmpdir
-  tmpdir="$(mktemp -d)"
-
-  curl -fsSL -o "${tmpdir}/${name}" \
-    "https://dl.k8s.io/release/${K8S_VERSION}/bin/linux/${K8S_ARCH}/${name}"
-
-  curl -fsSL -o "${tmpdir}/${name}.sha256" \
-    "https://dl.k8s.io/release/${K8S_VERSION}/bin/linux/${K8S_ARCH}/${name}.sha256"
-
-  (
-    cd "${tmpdir}"
-    echo "$(cat "${name}.sha256") ${name}" | sha256sum --check --status
-  ) || die "Checksum verification failed for ${name} ${K8S_VERSION}"
-
-  install -o root -g root -m 0755 "${tmpdir}/${name}" "/usr/local/bin/${name}"
-  rm -rf "${tmpdir}"
-}
-
-install_kubelet_service() {
-  log "Installing kubelet systemd service"
-
-  mkdir -p /etc/systemd/system/kubelet.service.d
-  touch /etc/default/kubelet
-
-  cat >/etc/systemd/system/kubelet.service <<'EOF'
-[Unit]
-Description=kubelet: The Kubernetes Node Agent
-Documentation=https://kubernetes.io/docs/
-After=containerd.service network-online.target
-Wants=network-online.target
-Requires=containerd.service
-
-[Service]
-ExecStart=/usr/local/bin/kubelet
-Restart=always
-StartLimitInterval=0
-RestartSec=10
-
-[Install]
-WantedBy=multi-user.target
-EOF
-
-  cat >/etc/systemd/system/kubelet.service.d/10-kubeadm.conf <<'EOF'
-[Service]
-Environment="KUBELET_KUBECONFIG_ARGS=--bootstrap-kubeconfig=/etc/kubernetes/bootstrap-kubelet.conf --kubeconfig=/etc/kubernetes/kubelet.conf"
-Environment="KUBELET_CONFIG_ARGS=--config=/var/lib/kubelet/config.yaml"
-EnvironmentFile=-/var/lib/kubelet/kubeadm-flags.env
-EnvironmentFile=-/etc/default/kubelet
-ExecStart=
-ExecStart=/usr/local/bin/kubelet $KUBELET_KUBECONFIG_ARGS $KUBELET_CONFIG_ARGS $KUBELET_KUBEADM_ARGS $KUBELET_EXTRA_ARGS
-EOF
-}
-
-cleanup_old_k8s_state() {
-  log "Cleaning up any previous Kubernetes worker state"
-
-  kubeadm reset -f >/dev/null 2>&1 || true
-
-  rm -rf /etc/cni/net.d \
-    /var/lib/cni \
-    /etc/kubernetes \
-    /var/lib/kubelet/pki \
-    /var/lib/kubelet/config.yaml \
-    /var/lib/kubelet/kubeadm-flags.env
-
-  ip link delete cni0 2>/dev/null || true
-  ip link delete flannel.1 2>/dev/null || true
-  ip link delete kube-ipvs0 2>/dev/null || true
-}
-
-# ---------- Root check ----------
-if [[ "${EUID}" -ne 0 ]]; then
-  die "Run this script as root, for example: sudo ./worker_node_prepare.sh"
-fi
-
-# ---------- Cleanup on error ----------
 on_error() {
   local exit_code=$?
-  warn "Script failed on line $1 with exit code ${exit_code}"
+  local line_no=$1
+
+  warn "Script failed on line ${line_no} with exit code ${exit_code}"
   warn "Useful diagnostics:"
-  echo " journalctl -u containerd -u kubelet -b --no-pager | tail -n 200"
-  echo " systemctl status containerd kubelet --no-pager"
+  echo " sudo systemctl status rke2-agent -l --no-pager"
+  echo " sudo journalctl -u rke2-agent -n 200 --no-pager"
+  echo " sudo cat ${RKE2_CONFIG_FILE}"
   exit "${exit_code}"
 }
 trap 'on_error $LINENO' ERR
 
-# ---------- Version guard ----------
-[[ "${K8S_VERSION}" =~ ${K8S_SERIES_REGEX} ]] || die \
-  "This worker script is intended for Kubernetes 1.34.x to match your master node. Current K8S_VERSION=${K8S_VERSION}"
+require_root() {
+  [[ "${EUID}" -eq 0 ]] || die "Run this script as root: sudo $0"
+}
 
-# ---------- Step 1: Disable swap ----------
-log "Disabling swap immediately"
-swapoff -a || true
+disable_swap() {
+  log "Disabling swap"
 
-log "Disabling swap persistently in /etc/fstab"
-if [[ -f /etc/fstab ]]; then
-  cp /etc/fstab /etc/fstab.bak.$(date +%Y%m%d%H%M%S)
-  sed -ri '/\sswap\s/s/^/# DISABLED FOR KUBERNETES: /' /etc/fstab
-fi
+  swapoff -a || true
 
-# ---------- Step 2: Update system ----------
-log "Updating package databases and system packages"
-pacman -Syu --noconfirm
-
-# ---------- Step 3: Resolve iptables conflict automatically ----------
-log "Resolving iptables backend for Kubernetes"
-if pacman -Q iptables >/dev/null 2>&1; then
-  log "Removing legacy iptables package so iptables-nft can be installed"
-  pacman -Rdd --noconfirm iptables || true
-fi
-
-# ---------- Step 4: Install required Arch packages ----------
-log "Installing runtime and support packages from Arch"
-pacman -S --needed --noconfirm \
-  ca-certificates \
-  curl \
-  containerd \
-  cni-plugins \
-  crictl \
-  ethtool \
-  iptables-nft \
-  conntrack-tools \
-  socat \
-  tar \
-  gzip \
-  jq \
-  openssl
-
-# ---------- Step 5: Remove Arch Kubernetes packages if present ----------
-log "Removing Arch-provided kubeadm/kubectl/kubelet if present"
-for pkg in kubeadm kubectl kubelet; do
-  if pacman -Q "${pkg}" >/dev/null 2>&1; then
-    pacman -Rdd --noconfirm "${pkg}" || true
+  if [[ -f /etc/fstab ]]; then
+    cp /etc/fstab "/etc/fstab.bak.$(date +%Y%m%d%H%M%S)"
+    sed -Ei '/^[^#].+\s+swap\s+/ s/^/# disabled-by-rke2-worker-script /' /etc/fstab
   fi
-done
+}
 
-# ---------- Step 6: Install pinned Kubernetes binaries ----------
-log "Installing Kubernetes binaries ${K8S_VERSION}"
-download_k8s_binary kubeadm
-download_k8s_binary kubectl
-download_k8s_binary kubelet
+install_packages() {
+  log "Installing required Arch packages"
 
-require_cmd "${KUBEADM_BIN}"
-require_cmd "${KUBECTL_BIN}"
-require_cmd "${KUBELET_BIN}"
+  pacman -Sy --noconfirm archlinux-keyring
 
-# ---------- Step 7: Kernel modules ----------
-log "Configuring required kernel modules"
-cat >/etc/modules-load.d/k8s.conf <<'EOF'
+  if pacman -Q iptables >/dev/null 2>&1; then
+    pacman -Rdd --noconfirm iptables || true
+  fi
+
+  pacman -Syu --noconfirm
+  pacman -S --needed --noconfirm \
+    bash-completion \
+    ca-certificates \
+    cni-plugins \
+    conntrack-tools \
+    curl \
+    ethtool \
+    gzip \
+    iproute2 \
+    iptables-nft \
+    jq \
+    nfs-utils \
+    open-iscsi \
+    openssl \
+    socat \
+    tar \
+    unzip \
+    wget
+}
+
+configure_kernel() {
+  log "Configuring kernel modules and sysctl"
+
+  cat >/etc/modules-load.d/k8s.conf <<'EOF'
 overlay
 br_netfilter
 EOF
 
-modprobe overlay
-modprobe br_netfilter
+  modprobe overlay
+  modprobe br_netfilter
 
-# ---------- Step 8: Sysctl ----------
-log "Configuring Kubernetes sysctl settings"
-cat >/etc/sysctl.d/99-kubernetes-cri.conf <<'EOF'
+  cat >/etc/sysctl.d/90-kubernetes.conf <<'EOF'
 net.bridge.bridge-nf-call-iptables = 1
 net.bridge.bridge-nf-call-ip6tables = 1
 net.ipv4.ip_forward = 1
 EOF
 
-sysctl --system
+  sysctl --system >/dev/null
+}
 
-# ---------- Step 9: Configure containerd ----------
-log "Configuring containerd"
-mkdir -p /etc/containerd
+configure_networkmanager() {
+  if systemctl is-enabled NetworkManager >/dev/null 2>&1 || systemctl is-active NetworkManager >/dev/null 2>&1; then
+    log "Configuring NetworkManager to ignore CNI interfaces"
 
-if [[ ! -f /etc/containerd/config.toml ]]; then
-  containerd config default >/etc/containerd/config.toml
-else
-  cp /etc/containerd/config.toml /etc/containerd/config.toml.bak.$(date +%Y%m%d%H%M%S)
-fi
-
-sed -ri 's/^\s*SystemdCgroup = false/SystemdCgroup = true/' /etc/containerd/config.toml
-
-# ---------- Step 10: Install kubelet service ----------
-install_kubelet_service
-
-# ---------- Step 11: Clean previous worker state ----------
-cleanup_old_k8s_state
-
-# ---------- Step 12: Enable services ----------
-log "Enabling and starting containerd and kubelet"
-systemctl daemon-reload
-systemctl enable --now containerd
-systemctl enable --now kubelet
-
-# ---------- Step 13: Wait for containerd ----------
-log "Waiting for containerd to become active"
-for i in {1..20}; do
-  if systemctl is-active --quiet containerd; then
-    break
-  fi
-  sleep 1
-done
-systemctl is-active --quiet containerd || die "containerd did not start successfully"
-
-# ---------- Step 14: Verify pinned versions ----------
-log "Verifying installed Kubernetes component versions"
-KUBEADM_VERSION="$("${KUBEADM_BIN}" version -o short 2>/dev/null || true)"
-KUBECTL_VERSION="$("${KUBECTL_BIN}" version --client -o json 2>/dev/null | jq -r '.clientVersion.gitVersion // empty')"
-KUBELET_VERSION="$("${KUBELET_BIN}" --version 2>/dev/null | awk '{print $2}')"
-
-[[ "${KUBEADM_VERSION}" =~ ${K8S_SERIES_REGEX} ]] || die "kubeadm version mismatch: ${KUBEADM_VERSION}"
-[[ "${KUBECTL_VERSION}" =~ ${K8S_SERIES_REGEX} ]] || die "kubectl version mismatch: ${KUBECTL_VERSION}"
-[[ "${KUBELET_VERSION}" =~ ${K8S_SERIES_REGEX} ]] || die "kubelet version mismatch: ${KUBELET_VERSION}"
-
-# ---------- Step 15: Write join hint ----------
-log "Writing manual join hint"
-cat >"${JOIN_HINT_FILE}" <<'EOF'
-Run your worker join command manually, for example:
-
-sudo kubeadm join :6443 --token \
-  --discovery-token-ca-cert-hash sha256:
+    mkdir -p /etc/NetworkManager/conf.d
+    cat >/etc/NetworkManager/conf.d/rke2-cni.conf <<'EOF'
+[keyfile]
+unmanaged-devices=interface-name:cali*;interface-name:flannel*;interface-name:cni*;interface-name:vxlan.calico;interface-name:kube-ipvs0;interface-name:nodelocaldns;interface-name:tunl*
 EOF
-chmod 600 "${JOIN_HINT_FILE}"
 
-# ---------- Final output ----------
-echo
-echo "Worker node preparation is complete."
-echo
-echo "Pinned Kubernetes version:"
-echo " ${K8S_VERSION}"
-echo
-echo "Installed binaries:"
-echo " ${KUBEADM_BIN}"
-echo " ${KUBECTL_BIN}"
-echo " ${KUBELET_BIN}"
-echo
-echo "Services:"
-echo " containerd: $(systemctl is-active containerd || true)"
-echo " kubelet: $(systemctl is-active kubelet || true)"
-echo
-echo "Next step:"
-echo " Run your kubeadm join command manually on this worker."
-echo
-echo "Example hint saved to:"
-echo " ${JOIN_HINT_FILE}"
-echo
-echo "Example:"
-echo " sudo kubeadm join :6443 --token \\"
-echo " --discovery-token-ca-cert-hash sha256:"
-echo
\ No newline at end of file
+    systemctl restart NetworkManager
+  fi
+
+  # Best effort: these units only exist on some images. Disabling them
+  # directly avoids `systemctl list-unit-files | grep -q`, which under
+  # `set -o pipefail` can report a false negative when grep exits early.
+  systemctl disable --now nm-cloud-setup.service >/dev/null 2>&1 || true
+  systemctl disable --now nm-cloud-setup.timer >/dev/null 2>&1 || true
+}
+
+enable_support_services() {
+  log "Enabling support services"
+
+  systemctl enable --now iscsid.service || true
+
+  # Do NOT enable nftables.service here.
+  # On this Arch + RKE2 setup it can break service routing for cluster IPs.
+  systemctl stop nftables.service >/dev/null 2>&1 || true
+  systemctl disable nftables.service >/dev/null 2>&1 || true
+  nft flush ruleset >/dev/null 2>&1 || true
+}
+
+install_rke2_agent() {
+  log "Installing RKE2 agent ${RKE2_VERSION}"
+
+  mkdir -p "${RKE2_CONFIG_DIR}"
+
+  curl -sfL https://get.rke2.io | INSTALL_RKE2_TYPE=agent INSTALL_RKE2_VERSION="${RKE2_VERSION}" sh -
+
+  mkdir -p /etc/profile.d
+  cat >/etc/profile.d/rke2-path.sh <<'EOF'
+export PATH=$PATH:/var/lib/rancher/rke2/bin:/usr/local/bin
+EOF
+}
+
+write_config_if_possible() {
+  log "Writing RKE2 agent config"
+
+  # A re-run without any join settings must not wipe a config that already
+  # holds the server URL and token.
+  if [[ -z "${SERVER_URL}${RKE2_TOKEN}${WORKER_NODE_NAME}" && -s "${RKE2_CONFIG_FILE}" ]]; then
+    warn "No join settings supplied; keeping existing ${RKE2_CONFIG_FILE}"
+    return
+  fi
+
+  # Create the file with mode 0600 before the token is written, so the
+  # secret is never readable by other users, not even briefly.
+  install -m 600 /dev/null "${RKE2_CONFIG_FILE}"
+
+  {
+    if [[ -n "${SERVER_URL}" ]]; then
+      echo "server: ${SERVER_URL}"
+    fi
+
+    if [[ -n "${RKE2_TOKEN}" ]]; then
+      echo "token: ${RKE2_TOKEN}"
+    fi
+
+    if [[ -n "${WORKER_NODE_NAME}" ]]; then
+      echo "node-name: ${WORKER_NODE_NAME}"
+    fi
+  } >"${RKE2_CONFIG_FILE}"
+}
+
+start_agent_if_possible() {
+  systemctl daemon-reload
+
+  # Nothing to join without a server URL and token. Leave the unit alone;
+  # print_summary tells the operator to `systemctl enable --now` it later.
+  if [[ -z "${SERVER_URL}" || -z "${RKE2_TOKEN}" ]]; then
+    warn "SERVER_URL and/or RKE2_TOKEN not set. Worker is prepared but not joined."
+    return
+  fi
+
+  systemctl enable rke2-agent.service
+
+  if [[ "${START_RKE2}" != "true" ]]; then
+    warn "START_RKE2 is not 'true': rke2-agent is enabled for the next boot but was not started now"
+    return
+  fi
+
+  log "Starting RKE2 agent"
+  systemctl restart rke2-agent.service
+}
+
+wait_for_agent() {
+  if [[ "${START_RKE2}" != "true" ]]; then
+    return
+  fi
+
+  if [[ -z "${SERVER_URL}" || -z "${RKE2_TOKEN}" ]]; then
+    return
+  fi
+
+  log "Waiting for rke2-agent service"
+
+  local waited=0
+  until systemctl is-active --quiet rke2-agent.service; do
+    sleep 5
+    waited=$((waited + 5))
+
+    if (( waited % 30 == 0 )); then
+      warn "rke2-agent not active yet; recent logs:"
+      journalctl -u rke2-agent -n 40 --no-pager || true
+    fi
+
+    if (( waited >= 600 )); then
+      journalctl -u rke2-agent -n 200 --no-pager || true
+      die "Timed out waiting for rke2-agent to become active"
+    fi
+  done
+}
+
+print_summary() {
+  log "Worker node preparation complete"
+
+  echo "RKE2 version: ${RKE2_VERSION}"
+  echo "Config file: ${RKE2_CONFIG_FILE}"
+  echo "Server URL: ${SERVER_URL:-(not set)}"
+  echo "Node name: ${WORKER_NODE_NAME:-(not set)}"
+  echo
+
+  if [[ -n "${SERVER_URL}" && -n "${RKE2_TOKEN}" && "${START_RKE2}" == "true" ]]; then
+    echo "Worker attempted automatic join."
+    echo "Check from the master with:"
+    echo " /var/lib/rancher/rke2/bin/kubectl get nodes -o wide"
+    echo
+    echo "Local diagnostics:"
+    echo " sudo systemctl status rke2-agent --no-pager"
+    echo " sudo journalctl -u rke2-agent -n 200 --no-pager"
+  else
+    echo "Worker is installed and ready, but not joined yet."
+    echo
+    echo "To join later, set these in ${RKE2_CONFIG_FILE}:"
+    echo " server: https://YOUR_MASTER_IP:9345"
+    echo " token: YOUR_NODE_TOKEN"
+    if [[ -n "${WORKER_NODE_NAME}" ]]; then
+      echo " node-name: ${WORKER_NODE_NAME}"
+    fi
+    echo
+    echo "Then run:"
+    echo " sudo systemctl enable --now rke2-agent"
+  fi
+}
+
+main() {
+  require_root
+  disable_swap
+  install_packages
+  configure_kernel
+  configure_networkmanager
+  enable_support_services
+  install_rke2_agent
+  write_config_if_possible
+  start_agent_if_possible
+  wait_for_agent
+  print_summary
+}
+
+main "$@"