Update worker_node_install.sh
This commit is contained in:
parent
5e8f0e8d87
commit
53402a93ed
|
|
@ -2,26 +2,40 @@
|
|||
set -Eeuo pipefail
|
||||
|
||||
########################################
|
||||
# Arch Linux Kubernetes Worker Node
|
||||
# Fully automated worker node preparation
|
||||
# + Official Kubernetes binaries pinned to 1.34.x
|
||||
# Ready for manual kubeadm join
|
||||
# Arch Linux RKE2 Worker Node
|
||||
#
|
||||
# What this script does:
|
||||
# - Disables swap
|
||||
# - Installs required Arch packages
|
||||
# - Configures kernel modules and sysctl for Kubernetes
|
||||
# - Configures NetworkManager to ignore CNI interfaces
|
||||
# - Disables host nftables service to avoid breaking RKE2 service routing
|
||||
# - Installs RKE2 agent pinned to the same version as the master
|
||||
# - Optionally joins the worker to the cluster automatically
|
||||
#
|
||||
# Optional environment variables:
|
||||
# RKE2_VERSION=v1.34.5+rke2r1
|
||||
# SERVER_URL=https://10.28.24.17:9345
|
||||
# RKE2_TOKEN=your-node-token
|
||||
# WORKER_NODE_NAME=arch-kubernetes-worker1
|
||||
# START_RKE2=true
|
||||
#
|
||||
# Notes:
|
||||
# - If SERVER_URL and RKE2_TOKEN are both set, the script will configure
|
||||
# and start the worker automatically.
|
||||
# - If they are not set, the script will install everything and stop after
|
||||
# preparing the node.
|
||||
########################################
|
||||
|
||||
# ---------- Config ----------
|
||||
# Versions and architecture. Each ':=' default only applies when the variable
# is unset or empty, so values exported by the caller win.
: "${K8S_VERSION:=v1.34.6}"
K8S_SERIES_REGEX='^v1\.34\.[0-9]+$'
: "${K8S_ARCH:=amd64}"
: "${RKE2_VERSION:=v1.34.5+rke2r1}"

# Join settings. An empty value means "not provided".
: "${SERVER_URL:=}"
: "${RKE2_TOKEN:=}"
: "${WORKER_NODE_NAME:=}"
: "${START_RKE2:=true}"

# Binary locations
KUBEADM_BIN=/usr/local/bin/kubeadm
KUBECTL_BIN=/usr/local/bin/kubectl
KUBELET_BIN=/usr/local/bin/kubelet

# RKE2 agent configuration paths
RKE2_CONFIG_DIR=/etc/rancher/rke2
RKE2_CONFIG_FILE="${RKE2_CONFIG_DIR}/config.yaml"

# Optional output file for your later manual join command
: "${JOIN_HINT_FILE:=/root/kubeadm-join-example.txt}"
|
||||
|
||||
# ---------- Logging ----------
|
||||
log() {
|
||||
echo
|
||||
echo "============================================================"
|
||||
|
|
@ -40,256 +54,240 @@ die() {
|
|||
exit 1
|
||||
}
|
||||
|
||||
# ---------- Helpers ----------
|
||||
# Abort via die() unless the given command can be resolved by the shell.
# Arguments: $1 - command name (or path) to look up
require_cmd() {
  local tool="$1"

  if ! command -v "${tool}" >/dev/null 2>&1; then
    die "Required command not found: ${tool}"
  fi
}
|
||||
|
||||
#######################################
# Download one Kubernetes release binary, verify its SHA-256 checksum and
# install it into /usr/local/bin.
# Globals:   K8S_VERSION, K8S_ARCH (read)
# Arguments: $1 - binary name as published under dl.k8s.io (e.g. kubeadm)
# Outputs:   /usr/local/bin/<name>, owned by root:root with mode 0755
# Exits:     via die() when the download, checksum check or install fails
#######################################
download_k8s_binary() {
  local name="$1"
  local base_url="https://dl.k8s.io/release/${K8S_VERSION}/bin/linux/${K8S_ARCH}"
  local tmpdir
  local failure=""

  tmpdir="$(mktemp -d)" || die "Could not create a temporary directory for ${name}"

  # Record the first failure instead of exiting on the spot, so that the
  # temporary directory is removed on every path (success or error).
  if ! curl -fsSL -o "${tmpdir}/${name}" "${base_url}/${name}"; then
    failure="Download failed for ${name} ${K8S_VERSION}"
  elif ! curl -fsSL -o "${tmpdir}/${name}.sha256" "${base_url}/${name}.sha256"; then
    failure="Checksum download failed for ${name} ${K8S_VERSION}"
  elif ! (
    # sha256sum resolves the file name relative to the current directory.
    # The check line uses the canonical "<hash><two spaces><file>" layout.
    cd "${tmpdir}" &&
      printf '%s  %s\n' "$(<"${name}.sha256")" "${name}" | sha256sum --check --status
  ); then
    failure="Checksum verification failed for ${name} ${K8S_VERSION}"
  elif ! install -o root -g root -m 0755 "${tmpdir}/${name}" "/usr/local/bin/${name}"; then
    failure="Could not install ${name} into /usr/local/bin"
  fi

  rm -rf -- "${tmpdir}"
  [[ -z "${failure}" ]] || die "${failure}"
}
|
||||
|
||||
# Write the kubelet systemd unit and its kubeadm drop-in.
# Creates the drop-in directory and an (initially empty) /etc/default/kubelet,
# then overwrites kubelet.service and 10-kubeadm.conf with the content below.
install_kubelet_service() {
  local unit_dir="/etc/systemd/system"
  local dropin_dir="${unit_dir}/kubelet.service.d"

  log "Installing kubelet systemd service"

  mkdir -p "${dropin_dir}"
  touch /etc/default/kubelet

  # Base unit: runs the kubelet binary and always restarts it.
  cat >"${unit_dir}/kubelet.service" <<'EOF'
[Unit]
Description=kubelet: The Kubernetes Node Agent
Documentation=https://kubernetes.io/docs/
After=containerd.service network-online.target
Wants=network-online.target
Requires=containerd.service

[Service]
ExecStart=/usr/local/bin/kubelet
Restart=always
StartLimitInterval=0
RestartSec=10

[Install]
WantedBy=multi-user.target
EOF

  # Drop-in: clears ExecStart and re-declares it with the kubeconfig/config
  # arguments plus optional flags read from the two environment files.
  cat >"${dropin_dir}/10-kubeadm.conf" <<'EOF'
[Service]
Environment="KUBELET_KUBECONFIG_ARGS=--bootstrap-kubeconfig=/etc/kubernetes/bootstrap-kubelet.conf --kubeconfig=/etc/kubernetes/kubelet.conf"
Environment="KUBELET_CONFIG_ARGS=--config=/var/lib/kubelet/config.yaml"
EnvironmentFile=-/var/lib/kubelet/kubeadm-flags.env
EnvironmentFile=-/etc/default/kubelet
ExecStart=
ExecStart=/usr/local/bin/kubelet $KUBELET_KUBECONFIG_ARGS $KUBELET_CONFIG_ARGS $KUBELET_KUBEADM_ARGS $KUBELET_EXTRA_ARGS
EOF
}
|
||||
|
||||
# Remove leftovers of an earlier Kubernetes worker setup: runs kubeadm reset,
# deletes CNI/kubelet/kubeadm state paths and removes known CNI interfaces.
# Every external step except the rm is best effort (failures are ignored).
cleanup_old_k8s_state() {
  local -a stale_paths=(
    /etc/cni/net.d
    /var/lib/cni
    /etc/kubernetes
    /var/lib/kubelet/pki
    /var/lib/kubelet/config.yaml
    /var/lib/kubelet/kubeadm-flags.env
  )
  local iface

  log "Cleaning up any previous Kubernetes worker state"

  # Any failure here (including kubeadm not being runnable) is ignored.
  kubeadm reset -f >/dev/null 2>&1 || true

  rm -rf "${stale_paths[@]}"

  # Deleting an interface that does not exist fails; that is expected.
  for iface in cni0 flannel.1 kube-ipvs0; do
    ip link delete "${iface}" 2>/dev/null || true
  done
}
|
||||
|
||||
# ---------- Root check ----------
|
||||
# Refuse to continue unless the effective user is root.
[[ "${EUID}" -eq 0 ]] \
  || die "Run this script as root, for example: sudo ./worker_node_prepare.sh"
|
||||
|
||||
# ---------- Cleanup on error ----------
|
||||
#######################################
# ERR-trap handler: report where the script failed and list commands that
# help investigate, then exit with the failing command's status.
# Globals:   RKE2_CONFIG_FILE (read)
# Arguments: $1 - line number on which the failing command ran
# Outputs:   one failure notice via warn, followed by diagnostic hints
# Exits:     with the exit status that triggered the trap
#######################################
on_error() {
  # Must stay the first statement: any later command would overwrite $?.
  local exit_code=$?
  local line_no=$1

  # Reported exactly once (the notice used to be printed twice).
  warn "Script failed on line ${line_no} with exit code ${exit_code}"
  warn "Useful diagnostics:"
  echo " journalctl -u containerd -u kubelet -b --no-pager | tail -n 200"
  echo " systemctl status containerd kubelet --no-pager"
  echo " sudo systemctl status rke2-agent -l --no-pager"
  echo " sudo journalctl -u rke2-agent -n 200 --no-pager"
  echo " sudo cat ${RKE2_CONFIG_FILE}"
  exit "${exit_code}"
}
|
||||
trap 'on_error $LINENO' ERR
|
||||
|
||||
# ---------- Version guard ----------
|
||||
# Accept only versions matching K8S_SERIES_REGEX (the 1.34.x series).
if [[ ! "${K8S_VERSION}" =~ ${K8S_SERIES_REGEX} ]]; then
  die "This worker script is intended for Kubernetes 1.34.x to match your master node. Current K8S_VERSION=${K8S_VERSION}"
fi
|
||||
# Abort via die() unless the script runs with an effective UID of 0.
require_root() {
  if [[ "${EUID}" -ne 0 ]]; then
    die "Run this script as root: sudo $0"
  fi
}
|
||||
|
||||
# ---------- Step 1: Disable swap ----------
|
||||
log "Disabling swap immediately"
|
||||
swapoff -a || true
|
||||
disable_swap() {
|
||||
log "Disabling swap"
|
||||
|
||||
log "Disabling swap persistently in /etc/fstab"
|
||||
if [[ -f /etc/fstab ]]; then
|
||||
cp /etc/fstab /etc/fstab.bak.$(date +%Y%m%d%H%M%S)
|
||||
sed -ri '/\sswap\s/s/^/# DISABLED FOR KUBERNETES: /' /etc/fstab
|
||||
fi
|
||||
swapoff -a || true
|
||||
|
||||
# ---------- Step 2: Update system ----------
|
||||
log "Updating package databases and system packages"
|
||||
pacman -Syu --noconfirm
|
||||
|
||||
# ---------- Step 3: Resolve iptables conflict automatically ----------
|
||||
log "Resolving iptables backend for Kubernetes"
|
||||
if pacman -Q iptables >/dev/null 2>&1; then
|
||||
log "Removing legacy iptables package so iptables-nft can be installed"
|
||||
pacman -Rdd --noconfirm iptables || true
|
||||
fi
|
||||
|
||||
# ---------- Step 4: Install required Arch packages ----------
|
||||
log "Installing runtime and support packages from Arch"
|
||||
pacman -S --needed --noconfirm \
|
||||
ca-certificates \
|
||||
curl \
|
||||
containerd \
|
||||
cni-plugins \
|
||||
crictl \
|
||||
ethtool \
|
||||
iptables-nft \
|
||||
conntrack-tools \
|
||||
socat \
|
||||
tar \
|
||||
gzip \
|
||||
jq \
|
||||
openssl
|
||||
|
||||
# ---------- Step 5: Remove Arch Kubernetes packages if present ----------
|
||||
log "Removing Arch-provided kubeadm/kubectl/kubelet if present"
|
||||
for pkg in kubeadm kubectl kubelet; do
|
||||
if pacman -Q "${pkg}" >/dev/null 2>&1; then
|
||||
pacman -Rdd --noconfirm "${pkg}" || true
|
||||
if [[ -f /etc/fstab ]]; then
|
||||
cp /etc/fstab "/etc/fstab.bak.$(date +%Y%m%d%H%M%S)"
|
||||
sed -Ei '/^[^#].+\s+swap\s+/ s/^/# disabled-by-rke2-worker-script /' /etc/fstab
|
||||
fi
|
||||
done
|
||||
}
|
||||
|
||||
# ---------- Step 6: Install pinned Kubernetes binaries ----------
|
||||
log "Installing Kubernetes binaries ${K8S_VERSION}"
|
||||
download_k8s_binary kubeadm
|
||||
download_k8s_binary kubectl
|
||||
download_k8s_binary kubelet
|
||||
install_packages() {
|
||||
log "Installing required Arch packages"
|
||||
|
||||
require_cmd "${KUBEADM_BIN}"
|
||||
require_cmd "${KUBECTL_BIN}"
|
||||
require_cmd "${KUBELET_BIN}"
|
||||
pacman -Sy --noconfirm archlinux-keyring
|
||||
|
||||
# ---------- Step 7: Kernel modules ----------
|
||||
log "Configuring required kernel modules"
|
||||
cat >/etc/modules-load.d/k8s.conf <<'EOF'
|
||||
if pacman -Q iptables >/dev/null 2>&1; then
|
||||
pacman -Rdd --noconfirm iptables || true
|
||||
fi
|
||||
|
||||
pacman -Syu --noconfirm
|
||||
pacman -S --needed --noconfirm \
|
||||
bash-completion \
|
||||
ca-certificates \
|
||||
cni-plugins \
|
||||
conntrack-tools \
|
||||
curl \
|
||||
ethtool \
|
||||
gzip \
|
||||
iproute2 \
|
||||
iptables-nft \
|
||||
jq \
|
||||
nfs-utils \
|
||||
open-iscsi \
|
||||
openssl \
|
||||
socat \
|
||||
tar \
|
||||
unzip \
|
||||
wget
|
||||
}
|
||||
|
||||
configure_kernel() {
|
||||
log "Configuring kernel modules and sysctl"
|
||||
|
||||
cat >/etc/modules-load.d/k8s.conf <<'EOF'
|
||||
overlay
|
||||
br_netfilter
|
||||
EOF
|
||||
|
||||
modprobe overlay
|
||||
modprobe br_netfilter
|
||||
modprobe overlay
|
||||
modprobe br_netfilter
|
||||
|
||||
# ---------- Step 8: Sysctl ----------
|
||||
log "Configuring Kubernetes sysctl settings"
|
||||
cat >/etc/sysctl.d/99-kubernetes-cri.conf <<'EOF'
|
||||
cat >/etc/sysctl.d/90-kubernetes.conf <<'EOF'
|
||||
net.bridge.bridge-nf-call-iptables = 1
|
||||
net.bridge.bridge-nf-call-ip6tables = 1
|
||||
net.ipv4.ip_forward = 1
|
||||
EOF
|
||||
|
||||
sysctl --system
|
||||
sysctl --system >/dev/null
|
||||
}
|
||||
|
||||
# ---------- Step 9: Configure containerd ----------
|
||||
log "Configuring containerd"
|
||||
mkdir -p /etc/containerd
|
||||
configure_networkmanager() {
|
||||
if systemctl is-enabled NetworkManager >/dev/null 2>&1 || systemctl is-active NetworkManager >/dev/null 2>&1; then
|
||||
log "Configuring NetworkManager to ignore CNI interfaces"
|
||||
|
||||
if [[ ! -f /etc/containerd/config.toml ]]; then
|
||||
containerd config default >/etc/containerd/config.toml
|
||||
else
|
||||
cp /etc/containerd/config.toml /etc/containerd/config.toml.bak.$(date +%Y%m%d%H%M%S)
|
||||
fi
|
||||
|
||||
sed -ri 's/^\s*SystemdCgroup = false/SystemdCgroup = true/' /etc/containerd/config.toml
|
||||
|
||||
# ---------- Step 10: Install kubelet service ----------
|
||||
install_kubelet_service
|
||||
|
||||
# ---------- Step 11: Clean previous worker state ----------
|
||||
cleanup_old_k8s_state
|
||||
|
||||
# ---------- Step 12: Enable services ----------
|
||||
log "Enabling and starting containerd and kubelet"
|
||||
systemctl daemon-reload
|
||||
systemctl enable --now containerd
|
||||
systemctl enable --now kubelet
|
||||
|
||||
# ---------- Step 13: Wait for containerd ----------
|
||||
log "Waiting for containerd to become active"
|
||||
for i in {1..20}; do
|
||||
if systemctl is-active --quiet containerd; then
|
||||
break
|
||||
fi
|
||||
sleep 1
|
||||
done
|
||||
systemctl is-active --quiet containerd || die "containerd did not start successfully"
|
||||
|
||||
# ---------- Step 14: Verify pinned versions ----------
|
||||
log "Verifying installed Kubernetes component versions"
|
||||
KUBEADM_VERSION="$("${KUBEADM_BIN}" version -o short 2>/dev/null || true)"
|
||||
KUBECTL_VERSION="$("${KUBECTL_BIN}" version --client -o json 2>/dev/null | jq -r '.clientVersion.gitVersion // empty')"
|
||||
KUBELET_VERSION="$("${KUBELET_BIN}" --version 2>/dev/null | awk '{print $2}')"
|
||||
|
||||
[[ "${KUBEADM_VERSION}" =~ ${K8S_SERIES_REGEX} ]] || die "kubeadm version mismatch: ${KUBEADM_VERSION}"
|
||||
[[ "${KUBECTL_VERSION}" =~ ${K8S_SERIES_REGEX} ]] || die "kubectl version mismatch: ${KUBECTL_VERSION}"
|
||||
[[ "${KUBELET_VERSION}" =~ ${K8S_SERIES_REGEX} ]] || die "kubelet version mismatch: ${KUBELET_VERSION}"
|
||||
|
||||
# ---------- Step 15: Write join hint ----------
|
||||
log "Writing manual join hint"
|
||||
cat >"${JOIN_HINT_FILE}" <<'EOF'
|
||||
Run your worker join command manually, for example:
|
||||
|
||||
sudo kubeadm join <CONTROL_PLANE_IP>:6443 --token <TOKEN> \
|
||||
--discovery-token-ca-cert-hash sha256:<HASH>
|
||||
mkdir -p /etc/NetworkManager/conf.d
|
||||
cat >/etc/NetworkManager/conf.d/rke2-cni.conf <<'EOF'
|
||||
[keyfile]
|
||||
unmanaged-devices=interface-name:cali*;interface-name:flannel*;interface-name:cni*;interface-name:vxlan.calico;interface-name:kube-ipvs0;interface-name:nodelocaldns;interface-name:tunl*
|
||||
EOF
|
||||
chmod 600 "${JOIN_HINT_FILE}"
|
||||
|
||||
# ---------- Final output ----------
|
||||
echo
|
||||
echo "Worker node preparation is complete."
|
||||
echo
|
||||
echo "Pinned Kubernetes version:"
|
||||
echo " ${K8S_VERSION}"
|
||||
echo
|
||||
echo "Installed binaries:"
|
||||
echo " ${KUBEADM_BIN}"
|
||||
echo " ${KUBECTL_BIN}"
|
||||
echo " ${KUBELET_BIN}"
|
||||
echo
|
||||
echo "Services:"
|
||||
echo " containerd: $(systemctl is-active containerd || true)"
|
||||
echo " kubelet: $(systemctl is-active kubelet || true)"
|
||||
echo
|
||||
echo "Next step:"
|
||||
echo " Run your kubeadm join command manually on this worker."
|
||||
echo
|
||||
echo "Example hint saved to:"
|
||||
echo " ${JOIN_HINT_FILE}"
|
||||
echo
|
||||
echo "Example:"
|
||||
echo " sudo kubeadm join <CONTROL_PLANE_IP>:6443 --token <TOKEN> \\"
|
||||
echo " --discovery-token-ca-cert-hash sha256:<HASH>"
|
||||
echo
|
||||
systemctl restart NetworkManager
|
||||
fi
|
||||
|
||||
if systemctl list-unit-files | grep -q '^nm-cloud-setup.service'; then
|
||||
systemctl disable --now nm-cloud-setup.service || true
|
||||
fi
|
||||
if systemctl list-unit-files | grep -q '^nm-cloud-setup.timer'; then
|
||||
systemctl disable --now nm-cloud-setup.timer || true
|
||||
fi
|
||||
}
|
||||
|
||||
# Enable iscsid and make sure the host nftables service is stopped, disabled
# and has no rules loaded. Every step is best effort.
enable_support_services() {
  log "Enabling support services"

  systemctl enable --now iscsid.service || true

  # Do NOT enable nftables.service here.
  # On this Arch + RKE2 setup it can break service routing for cluster IPs.
  # Output is discarded and failures are ignored for all three commands.
  {
    systemctl stop nftables.service || true
    systemctl disable nftables.service || true
    nft flush ruleset || true
  } >/dev/null 2>&1
}
|
||||
|
||||
#######################################
# Install the RKE2 agent at the pinned version and add its tools to PATH for
# login shells.
# Globals:   RKE2_VERSION, RKE2_CONFIG_DIR (read)
# Outputs:   runs the installer from https://get.rke2.io with
#            INSTALL_RKE2_TYPE=agent; writes /etc/profile.d/rke2-path.sh
# Exits:     via die() when the installer cannot be downloaded or fails
#######################################
install_rke2_agent() {
  local installer
  local rc=0

  log "Installing RKE2 agent ${RKE2_VERSION}"

  mkdir -p "${RKE2_CONFIG_DIR}"

  # Download the installer completely before executing it. Piping curl
  # straight into sh can run a truncated script if the transfer is cut short.
  installer="$(mktemp)" || die "Could not create a temporary file for the RKE2 installer"
  if ! curl -sfL -o "${installer}" https://get.rke2.io; then
    rm -f -- "${installer}"
    die "Failed to download the RKE2 installer from https://get.rke2.io"
  fi

  INSTALL_RKE2_TYPE=agent INSTALL_RKE2_VERSION="${RKE2_VERSION}" sh "${installer}" || rc=$?
  rm -f -- "${installer}"
  (( rc == 0 )) || die "RKE2 installer exited with status ${rc}"

  mkdir -p /etc/profile.d
  # Quoted delimiter: $PATH must be expanded by the login shell, not here.
  cat >/etc/profile.d/rke2-path.sh <<'EOF'
export PATH=$PATH:/var/lib/rancher/rke2/bin:/usr/local/bin
EOF
}
|
||||
|
||||
#######################################
# (Re)write the RKE2 agent config file from the join settings.
# Only non-empty settings are emitted; with none set the file ends up empty.
# Globals:   RKE2_CONFIG_FILE, SERVER_URL, RKE2_TOKEN, WORKER_NODE_NAME (read)
# Outputs:   RKE2_CONFIG_FILE, mode 600
#######################################
write_config_if_possible() {
  log "Writing RKE2 agent config"

  # Empty the file and restrict its mode BEFORE the token is written, so the
  # secret is never on disk with umask-default or previously looser
  # permissions.
  : > "${RKE2_CONFIG_FILE}"
  chmod 600 "${RKE2_CONFIG_FILE}"

  {
    if [[ -n "${SERVER_URL}" ]]; then
      printf 'server: %s\n' "${SERVER_URL}"
    fi

    if [[ -n "${RKE2_TOKEN}" ]]; then
      printf 'token: %s\n' "${RKE2_TOKEN}"
    fi

    if [[ -n "${WORKER_NODE_NAME}" ]]; then
      printf 'node-name: %s\n' "${WORKER_NODE_NAME}"
    fi
  } >> "${RKE2_CONFIG_FILE}"
}
|
||||
|
||||
# Reload systemd, enable rke2-agent, and (re)start it when START_RKE2 is
# "true" and both SERVER_URL and RKE2_TOKEN are set; otherwise only warn.
start_agent_if_possible() {
  systemctl daemon-reload
  systemctl enable rke2-agent.service

  if [[ "${START_RKE2}" == "true" ]]; then
    if [[ -n "${SERVER_URL}" && -n "${RKE2_TOKEN}" ]]; then
      log "Starting RKE2 agent"
      systemctl restart rke2-agent.service
    else
      warn "SERVER_URL and/or RKE2_TOKEN not set. Worker is prepared but not joined."
    fi
  else
    warn "START_RKE2=false, leaving rke2-agent disabled from startup execution"
  fi
}
|
||||
|
||||
# Poll every 5 seconds until rke2-agent is active. Prints recent journal
# lines every 30 seconds and gives up via die() after 600 seconds.
# Returns immediately unless START_RKE2 is "true" and both SERVER_URL and
# RKE2_TOKEN are set.
wait_for_agent() {
  if [[ "${START_RKE2}" != "true" || -z "${SERVER_URL}" || -z "${RKE2_TOKEN}" ]]; then
    return 0
  fi

  log "Waiting for rke2-agent service"

  local elapsed=0
  while ! systemctl is-active --quiet rke2-agent.service; do
    sleep 5
    (( elapsed += 5 ))

    # Periodic progress report so a slow start is visible in the output.
    if (( elapsed % 30 == 0 )); then
      warn "rke2-agent not active yet; recent logs:"
      journalctl -u rke2-agent -n 40 --no-pager || true
    fi

    # Hard stop: dump a longer log excerpt before aborting.
    if (( elapsed >= 600 )); then
      journalctl -u rke2-agent -n 200 --no-pager || true
      die "Timed out waiting for rke2-agent to become active"
    fi
  done
}
|
||||
|
||||
# Print the final report: the effective settings, followed by either
# post-join diagnostics (when an automatic join was attempted) or
# instructions for joining the cluster later.
print_summary() {
  local -a report

  log "Worker node preparation complete"

  report=(
    "RKE2 version: ${RKE2_VERSION}"
    "Config file: ${RKE2_CONFIG_FILE}"
    "Server URL: ${SERVER_URL:-<not set>}"
    "Node name: ${WORKER_NODE_NAME:-<default hostname>}"
    ""
  )

  if [[ -z "${SERVER_URL}" || -z "${RKE2_TOKEN}" || "${START_RKE2}" != "true" ]]; then
    report+=(
      "Worker is installed and ready, but not joined yet."
      ""
      "To join later, set these in ${RKE2_CONFIG_FILE}:"
      " server: https://YOUR_MASTER_IP:9345"
      " token: YOUR_NODE_TOKEN"
    )
    # The node-name hint is only shown when a name was requested.
    if [[ -n "${WORKER_NODE_NAME}" ]]; then
      report+=(" node-name: ${WORKER_NODE_NAME}")
    fi
    report+=(
      ""
      "Then run:"
      " sudo systemctl enable --now rke2-agent"
    )
  else
    report+=(
      "Worker attempted automatic join."
      "Check from the master with:"
      " /var/lib/rancher/rke2/bin/kubectl get nodes -o wide"
      ""
      "Local diagnostics:"
      " sudo systemctl status rke2-agent --no-pager"
      " sudo journalctl -u rke2-agent -n 200 --no-pager"
    )
  fi

  printf '%s\n' "${report[@]}"
}
|
||||
|
||||
# Entry point: run every preparation step once, in the order listed.
main() {
  local step
  local -a steps=(
    require_root
    disable_swap
    install_packages
    configure_kernel
    configure_networkmanager
    enable_support_services
    install_rke2_agent
    write_config_if_possible
    start_agent_if_possible
    wait_for_agent
    print_summary
  )

  for step in "${steps[@]}"; do
    "${step}"
  done
}
|
||||
|
||||
main "$@"
|
||||
Loading…
Reference in New Issue