kubernetes-arch-install/master_node_install.sh

210 lines
6.1 KiB
Bash

#!/usr/bin/env bash
# Strict mode: exit on errors, let functions and subshells inherit the ERR
# trap, reject unset variables, and fail a pipeline if any stage fails.
set -o errexit -o errtrace -o nounset -o pipefail
########################################
# Arch Linux Kubernetes Control Plane
# Fully automated master node installer
########################################
# ---------- Config ----------
# POD_CIDR and CALICO_VERSION can be supplied through the environment; an
# unset or empty value is replaced by the default given here.
: "${POD_CIDR:=192.168.0.0/16}"
: "${CALICO_VERSION:=v3.31.4}"
KUBECONFIG_DIR_ROOT="/root/.kube"
JOIN_COMMAND_FILE="/root/kubeadm-join-command.sh"
# Detect the real invoking user when run with sudo; otherwise assume root.
REAL_USER="root"
if [[ -n "${SUDO_USER:-}" ]]; then
  REAL_USER="${SUDO_USER}"
fi
# Field 6 of the passwd entry is the home directory. A failed lookup is
# tolerated and handled by the /root fallback below.
REAL_HOME="$(getent passwd "${REAL_USER}" | cut -d: -f6 || true)"
if [[ -z "${REAL_HOME}" ]]; then
  REAL_HOME="/root"
fi
REAL_KUBECONFIG_DIR="${REAL_HOME}/.kube"
# ---------- Logging ----------
# Print an informational banner on stdout: an empty line, then the message
# framed above and below by a rule of '=' characters.
# Arguments: all arguments are joined with spaces to form the message
log() {
  local rule="============================================================"
  printf '\n%s\n[INFO] %s\n%s\n' "${rule}" "$*" "${rule}"
}
# Print a warning on stderr, preceded by an empty separator line.
# The separator is written to stderr too, so redirecting stdout neither
# captures stray blank lines nor detaches the separator from its message.
# Arguments: all arguments are joined with spaces to form the message
warn() {
  printf '\n[WARN] %s\n' "$*" >&2
}
# Print an error on stderr, preceded by an empty separator line, and
# terminate the script with exit status 1.
# The separator is written to stderr too, so nothing from a fatal error
# leaks onto stdout.
# Arguments: all arguments are joined with spaces to form the message
die() {
  printf '\n[ERROR] %s\n' "$*" >&2
  exit 1
}
# ---------- Root check ----------
# Abort right away unless the effective user ID is 0 (root).
(( EUID == 0 )) || die "Run this script as root, for example: sudo ./master_node_install.sh"
# ---------- Cleanup on error ----------
# ERR-trap handler: report where the script failed, suggest follow-up
# diagnostic commands, and exit with the failing command's status.
# Arguments: $1 - line number of the failing command (supplied by the trap)
# Outputs:   the diagnostic hints go to stderr
# Returns:   never returns; exits with the status of the command that failed
on_error() {
  # Must be the first statement: any other command would overwrite $?.
  local exit_code=$?
  # Clear the trap so a failure inside this handler cannot re-enter it.
  trap - ERR
  warn "Script failed on line $1 with exit code ${exit_code}"
  warn "Useful diagnostics:"
  {
    echo " journalctl -u containerd -u kubelet -b --no-pager | tail -n 200"
    echo " systemctl status containerd kubelet --no-pager"
  } >&2
  exit "${exit_code}"
}
# Single quotes defer $LINENO expansion until the trap actually fires.
trap 'on_error $LINENO' ERR
# ---------- Step 1: Disable swap ----------
log "Disabling swap immediately"
# Best effort: a swapoff failure must not abort the installation.
swapoff -a || true
log "Disabling swap persistently in /etc/fstab"
if [[ -f /etc/fstab ]]; then
  # Act only when an active (not yet commented) swap entry exists. This keeps
  # re-runs idempotent: no extra backup copies, and lines that already carry
  # the marker are not prefixed a second time.
  if grep -Eq '^[[:space:]]*[^#[:space:]].*[[:space:]]swap[[:space:]]' /etc/fstab; then
    cp /etc/fstab "/etc/fstab.bak.$(date +%Y%m%d%H%M%S)"
    # Same match as the grep above: first non-blank character is not '#'.
    sed -ri '/^\s*[^#[:space:]].*\sswap\s/s/^/# DISABLED FOR KUBERNETES: /' /etc/fstab
  fi
fi
# ---------- Step 2: Update system ----------
log "Updating package databases and system packages"
# Long-option spelling of `pacman -Syu --noconfirm`.
pacman --sync --refresh --sysupgrade --noconfirm
# ---------- Step 3: Resolve iptables conflict automatically ----------
log "Resolving iptables backend for Kubernetes"
# NOTE(review): `pacman -Q <name>` appears to also succeed for an installed
# package that merely provides <name> (iptables-nft provides iptables), so a
# plain existence check would fire again on every re-run. Comparing the
# reported package name limits the removal to the legacy package itself.
installed_iptables_pkg="$(pacman -Qq iptables 2>/dev/null || true)"
if [[ "${installed_iptables_pkg}" == "iptables" ]]; then
  log "Removing legacy iptables package so iptables-nft can be installed"
  # -Rdd skips dependency checks; the replacement is installed in the next
  # step. Kept best-effort, as before.
  pacman -Rdd --noconfirm iptables || true
fi
# ---------- Step 4: Install required packages ----------
log "Installing Kubernetes and runtime packages"
# Packages handed to pacman, in the order they are passed on the command line.
required_packages=(
  ca-certificates
  curl
  containerd
  cni-plugins
  crictl
  ethtool
  iptables-nft
  conntrack-tools
  socat
  kubeadm
  kubectl
  kubelet
)
pacman -S --needed --noconfirm "${required_packages[@]}"
# ---------- Step 5: Kernel modules ----------
log "Configuring required kernel modules"
k8s_kernel_modules=(overlay br_netfilter)
# Persist the list, one module name per line, under /etc/modules-load.d ...
printf '%s\n' "${k8s_kernel_modules[@]}" >/etc/modules-load.d/k8s.conf
# ... and load each module into the running kernel now.
for kernel_module in "${k8s_kernel_modules[@]}"; do
  modprobe "${kernel_module}"
done
# ---------- Step 6: Sysctl ----------
log "Configuring Kubernetes sysctl settings"
# Write the three settings to a drop-in file, one "key = value" per line.
printf '%s\n' \
  "net.bridge.bridge-nf-call-iptables = 1" \
  "net.bridge.bridge-nf-call-ip6tables = 1" \
  "net.ipv4.ip_forward = 1" \
  >/etc/sysctl.d/99-kubernetes-cri.conf
# Reload every sysctl configuration file so the new values apply immediately.
sysctl --system
# ---------- Step 7: containerd config ----------
log "Configuring containerd"
mkdir -p /etc/containerd
# -s (exists and is non-empty) rather than -f: if a previous run died while
# generating the file, the redirect leaves an empty config.toml behind, and
# that must be regenerated instead of being treated as a user configuration.
if [[ ! -s /etc/containerd/config.toml ]]; then
  containerd config default >/etc/containerd/config.toml
else
  cp /etc/containerd/config.toml "/etc/containerd/config.toml.bak.$(date +%Y%m%d%H%M%S)"
fi
# Ensure SystemdCgroup = true. The captured group keeps the line's original
# indentation, and spacing around '=' is tolerated.
sed -ri 's/^(\s*)SystemdCgroup\s*=\s*false\b/\1SystemdCgroup = true/' /etc/containerd/config.toml
# The substitution is silent when the key is absent (e.g. a hand-written
# config), so say so instead of leaving the cgroup driver unverified.
if ! grep -Eq '^[[:space:]]*SystemdCgroup[[:space:]]*=[[:space:]]*true' /etc/containerd/config.toml; then
  warn "SystemdCgroup = true was not found in /etc/containerd/config.toml; check the containerd cgroup driver manually"
fi
# Ensure sandbox image is left as default from containerd config or package
# No hardcoded pause image needed unless troubleshooting later
# ---------- Step 8: Enable services ----------
log "Enabling and starting containerd and kubelet"
systemctl daemon-reload
systemctl enable containerd
# Restart rather than just start: a containerd that was already running
# (re-run, or pre-installed) would otherwise keep using the configuration it
# loaded before /etc/containerd/config.toml was edited.
systemctl restart containerd
systemctl enable --now kubelet
# ---------- Step 9: Wait for containerd ----------
log "Waiting for containerd to become active"
# Probe up to 20 times, pausing one second after each failed probe.
probes_left=20
while (( probes_left > 0 )) && ! systemctl is-active --quiet containerd; do
  sleep 1
  probes_left=$(( probes_left - 1 ))
done
# Final verdict, whether the loop ended on success or ran out of probes.
if ! systemctl is-active --quiet containerd; then
  die "containerd did not start successfully"
fi
# ---------- Step 10: Pre-pull Kubernetes images ----------
log "Pulling Kubernetes control-plane images"
kubeadm config images pull
# ---------- Step 11: Initialize cluster ----------
# kubeadm init runs only when /etc/kubernetes/admin.conf is absent; if the
# file exists the step is skipped with a warning.
if [[ ! -f /etc/kubernetes/admin.conf ]]; then
  log "Initializing Kubernetes control plane"
  kubeadm init --pod-network-cidr="${POD_CIDR}"
else
  warn "/etc/kubernetes/admin.conf already exists; skipping kubeadm init"
fi
# ---------- Step 12: Configure kubectl for root ----------
log "Configuring kubectl for root"
root_kubeconfig="${KUBECONFIG_DIR_ROOT}/config"
mkdir -p "${KUBECONFIG_DIR_ROOT}"
# Copy the admin kubeconfig and restrict it to owner read/write.
cp -f /etc/kubernetes/admin.conf "${root_kubeconfig}"
chmod 600 "${root_kubeconfig}"
# Later kubectl calls in this script read the admin kubeconfig directly.
KUBECONFIG=/etc/kubernetes/admin.conf
export KUBECONFIG
# ---------- Step 13: Configure kubectl for invoking user ----------
# Give the invoking user a private copy of the admin kubeconfig, provided a
# home directory could be determined and exists.
if [[ -n "${REAL_HOME}" && -d "${REAL_HOME}" ]]; then
  log "Configuring kubectl for user ${REAL_USER}"
  mkdir -p "${REAL_KUBECONFIG_DIR}"
  cp -f /etc/kubernetes/admin.conf "${REAL_KUBECONFIG_DIR}/config"
  # "user:" with an empty group selects the user's login group. Spelling the
  # group as the user name fails for accounts whose primary group has a
  # different name, which would abort the script after the cluster is up.
  chown -R "${REAL_USER}:" "${REAL_KUBECONFIG_DIR}"
  chmod 600 "${REAL_KUBECONFIG_DIR}/config"
else
  warn "Could not determine invoking user's home directory; skipping user kubeconfig setup"
fi
# ---------- Step 14: Install Calico ----------
log "Installing Calico networking"
# The manifest is fetched from the Calico repository at the tag named by
# CALICO_VERSION.
calico_manifest_url="https://raw.githubusercontent.com/projectcalico/calico/${CALICO_VERSION}/manifests/calico.yaml"
kubectl apply -f "${calico_manifest_url}"
# ---------- Step 15: Save worker join command ----------
log "Saving worker join command"
# Write to a temporary file next to the target and move it into place only
# on success. mktemp creates the file readable by its owner only, so the
# bootstrap token is never exposed under a umask-default mode, and a failed
# kubeadm call no longer truncates a join command saved by an earlier run.
join_command_tmp="$(mktemp "${JOIN_COMMAND_FILE}.XXXXXX")"
if kubeadm token create --print-join-command >"${join_command_tmp}"; then
  chmod 700 "${join_command_tmp}"
  mv -f "${join_command_tmp}" "${JOIN_COMMAND_FILE}"
else
  rm -f "${join_command_tmp}"
  die "kubeadm token create failed; ${JOIN_COMMAND_FILE} was not updated"
fi
# ---------- Step 16: Show cluster status ----------
log "Waiting briefly for cluster components"
sleep 10
log "Cluster status"
# Purely informational: a failing kubectl query must not fail the script.
kubectl get nodes -o wide || :
printf '\n'
kubectl get pods -A || :
# ---------- Final output ----------
# Closing summary: where the kubeconfigs and the join command were written.
echo
echo "Kubernetes control plane installation is complete."
echo
echo "kubectl configured for:"
echo " root: ${KUBECONFIG_DIR_ROOT}/config"
# List the invoking user only when a separate kubeconfig really exists:
# this avoids a duplicate root line when the script was run without sudo
# and a false claim when the user kubeconfig step was skipped.
if [[ "${REAL_KUBECONFIG_DIR}" != "${KUBECONFIG_DIR_ROOT}" && -f "${REAL_KUBECONFIG_DIR}/config" ]]; then
  echo " ${REAL_USER}: ${REAL_KUBECONFIG_DIR}/config"
fi
echo
echo "Worker join command saved to:"
echo " ${JOIN_COMMAND_FILE}"
echo
echo "To view it:"
echo " sudo cat ${JOIN_COMMAND_FILE}"
echo
echo "If this is a single-node lab cluster and you want to schedule normal pods on the control-plane node, run:"
echo " kubectl taint nodes --all node-role.kubernetes.io/control-plane-"
echo