#!/usr/bin/env bash
set -Eeuo pipefail

########################################
# Arch Linux Kubernetes Control Plane
# Fully automated master node installer
# + Official Kubernetes binaries pinned to 1.34.x
# + Helm
# + ingress-nginx
# + cert-manager
# + Rancher
########################################

# ---------- Config ----------
# Settings written as "${NAME:-default}" can be overridden from the environment.
POD_CIDR="${POD_CIDR:-192.168.0.0/16}"
CALICO_VERSION="${CALICO_VERSION:-v3.31.4}"

# Rancher-compatible Kubernetes version
K8S_VERSION="${K8S_VERSION:-v1.34.6}"
# Accepted release series; K8S_VERSION and the running server version are
# matched against this pattern.
K8S_SERIES_REGEX='^v1\.34\.[0-9]+$'
K8S_ARCH="${K8S_ARCH:-amd64}"

KUBECONFIG_DIR_ROOT="/root/.kube"
JOIN_COMMAND_FILE="/root/kubeadm-join-command.sh"

INSTALL_HELM="${INSTALL_HELM:-true}"
INSTALL_RANCHER="${INSTALL_RANCHER:-true}"
ALLOW_WORKLOADS_ON_CONTROL_PLANE="${ALLOW_WORKLOADS_ON_CONTROL_PLANE:-true}"

# Rancher settings
RANCHER_REPO_CHANNEL="${RANCHER_REPO_CHANNEL:-stable}" # stable | latest | alpha
# Empty -> a random password is generated later in the script.
RANCHER_BOOTSTRAP_PASSWORD="${RANCHER_BOOTSTRAP_PASSWORD:-}"
# Empty -> auto: rancher.<NODE_IP>.sslip.io
RANCHER_HOSTNAME="${RANCHER_HOSTNAME:-}"
RANCHER_REPLICAS="${RANCHER_REPLICAS:-1}"
RANCHER_NAMESPACE="${RANCHER_NAMESPACE:-cattle-system}"

# ingress-nginx settings
INGRESS_NAMESPACE="${INGRESS_NAMESPACE:-ingress-nginx}"
INGRESS_CLASS_NAME="${INGRESS_CLASS_NAME:-nginx}"

# cert-manager settings
CERT_MANAGER_NAMESPACE="${CERT_MANAGER_NAMESPACE:-cert-manager}"

# Binary locations
KUBEADM_BIN="/usr/local/bin/kubeadm"
KUBECTL_BIN="/usr/local/bin/kubectl"
KUBELET_BIN="/usr/local/bin/kubelet"

# Detect the real invoking user when run with sudo
REAL_USER="${SUDO_USER:-root}"
# Home directory is field 6 of the passwd entry; fall back to /root when the
# lookup yields nothing.
REAL_HOME="$(getent passwd "$REAL_USER" | cut -d: -f6 || true)"
REAL_HOME="${REAL_HOME:-/root}"
REAL_KUBECONFIG_DIR="${REAL_HOME}/.kube"

# ---------- Logging ----------

# Print an informational banner on stdout.
# Arguments: message words (joined with spaces)
log() {
  local rule="============================================================"
  printf '\n%s\n[INFO] %s\n%s\n' "${rule}" "$*" "${rule}"
}

# Print a warning on stderr. The blank separator line goes to stderr as well,
# so stdout carries no diagnostics.
# Arguments: message words (joined with spaces)
warn() {
  printf '\n[WARN] %s\n' "$*" >&2
}

# Print an error on stderr (separator line included) and exit with status 1.
# Arguments: message words (joined with spaces)
die() {
  printf '\n[ERROR] %s\n' "$*" >&2
  exit 1
}
# ---------- Helpers ----------

# Abort via die() unless the given command name or path resolves.
# Arguments: $1 - command name or path
require_cmd() {
  command -v "$1" >/dev/null 2>&1 || die "Required command not found: $1"
}

# Run a command until it succeeds, warning after each failed attempt.
# Arguments: $1 - maximum attempts (default 10)
#            $2 - seconds to sleep between attempts (default 5)
#            remaining - the command and its arguments
# Returns:   0 once the command succeeds, 1 when the attempts are used up
retry() {
  local attempts="${1:-10}"
  local sleep_seconds="${2:-5}"
  shift 2 || true
  local n=1
  until "$@"; do
    if (( n >= attempts )); then
      return 1
    fi
    warn "Command failed (attempt ${n}/${attempts}): $*"
    sleep "${sleep_seconds}"
    # Plain assignment instead of ((n++)): its exit status is always 0, so it
    # cannot trip `set -e`.
    n=$(( n + 1 ))
  done
}

# Add a Helm repository, passing --force-update when a repository with that
# name is already listed.
# Arguments: $1 - repository name, $2 - repository URL
helm_repo_add_force() {
  local name="$1"
  local url="$2"
  if helm repo list 2>/dev/null | awk '{print $1}' | grep -qx "${name}"; then
    helm repo add "${name}" "${url}" --force-update >/dev/null
  else
    helm repo add "${name}" "${url}" >/dev/null
  fi
}

# Create a namespace if missing by rendering it client-side and applying it.
# Arguments: $1 - namespace name
kubectl_ns_apply() {
  local ns="$1"
  "${KUBECTL_BIN}" create namespace "${ns}" --dry-run=client -o yaml \
    | "${KUBECTL_BIN}" apply -f -
}

# Download one Kubernetes release binary for K8S_VERSION/K8S_ARCH from
# dl.k8s.io, verify it against the published .sha256 file and install it into
# /usr/local/bin. The temporary directory is removed on failure as well.
# Globals:   K8S_VERSION, K8S_ARCH (read)
# Arguments: $1 - binary name (kubeadm, kubectl or kubelet)
download_k8s_binary() {
  local name="$1"
  local base_url="https://dl.k8s.io/release/${K8S_VERSION}/bin/linux/${K8S_ARCH}"
  local tmpdir
  tmpdir="$(mktemp -d)"

  if ! curl -fsSL -o "${tmpdir}/${name}" "${base_url}/${name}" \
    || ! curl -fsSL -o "${tmpdir}/${name}.sha256" "${base_url}/${name}.sha256"; then
    rm -rf -- "${tmpdir}"
    die "Download failed for ${name} ${K8S_VERSION}"
  fi

  # The .sha256 file holds only the digest; sha256sum --check expects
  # "<digest>  <file>" with two separating spaces.
  if ! (
    cd "${tmpdir}" \
      && printf '%s  %s\n' "$(cat "${name}.sha256")" "${name}" \
      | sha256sum --check --status
  ); then
    rm -rf -- "${tmpdir}"
    die "Checksum verification failed for ${name} ${K8S_VERSION}"
  fi

  if ! install -o root -g root -m 0755 "${tmpdir}/${name}" "/usr/local/bin/${name}"; then
    rm -rf -- "${tmpdir}"
    die "Failed to install ${name} into /usr/local/bin"
  fi
  rm -rf -- "${tmpdir}"
}

# Write the kubelet systemd unit and its kubeadm drop-in.
# The drop-in clears ExecStart and re-declares it with the argument variables
# taken from the Environment=/EnvironmentFile= entries.
install_kubelet_service() {
  log "Installing kubelet systemd service"

  mkdir -p /etc/systemd/system/kubelet.service.d
  mkdir -p /etc/default
  touch /etc/default/kubelet

  cat >/etc/systemd/system/kubelet.service <<'EOF'
[Unit]
Description=kubelet: The Kubernetes Node Agent
Documentation=https://kubernetes.io/docs/
After=containerd.service network-online.target
Wants=network-online.target
Requires=containerd.service

[Service]
ExecStart=/usr/local/bin/kubelet
Restart=always
StartLimitInterval=0
RestartSec=10

[Install]
WantedBy=multi-user.target
EOF

  cat >/etc/systemd/system/kubelet.service.d/10-kubeadm.conf <<'EOF'
[Service]
Environment="KUBELET_KUBECONFIG_ARGS=--bootstrap-kubeconfig=/etc/kubernetes/bootstrap-kubelet.conf --kubeconfig=/etc/kubernetes/kubelet.conf"
Environment="KUBELET_CONFIG_ARGS=--config=/var/lib/kubelet/config.yaml"
EnvironmentFile=-/var/lib/kubelet/kubeadm-flags.env
EnvironmentFile=-/etc/default/kubelet
ExecStart=
ExecStart=/usr/local/bin/kubelet $KUBELET_KUBECONFIG_ARGS $KUBELET_CONFIG_ARGS $KUBELET_KUBEADM_ARGS $KUBELET_EXTRA_ARGS
EOF
}

# Print the server version of an already-initialized cluster.
# Prints nothing when /etc/kubernetes/admin.conf is absent or the reply has
# no serverVersion.gitVersion.
existing_cluster_version() {
  if [[ -f /etc/kubernetes/admin.conf ]]; then
    "${KUBECTL_BIN}" --kubeconfig=/etc/kubernetes/admin.conf version -o json 2>/dev/null \
      | jq -r '.serverVersion.gitVersion // empty'
  fi
}

# Abort unless K8S_VERSION matches K8S_SERIES_REGEX.
ensure_rancher_supported_k8s() {
  [[ "${K8S_VERSION}" =~ ${K8S_SERIES_REGEX} ]] || die \
    "Rancher is enabled, but K8S_VERSION=${K8S_VERSION} is not a 1.34.x release. Set K8S_VERSION to a supported 1.34.x patch release."
}

# ---------- Root check ----------
if [[ "${EUID}" -ne 0 ]]; then
  die "Run this script as root, for example: sudo ./master_node_install.sh"
fi

# ---------- Cleanup on error ----------
# ERR trap handler: report the failing line, print diagnostic commands for the
# operator and exit with the failing command's status.
# Arguments: $1 - line number of the failing command
on_error() {
  local exit_code=$?
  warn "Script failed on line $1 with exit code ${exit_code}"
  warn "Useful diagnostics:"
  echo " journalctl -u containerd -u kubelet -b --no-pager | tail -n 200"
  echo " systemctl status containerd kubelet --no-pager"
  echo " ${KUBECTL_BIN} get nodes -o wide"
  echo " ${KUBECTL_BIN} get pods -A"
  exit "${exit_code}"
}
trap 'on_error $LINENO' ERR

if [[ "${INSTALL_RANCHER}" == "true" ]]; then
  ensure_rancher_supported_k8s
fi

# ---------- Step 1: Disable swap ----------
log "Disabling swap immediately"
swapoff -a || true

log "Disabling swap persistently in /etc/fstab"
if [[ -f /etc/fstab ]]; then
  cp /etc/fstab "/etc/fstab.bak.$(date +%Y%m%d%H%M%S)"
  # Skip lines that are already comments so reruns do not stack the prefix.
  sed -ri '/^\s*#/!{/\sswap\s/s/^/# DISABLED FOR KUBERNETES: /}' /etc/fstab
fi

# ---------- Step 2: Update system ----------
log "Updating package databases and system packages"
pacman -Syu --noconfirm

# ---------- Step 3: Resolve iptables conflict automatically ----------
log "Resolving iptables backend for Kubernetes"
if pacman -Q iptables >/dev/null 2>&1; then
  log "Removing legacy iptables package so iptables-nft can be installed"
  pacman -Rdd --noconfirm iptables || true
fi

# ---------- Step 4: Install required Arch packages ----------
log "Installing runtime and support packages from Arch"
pacman -S --needed --noconfirm \
  ca-certificates \
  curl \
  containerd \
  cni-plugins \
  crictl \
  ethtool \
  iptables-nft \
  conntrack-tools \
  socat \
  tar \
  gzip \
  jq \
  openssl \
  helm

# ---------- Step 5: Remove Arch Kubernetes packages if present ----------
log "Removing Arch-provided kubeadm/kubectl/kubelet if present"
for pkg in kubeadm kubectl kubelet; do
  if pacman -Q "${pkg}" >/dev/null 2>&1; then
    pacman -Rdd --noconfirm "${pkg}" || true
  fi
done

# ---------- Step 6: Install pinned Kubernetes binaries ----------
log "Installing Kubernetes binaries ${K8S_VERSION}"
download_k8s_binary kubeadm
download_k8s_binary kubectl
download_k8s_binary kubelet

require_cmd "${KUBEADM_BIN}"
require_cmd "${KUBECTL_BIN}"
require_cmd "${KUBELET_BIN}"

# ---------- Step 7: Kernel modules ----------
log "Configuring required kernel modules"
cat >/etc/modules-load.d/k8s.conf <<'EOF'
overlay
br_netfilter
EOF

modprobe overlay
modprobe br_netfilter

# ---------- Step 8: Sysctl ----------
log "Configuring Kubernetes sysctl settings"
cat >/etc/sysctl.d/99-kubernetes-cri.conf <<'EOF'
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.ipv4.ip_forward = 1
EOF

sysctl --system

# ---------- Step 9: containerd config ----------
log "Configuring containerd"
mkdir -p /etc/containerd
if [[ ! -f /etc/containerd/config.toml ]]; then
  containerd config default >/etc/containerd/config.toml
else
  cp /etc/containerd/config.toml "/etc/containerd/config.toml.bak.$(date +%Y%m%d%H%M%S)"
fi

# Switch to the systemd cgroup driver, keeping the line's indentation.
sed -ri 's/^(\s*)SystemdCgroup = false/\1SystemdCgroup = true/' /etc/containerd/config.toml

# ---------- Step 10: kubelet service ----------
install_kubelet_service

# ---------- Step 11: Enable services ----------
log "Enabling and starting containerd and kubelet"
systemctl daemon-reload
systemctl enable containerd
# restart (not just start): an already-running containerd must re-read the
# config.toml edited in Step 9.
systemctl restart containerd
systemctl enable --now kubelet

# ---------- Step 12: Wait for containerd ----------
log "Waiting for containerd to become active"
for _ in {1..20}; do
  if systemctl is-active --quiet containerd; then
    break
  fi
  sleep 1
done

systemctl is-active --quiet containerd || die "containerd did not start successfully"

# ---------- Step 13: Handle existing cluster ----------
EXISTING_CLUSTER_VERSION="$(existing_cluster_version || true)"

if [[ -n "${EXISTING_CLUSTER_VERSION}" ]]; then
  log "Detected existing Kubernetes cluster: ${EXISTING_CLUSTER_VERSION}"
  if [[ "${EXISTING_CLUSTER_VERSION}" != "${K8S_VERSION}" ]]; then
    die "Existing cluster version is ${EXISTING_CLUSTER_VERSION}, but this script is pinned to ${K8S_VERSION}. Reset/rebuild the cluster before rerunning."
  fi
fi

# ---------- Step 14: Pre-pull Kubernetes images ----------
log "Pulling Kubernetes control-plane images"
"${KUBEADM_BIN}" config images pull --kubernetes-version="${K8S_VERSION}"

# ---------- Step 15: Initialize cluster ----------
if [[ -f /etc/kubernetes/admin.conf ]]; then
  warn "/etc/kubernetes/admin.conf already exists; skipping kubeadm init"
else
  log "Initializing Kubernetes control plane"
  "${KUBEADM_BIN}" init \
    --kubernetes-version="${K8S_VERSION}" \
    --pod-network-cidr="${POD_CIDR}"
fi

# ---------- Step 16: Configure kubectl for root ----------
log "Configuring kubectl for root"
mkdir -p "${KUBECONFIG_DIR_ROOT}"
cp -f /etc/kubernetes/admin.conf "${KUBECONFIG_DIR_ROOT}/config"
chmod 600 "${KUBECONFIG_DIR_ROOT}/config"
export KUBECONFIG=/etc/kubernetes/admin.conf

# ---------- Step 17: Configure kubectl for invoking user ----------
if [[ -n "${REAL_HOME}" && -d "${REAL_HOME}" ]]; then
  log "Configuring kubectl for user ${REAL_USER}"
  mkdir -p "${REAL_KUBECONFIG_DIR}"
  cp -f /etc/kubernetes/admin.conf "${REAL_KUBECONFIG_DIR}/config"
  # "user:" selects the user's login group, so no group named after the user
  # is required.
  chown -R "${REAL_USER}:" "${REAL_KUBECONFIG_DIR}"
  chmod 600 "${REAL_KUBECONFIG_DIR}/config"
else
  warn "Could not determine invoking user's home directory; skipping user kubeconfig setup"
fi

# ---------- Step 18: Verify cluster version ----------
log "Verifying Kubernetes server version"
# Wait for the API server first; querying it before it answers would abort
# the script under `set -e`.
retry 60 5 "${KUBECTL_BIN}" version --request-timeout=10s >/dev/null \
  || die "Kubernetes API did not become responsive"
SERVER_VERSION="$("${KUBECTL_BIN}" version -o json | jq -r '.serverVersion.gitVersion')"
[[ "${SERVER_VERSION}" =~ ${K8S_SERIES_REGEX} ]] || die \
  "Cluster server version ${SERVER_VERSION} is not a supported 1.34.x release for this Rancher workflow."
# ---------- Step 19: Wait for API ----------
log "Waiting for Kubernetes API to become responsive"
retry 60 5 "${KUBECTL_BIN}" version --request-timeout=10s >/dev/null

# ---------- Step 20: Optionally allow workloads on control-plane ----------
if [[ "${ALLOW_WORKLOADS_ON_CONTROL_PLANE}" == "true" ]]; then
  log "Allowing workloads on the control-plane node (single-node/lab mode)"
  # Best effort: removing a taint that is not present fails, hence `|| true`.
  "${KUBECTL_BIN}" taint nodes --all node-role.kubernetes.io/control-plane- >/dev/null 2>&1 || true
  "${KUBECTL_BIN}" taint nodes --all node-role.kubernetes.io/master- >/dev/null 2>&1 || true
fi

# ---------- Step 21: Install Calico ----------
log "Installing Calico networking"
"${KUBECTL_BIN}" apply -f "https://raw.githubusercontent.com/projectcalico/calico/${CALICO_VERSION}/manifests/calico.yaml"

# ---------- Step 22: Wait for node readiness ----------
log "Waiting for node(s) to become Ready"
"${KUBECTL_BIN}" wait --for=condition=Ready node --all --timeout=10m

# ---------- Step 23: Wait for Calico ----------
log "Waiting for Calico components"
# Best effort: a slow Calico rollout does not abort the installer.
"${KUBECTL_BIN}" -n kube-system rollout status daemonset/calico-node --timeout=10m || true
"${KUBECTL_BIN}" -n kube-system rollout status deployment/calico-kube-controllers --timeout=10m || true

# ---------- Step 24: Save worker join command ----------
log "Saving worker join command"
# The file holds a join token: create it under a restrictive umask so it is
# never readable by other users, not even before the chmod below.
(
  umask 077
  "${KUBEADM_BIN}" token create --print-join-command >"${JOIN_COMMAND_FILE}"
)
chmod 700 "${JOIN_COMMAND_FILE}"

# ---------- Step 25: Determine node info ----------
log "Determining control-plane node information"
NODE_NAME="$("${KUBECTL_BIN}" get nodes -o jsonpath='{.items[0].metadata.name}')"
NODE_IP="$("${KUBECTL_BIN}" get node "${NODE_NAME}" -o jsonpath='{.status.addresses[?(@.type=="InternalIP")].address}')"
# Several InternalIP entries (e.g. dual-stack) are printed space-separated;
# keep only the first so the derived hostname stays valid.
NODE_IP="${NODE_IP%% *}"

if [[ -z "${NODE_NAME}" || -z "${NODE_IP}" ]]; then
  die "Failed to determine node name or node IP"
fi

if [[ -z "${RANCHER_HOSTNAME}" ]]; then
  RANCHER_HOSTNAME="rancher.${NODE_IP}.sslip.io"
fi

if [[ -z "${RANCHER_BOOTSTRAP_PASSWORD}" ]]; then
  # 20 characters of base64 output with '/' and '+' mapped to letters.
  RANCHER_BOOTSTRAP_PASSWORD="$(openssl rand -base64 24 | tr -d '\n' | tr '/+' 'AB' | cut -c1-20)"
fi

# printf instead of echo so a value such as "-n" is written literally; the
# umask keeps a newly created file private from the start.
(
  umask 077
  printf '%s\n' "${RANCHER_BOOTSTRAP_PASSWORD}" >/root/rancher-bootstrap-password.txt
)
chmod 600 /root/rancher-bootstrap-password.txt

# ---------- Step 26: Install Helm repos ----------
# Resolve the Rancher repo/chart names whenever Helm repos or Rancher are
# requested. RANCHER_CHART is needed by Step 29 even when INSTALL_HELM is not
# "true"; leaving it unset there would abort under `set -u`.
if [[ "${INSTALL_HELM}" == "true" || "${INSTALL_RANCHER}" == "true" ]]; then
  case "${RANCHER_REPO_CHANNEL}" in
    stable | latest | alpha)
      RANCHER_REPO_NAME="rancher-${RANCHER_REPO_CHANNEL}"
      RANCHER_CHART="${RANCHER_REPO_NAME}/rancher"
      ;;
    *)
      die "Invalid RANCHER_REPO_CHANNEL: ${RANCHER_REPO_CHANNEL} (expected: stable, latest, alpha)"
      ;;
  esac
fi

if [[ "${INSTALL_HELM}" == "true" ]]; then
  log "Configuring Helm repositories"

  helm_repo_add_force ingress-nginx https://kubernetes.github.io/ingress-nginx
  helm_repo_add_force jetstack https://charts.jetstack.io
  helm_repo_add_force "${RANCHER_REPO_NAME}" \
    "https://releases.rancher.com/server-charts/${RANCHER_REPO_CHANNEL}"

  helm repo update
fi

# ---------- Step 27: Install ingress-nginx ----------
if [[ "${INSTALL_RANCHER}" == "true" ]]; then
  log "Installing ingress-nginx"

  kubectl_ns_apply "${INGRESS_NAMESPACE}"

  helm upgrade --install ingress-nginx ingress-nginx/ingress-nginx \
    --namespace "${INGRESS_NAMESPACE}" \
    --create-namespace \
    --set controller.kind=DaemonSet \
    --set controller.hostNetwork=true \
    --set controller.dnsPolicy=ClusterFirstWithHostNet \
    --set controller.service.type=ClusterIP \
    --set controller.ingressClass="${INGRESS_CLASS_NAME}" \
    --set controller.ingressClassResource.name="${INGRESS_CLASS_NAME}" \
    --set controller.ingressClassResource.default=true \
    --set controller.watchIngressWithoutClass=true \
    --set controller.reportNodeInternalIp=true \
    --wait \
    --timeout 15m

  log "Waiting for ingress-nginx controller"
  "${KUBECTL_BIN}" -n "${INGRESS_NAMESPACE}" rollout status daemonset/ingress-nginx-controller --timeout=15m
fi

# ---------- Step 28: Install cert-manager ----------
if [[ "${INSTALL_RANCHER}" == "true" ]]; then
  log "Installing cert-manager"

  kubectl_ns_apply "${CERT_MANAGER_NAMESPACE}"

  helm upgrade --install cert-manager jetstack/cert-manager \
    --namespace "${CERT_MANAGER_NAMESPACE}" \
    --create-namespace \
    --set crds.enabled=true \
    --wait \
    --timeout 15m

  log "Waiting for cert-manager deployments"
  "${KUBECTL_BIN}" -n "${CERT_MANAGER_NAMESPACE}" rollout status deployment/cert-manager --timeout=15m
  "${KUBECTL_BIN}" -n "${CERT_MANAGER_NAMESPACE}" rollout status deployment/cert-manager-cainjector --timeout=15m
  "${KUBECTL_BIN}" -n "${CERT_MANAGER_NAMESPACE}" rollout status deployment/cert-manager-webhook --timeout=15m
fi

# ---------- Step 29: Install Rancher ----------
if [[ "${INSTALL_RANCHER}" == "true" ]]; then
  log "Installing Rancher"

  kubectl_ns_apply "${RANCHER_NAMESPACE}"

  helm upgrade --install rancher "${RANCHER_CHART}" \
    --namespace "${RANCHER_NAMESPACE}" \
    --create-namespace \
    --set hostname="${RANCHER_HOSTNAME}" \
    --set bootstrapPassword="${RANCHER_BOOTSTRAP_PASSWORD}" \
    --set replicas="${RANCHER_REPLICAS}" \
    --set ingress.ingressClassName="${INGRESS_CLASS_NAME}" \
    --set ingress.tls.source=rancher \
    --wait \
    --timeout 20m

  log "Waiting for Rancher rollout"
  "${KUBECTL_BIN}" -n "${RANCHER_NAMESPACE}" rollout status deployment/rancher --timeout=20m || true
fi

# ---------- Step 30: Show cluster status ----------
log "Cluster status"
# Informational only: a failing query must not fail the finished install.
"${KUBECTL_BIN}" get nodes -o wide || true
echo
"${KUBECTL_BIN}" get pods -A || true
echo
"${KUBECTL_BIN}" get ingress -A || true

# ---------- Final output ----------
echo
echo "Kubernetes control plane installation is complete."
# Closing summary: pinned version, kubeconfig locations and where the worker
# join command was stored. Unquoted heredoc delimiters so the variables expand.
cat <<EOF

Pinned Kubernetes version:
 ${K8S_VERSION}

kubectl configured for:
 root: ${KUBECONFIG_DIR_ROOT}/config
 ${REAL_USER}: ${REAL_KUBECONFIG_DIR}/config

Worker join command saved to:
 ${JOIN_COMMAND_FILE}

To view it:
 sudo cat ${JOIN_COMMAND_FILE}

EOF

# Rancher details are only printed when the Rancher install was requested.
if [[ "${INSTALL_RANCHER}" == "true" ]]; then
  cat <<EOF
Rancher install completed.

Rancher URL:
 https://${RANCHER_HOSTNAME}

Rancher bootstrap password saved to:
 /root/rancher-bootstrap-password.txt

To view it:
 sudo cat /root/rancher-bootstrap-password.txt

Notes:
 - Rancher is using a 1.34.x Kubernetes control plane on purpose for compatibility.
 - ingress-nginx is using host networking, so access Rancher directly on this node's IP over 443.
 - If a local firewall is enabled, ensure ports 80 and 443 are allowed.
 - Rancher-generated TLS will usually produce a browser warning until you trust the cert.

EOF
fi