#!/usr/bin/env bash
# Strict mode:
#   -E  functions, command substitutions and subshells inherit the ERR trap
#   -e  abort on the first unhandled command failure
#   -u  treat expansion of an unset variable as an error
#   -o pipefail  a pipeline fails when any of its stages fails
set -Eeuo pipefail

########################################
# Arch Linux Kubernetes Control Plane
# Fully automated master node installer
# + Helm
# + ingress-nginx
# + cert-manager
# + Rancher
########################################

# ---------- Config ----------
# Every setting written as "${NAME:-default}" can be overridden from the
# environment of the invoking shell; the remaining ones are fixed paths.

# Pod network CIDR passed to `kubeadm init --pod-network-cidr`.
POD_CIDR="${POD_CIDR:-192.168.0.0/16}"
# Calico release tag used to build the manifest URL.
CALICO_VERSION="${CALICO_VERSION:-v3.31.4}"

KUBECONFIG_DIR_ROOT="/root/.kube"
JOIN_COMMAND_FILE="/root/kubeadm-join-command.sh"

INSTALL_HELM="${INSTALL_HELM:-true}"
INSTALL_RANCHER="${INSTALL_RANCHER:-true}"

# Single-node/lab convenience:
# Rancher, ingress-nginx, cert-manager, CoreDNS, etc. need schedulable capacity.
# On a single control-plane node, removing the control-plane taint is the simplest way.
ALLOW_WORKLOADS_ON_CONTROL_PLANE="${ALLOW_WORKLOADS_ON_CONTROL_PLANE:-true}"

# Rancher settings
RANCHER_REPO_CHANNEL="${RANCHER_REPO_CHANNEL:-stable}" # stable | latest | alpha
RANCHER_BOOTSTRAP_PASSWORD="${RANCHER_BOOTSTRAP_PASSWORD:-}"
RANCHER_HOSTNAME="${RANCHER_HOSTNAME:-}" # if empty, auto-generate rancher.<NODE_IP>.sslip.io
RANCHER_REPLICAS="${RANCHER_REPLICAS:-1}" # 1 for single-node lab installs
RANCHER_NAMESPACE="${RANCHER_NAMESPACE:-cattle-system}"

# ingress-nginx settings
INGRESS_NAMESPACE="${INGRESS_NAMESPACE:-ingress-nginx}"
INGRESS_CLASS_NAME="${INGRESS_CLASS_NAME:-nginx}"

# cert-manager settings
CERT_MANAGER_NAMESPACE="${CERT_MANAGER_NAMESPACE:-cert-manager}"

# Detect the real invoking user when run with sudo
REAL_USER="${SUDO_USER:-root}"
# `|| true` keeps a failed passwd lookup from aborting the script; the
# fallback on the next line then points REAL_HOME at /root.
REAL_HOME="$(getent passwd "$REAL_USER" | cut -d: -f6 || true)"
REAL_HOME="${REAL_HOME:-/root}"
REAL_KUBECONFIG_DIR="${REAL_HOME}/.kube"

# ---------- Logging ----------
#######################################
# Print an informational banner on stdout.
# Arguments: all arguments are joined with spaces into the message.
# Outputs:   a blank line, a rule, "[INFO] <message>", and a closing rule.
#######################################
log() {
  local rule="============================================================"
  printf '\n%s\n[INFO] %s\n%s\n' "${rule}" "$*" "${rule}"
}

#######################################
# Print a warning on stderr.
# Arguments: all arguments are joined with spaces into the message.
# Outputs:   a blank line followed by "[WARN] <message>", both on stderr so
#            the separator stays with the message when stdout is redirected.
#######################################
warn() {
  printf '\n[WARN] %s\n' "$*" >&2
}

#######################################
# Print an error on stderr and terminate the script.
# Arguments: all arguments are joined with spaces into the message.
# Outputs:   a blank line followed by "[ERROR] <message>", both on stderr.
# Returns:   never returns; exits the shell with status 1.
#######################################
die() {
  printf '\n[ERROR] %s\n' "$*" >&2
  exit 1
}

# ---------- Helpers ----------
#######################################
# Abort unless every named command can be resolved by the shell.
# Arguments: one or more command names (a single name works as before).
# Returns:   0 when all commands are found; otherwise calls die, which exits
#            with "Required command not found: <name>" for the first miss.
#######################################
require_cmd() {
  local cmd
  for cmd in "$@"; do
    command -v "${cmd}" >/dev/null 2>&1 || die "Required command not found: ${cmd}"
  done
}

#######################################
# Run a command until it succeeds or the attempt budget is used up.
# Arguments:
#   $1  - maximum number of attempts (default 10)
#   $2  - seconds to sleep between attempts (default 5)
#   $3+ - the command and its arguments
# Outputs:   a warning via warn after each failed attempt except the last.
# Returns:   0 as soon as the command succeeds, 1 when every attempt failed,
#            2 when no command was supplied.
#######################################
retry() {
  local attempts="${1:-10}"
  local sleep_seconds="${2:-5}"
  # `shift 2` fails without shifting anything when fewer than two arguments
  # are present, which would leave the attempt count behind to be executed as
  # the command; shift only what actually exists.
  shift "$(( $# < 2 ? $# : 2 ))"

  # An empty command line expands to nothing and "succeeds"; report the
  # misuse instead of returning a misleading 0.
  if (( $# == 0 )); then
    warn "retry: no command given"
    return 2
  fi

  local n=1
  until "$@"; do
    if (( n >= attempts )); then
      return 1
    fi
    warn "Command failed (attempt ${n}/${attempts}): $*"
    sleep "${sleep_seconds}"
    # Plain assignment: unlike (( n++ )), its exit status never depends on
    # the counter value, so it is safe under `set -e`.
    n=$(( n + 1 ))
  done
}

#######################################
# Register a Helm chart repository, replacing any existing entry of the
# same name.
# Arguments:
#   $1 - repository name
#   $2 - repository URL
# Returns:   the exit status of `helm repo add`.
#######################################
helm_repo_add_force() {
  local name="$1"
  local url="$2"
  # --force-update works for both new and already-registered repositories, so
  # no existence probe is needed. The previous probe piped `helm repo list`
  # through awk into `grep -q`; under `pipefail` an early grep exit can kill
  # the upstream stage with SIGPIPE and make an existing repo look absent.
  helm repo add "${name}" "${url}" --force-update >/dev/null
}

#######################################
# Ensure a namespace exists without failing when it already does.
# Renders the namespace manifest client-side and pipes it to `kubectl apply`.
# Arguments:
#   $1 - namespace name
# Returns:   the pipeline status (non-zero if either kubectl call fails while
#            `pipefail` is active).
#######################################
kubectl_ns_apply() {
  local ns="$1"
  kubectl create namespace "${ns}" --dry-run=client -o yaml \
    | kubectl apply -f -
}

# ---------- Root check ----------
# The steps below edit /etc, manage packages and services, and write under
# /root, so refuse to continue for any other effective user.
if (( EUID != 0 )); then
  die "Run this script as root, for example: sudo ./master_node_install.sh"
fi

# ---------- Cleanup on error ----------
#######################################
# ERR-trap handler: report where the script failed, list diagnostic commands
# the operator can run, then exit with the failing command's status.
# Arguments:
#   $1 - line number at which the failure occurred
# Outputs:   warnings and the command list, all on stderr.
# Returns:   never returns; exits with the status captured on entry.
#######################################
on_error() {
  # Must stay the first statement: any earlier command would overwrite $?.
  local exit_code=$?
  warn "Script failed on line $1 with exit code ${exit_code}"
  warn "Useful diagnostics:"
  # Same stream as the warnings so the list stays with them when stdout is
  # redirected.
  printf '%s\n' \
    " journalctl -u containerd -u kubelet -b --no-pager | tail -n 200" \
    " systemctl status containerd kubelet --no-pager" \
    " kubectl get nodes -o wide" \
    " kubectl get pods -A" >&2
  exit "${exit_code}"
}

# Single quotes defer $LINENO expansion until the trap actually fires.
trap 'on_error $LINENO' ERR

# ---------- Step 1: Disable swap ----------
log "Disabling swap immediately"
# Best effort: a failure here does not stop the installation.
swapoff -a || true

log "Disabling swap persistently in /etc/fstab"
if [[ -f /etc/fstab ]]; then
  # Timestamped backup so every run leaves a restorable copy.
  cp /etc/fstab "/etc/fstab.bak.$(date +%Y%m%d%H%M%S)"
  # Comment out swap entries, skipping lines that are already comments so
  # that reruns do not stack another prefix onto previously disabled entries.
  sed -ri '/^\s*#/!{/\sswap\s/s/^/# DISABLED FOR KUBERNETES: /}' /etc/fstab
fi

# ---------- Step 2: Update system ----------
log "Updating package databases and system packages"
pacman -Syu --noconfirm

# ---------- Step 3: Resolve iptables conflict automatically ----------
log "Resolving iptables backend for Kubernetes"
# Compare the reported package name instead of relying on the exit status:
# the query is expected to succeed for a package that merely provides
# "iptables" (such as iptables-nft), which would trigger a pointless removal
# attempt on reruns.
installed_iptables="$(pacman -Qq iptables 2>/dev/null || true)"
if [[ "${installed_iptables}" == "iptables" ]]; then
  log "Removing legacy iptables package so iptables-nft can be installed"
  # -dd skips dependency checks; iptables-nft is installed in the next step.
  pacman -Rdd --noconfirm iptables \
    || warn "Could not remove the iptables package; installing iptables-nft may fail"
fi

# ---------- Step 4: Install required packages ----------
log "Installing Kubernetes and runtime packages"
packages=(
  ca-certificates
  curl
  containerd
  cni-plugins
  crictl
  ethtool
  iptables-nft
  conntrack-tools
  socat
  kubeadm
  kubectl
  kubelet
  tar
  gzip
  jq
  openssl
  helm
)
# --needed leaves packages that are already current untouched.
pacman -S --needed --noconfirm "${packages[@]}"

# ---------- Step 5: Kernel modules ----------
log "Configuring required kernel modules"
# Persist the module list for future boots ...
cat >/etc/modules-load.d/k8s.conf <<'EOF'
overlay
br_netfilter
EOF

# ... and load the modules now so the sysctl keys below exist.
modprobe overlay
modprobe br_netfilter

# ---------- Step 6: Sysctl ----------
log "Configuring Kubernetes sysctl settings"
cat >/etc/sysctl.d/99-kubernetes-cri.conf <<'EOF'
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.ipv4.ip_forward = 1
EOF

# Apply every sysctl configuration file, including the one just written.
sysctl --system

# ---------- Step 7: containerd config ----------
log "Configuring containerd"
mkdir -p /etc/containerd

if [[ -f /etc/containerd/config.toml ]]; then
  # Keep a timestamped copy of the existing configuration before editing it.
  cp /etc/containerd/config.toml \
    "/etc/containerd/config.toml.bak.$(date +%Y%m%d%H%M%S)"
else
  containerd config default >/etc/containerd/config.toml
fi

# Ensure SystemdCgroup = true (the captured group preserves the indentation)
sed -ri 's/^(\s*)SystemdCgroup\s*=\s*false/\1SystemdCgroup = true/' \
  /etc/containerd/config.toml
# The substitution is a silent no-op when the key is missing, so verify it.
if ! grep -Eq '^\s*SystemdCgroup\s*=\s*true' /etc/containerd/config.toml; then
  warn "SystemdCgroup = true not found in /etc/containerd/config.toml; verify the cgroup driver manually"
fi

# ---------- Step 8: Enable services ----------
log "Enabling and starting containerd and kubelet"
systemctl daemon-reload
systemctl enable containerd
# restart (not `enable --now`): an already-running containerd must re-read the
# configuration edited above; restart also starts a stopped unit.
systemctl restart containerd
systemctl enable --now kubelet

# ---------- Step 9: Wait for containerd ----------
log "Waiting for containerd to become active"
# Poll once per second, for at most 20 tries.
for _ in {1..20}; do
  if systemctl is-active --quiet containerd; then
    break
  fi
  sleep 1
done
systemctl is-active --quiet containerd || die "containerd did not start successfully"

# ---------- Step 10: Pre-pull Kubernetes images ----------
log "Pulling Kubernetes control-plane images"
kubeadm config images pull

# ---------- Step 11: Initialize cluster ----------
# An existing admin.conf is treated as the marker of an earlier successful
# init, which makes the script safe to rerun.
if [[ ! -f /etc/kubernetes/admin.conf ]]; then
  log "Initializing Kubernetes control plane"
  kubeadm init --pod-network-cidr="${POD_CIDR}"
else
  warn "/etc/kubernetes/admin.conf already exists; skipping kubeadm init"
fi

# ---------- Step 12: Configure kubectl for root ----------
log "Configuring kubectl for root"
mkdir -p "${KUBECONFIG_DIR_ROOT}"
# install writes the copy with mode 600 directly, so the admin credentials
# are not briefly readable between a cp and a later chmod.
install -m 600 /etc/kubernetes/admin.conf "${KUBECONFIG_DIR_ROOT}/config"

# Every kubectl/helm call in the rest of the script uses the admin kubeconfig.
export KUBECONFIG=/etc/kubernetes/admin.conf

# ---------- Step 13: Configure kubectl for invoking user ----------
if [[ -n "${REAL_HOME}" && -d "${REAL_HOME}" ]]; then
  log "Configuring kubectl for user ${REAL_USER}"
  # Look up the user's primary group instead of assuming that a group named
  # after the user exists.
  real_group="$(id -gn "${REAL_USER}")"
  mkdir -p "${REAL_KUBECONFIG_DIR}"
  install -m 600 /etc/kubernetes/admin.conf "${REAL_KUBECONFIG_DIR}/config"
  chown -R "${REAL_USER}:${real_group}" "${REAL_KUBECONFIG_DIR}"
else
  warn "Could not determine invoking user's home directory; skipping user kubeconfig setup"
fi

require_cmd kubectl
require_cmd kubeadm
require_cmd helm

# ---------- Step 14: Wait for API ----------
log "Waiting for Kubernetes API to become responsive"
# Up to 60 attempts, 5 seconds apart; each request itself times out after 10s.
retry 60 5 kubectl version --request-timeout=10s >/dev/null

# ---------- Step 15: Optionally allow workloads on control-plane ----------
if [[ "${ALLOW_WORKLOADS_ON_CONTROL_PLANE}" == "true" ]]; then
  log "Allowing workloads on the control-plane node (single-node/lab mode)"
  # The trailing "-" removes the taint. Failures are ignored on purpose: a
  # given taint key may simply not be present on the node.
  for taint_key in control-plane master; do
    kubectl taint nodes --all "node-role.kubernetes.io/${taint_key}-" >/dev/null 2>&1 || true
  done
fi

# ---------- Step 16: Install Calico ----------
log "Installing Calico networking"
kubectl apply -f "https://raw.githubusercontent.com/projectcalico/calico/${CALICO_VERSION}/manifests/calico.yaml"

# ---------- Step 17: Wait for node readiness ----------
log "Waiting for node(s) to become Ready"
kubectl wait --for=condition=Ready node --all --timeout=10m

# ---------- Step 18: Wait for Calico ----------
log "Waiting for Calico components"
# Still best effort (the script continues), but a timeout is now reported
# instead of being swallowed silently.
kubectl -n kube-system rollout status daemonset/calico-node --timeout=10m \
  || warn "calico-node rollout did not complete within the timeout"
kubectl -n kube-system rollout status deployment/calico-kube-controllers --timeout=10m \
  || warn "calico-kube-controllers rollout did not complete within the timeout"

# ---------- Step 19: Save worker join command ----------
log "Saving worker join command"
# Create (or reset) the file with mode 700 before the token is written, so
# the join secret is never on disk with default-umask permissions.
install -m 700 /dev/null "${JOIN_COMMAND_FILE}"
# NOTE(review): bootstrap tokens created this way are expected to expire
# (see the kubeadm token documentation), so the saved command is not valid
# indefinitely.
kubeadm token create --print-join-command >"${JOIN_COMMAND_FILE}"

# ---------- Step 20: Determine node info ----------
log "Determining control-plane node information"
NODE_NAME="$(kubectl get nodes -o jsonpath='{.items[0].metadata.name}')"
# Validate the name before using it: querying a node with an empty name
# fails and would abort the script before the intended error message.
if [[ -z "${NODE_NAME}" ]]; then
  die "Failed to determine node name or node IP"
fi

NODE_IP="$(kubectl get node "${NODE_NAME}" -o jsonpath='{.status.addresses[?(@.type=="InternalIP")].address}')"
# A node with several InternalIP entries yields a space-separated list; keep
# only the first so the derived hostname stays valid.
NODE_IP="${NODE_IP%% *}"
if [[ -z "${NODE_IP}" ]]; then
  die "Failed to determine node name or node IP"
fi

if [[ -z "${RANCHER_HOSTNAME}" ]]; then
  RANCHER_HOSTNAME="rancher.${NODE_IP}.sslip.io"
fi

if [[ -z "${RANCHER_BOOTSTRAP_PASSWORD}" ]]; then
  # 20 characters from base64 output, with "/" and "+" mapped to letters.
  RANCHER_BOOTSTRAP_PASSWORD="$(openssl rand -base64 24 | tr -d '\n' | tr '/+' 'AB' | cut -c1-20)"
fi

# Create the file with mode 600 first so the password is never readable by
# other users, then write it; printf avoids echo's option parsing.
install -m 600 /dev/null /root/rancher-bootstrap-password.txt
printf '%s\n' "${RANCHER_BOOTSTRAP_PASSWORD}" >/root/rancher-bootstrap-password.txt

# ---------- Step 21: Install Helm repos ----------
# The ingress-nginx, cert-manager and Rancher installs below all pull charts
# from these repositories and read RANCHER_CHART, so the repositories must be
# configured whenever INSTALL_RANCHER is enabled, even if INSTALL_HELM is not.
if [[ "${INSTALL_HELM}" == "true" || "${INSTALL_RANCHER}" == "true" ]]; then
  if [[ "${INSTALL_HELM}" != "true" ]]; then
    warn "INSTALL_HELM is not 'true' but INSTALL_RANCHER is; configuring Helm repositories anyway"
  fi

  log "Configuring Helm repositories"
  helm_repo_add_force ingress-nginx https://kubernetes.github.io/ingress-nginx
  helm_repo_add_force jetstack https://charts.jetstack.io

  # The three channels differ only by name, which appears in the repository
  # alias, the URL path and the chart reference.
  case "${RANCHER_REPO_CHANNEL}" in
    stable | latest | alpha)
      helm_repo_add_force "rancher-${RANCHER_REPO_CHANNEL}" \
        "https://releases.rancher.com/server-charts/${RANCHER_REPO_CHANNEL}"
      RANCHER_CHART="rancher-${RANCHER_REPO_CHANNEL}/rancher"
      ;;
    *)
      die "Invalid RANCHER_REPO_CHANNEL: ${RANCHER_REPO_CHANNEL} (expected: stable, latest, alpha)"
      ;;
  esac

  helm repo update
fi

# ---------- Step 22: Install ingress-nginx ----------
if [[ "${INSTALL_RANCHER}" == "true" ]]; then
  log "Installing ingress-nginx"

  kubectl_ns_apply "${INGRESS_NAMESPACE}"

  ingress_helm_args=(
    --namespace "${INGRESS_NAMESPACE}"
    --create-namespace
    # Run the controller as a host-networked DaemonSet behind a ClusterIP
    # service.
    --set controller.kind=DaemonSet
    --set controller.hostNetwork=true
    --set controller.dnsPolicy=ClusterFirstWithHostNet
    --set controller.service.type=ClusterIP
    # Register the ingress class and make it the cluster default.
    --set controller.ingressClass="${INGRESS_CLASS_NAME}"
    --set controller.ingressClassResource.name="${INGRESS_CLASS_NAME}"
    --set controller.ingressClassResource.default=true
    --set controller.watchIngressWithoutClass=true
    --set controller.reportNodeInternalIp=true
    --wait
    --timeout 15m
  )
  helm upgrade --install ingress-nginx ingress-nginx/ingress-nginx "${ingress_helm_args[@]}"

  log "Waiting for ingress-nginx controller"
  kubectl -n "${INGRESS_NAMESPACE}" rollout status daemonset/ingress-nginx-controller --timeout=15m
fi

# ---------- Step 23: Install cert-manager ----------
if [[ "${INSTALL_RANCHER}" == "true" ]]; then
  log "Installing cert-manager"

  kubectl_ns_apply "${CERT_MANAGER_NAMESPACE}"

  # crds.enabled=true asks the chart to install its CRDs along with the
  # release.
  helm upgrade --install cert-manager jetstack/cert-manager \
    --namespace "${CERT_MANAGER_NAMESPACE}" \
    --create-namespace \
    --set crds.enabled=true \
    --wait \
    --timeout 15m

  log "Waiting for cert-manager deployments"
  for cm_deployment in cert-manager cert-manager-cainjector cert-manager-webhook; do
    kubectl -n "${CERT_MANAGER_NAMESPACE}" rollout status "deployment/${cm_deployment}" --timeout=15m
  done
fi

# ---------- Step 24: Install Rancher ----------
if [[ "${INSTALL_RANCHER}" == "true" ]]; then
  log "Installing Rancher"

  # RANCHER_CHART is assigned by the Helm repository step; fail with a clear
  # message rather than a bare "unbound variable" abort if that step did not
  # run.
  if [[ -z "${RANCHER_CHART:-}" ]]; then
    die "RANCHER_CHART is not set; the Helm repository step must run before installing Rancher"
  fi

  kubectl_ns_apply "${RANCHER_NAMESPACE}"

  # NOTE(review): the bootstrap password is passed on the command line, and
  # --set gives commas a special meaning; a user-supplied password containing
  # a comma would need escaping. Confirm whether that matters for your use.
  helm upgrade --install rancher "${RANCHER_CHART}" \
    --namespace "${RANCHER_NAMESPACE}" \
    --create-namespace \
    --set hostname="${RANCHER_HOSTNAME}" \
    --set bootstrapPassword="${RANCHER_BOOTSTRAP_PASSWORD}" \
    --set replicas="${RANCHER_REPLICAS}" \
    --set ingress.ingressClassName="${INGRESS_CLASS_NAME}" \
    --wait \
    --timeout 20m

  log "Waiting for Rancher rollout"
  # Still best effort, but a timeout is reported instead of being hidden.
  kubectl -n "${RANCHER_NAMESPACE}" rollout status deployment/rancher --timeout=20m \
    || warn "Rancher rollout did not complete within the timeout"
fi

# ---------- Step 25: Show cluster status ----------
log "Cluster status"
# Purely informational: a failing query must not fail an otherwise complete
# installation, hence the `|| true` on each call.
kubectl get nodes -o wide || true
echo
kubectl get pods -A || true
echo
kubectl get ingress -A || true

# ---------- Final output ----------
printf '%s\n' \
  "" \
  "Kubernetes control plane installation is complete." \
  "" \
  "kubectl configured for:" \
  " root: ${KUBECONFIG_DIR_ROOT}/config"
# List the invoking user's kubeconfig only when it is a distinct file that
# really exists (it is skipped when the home directory could not be found,
# and it is the same path as root's when the script was run directly as root).
if [[ "${REAL_KUBECONFIG_DIR}" != "${KUBECONFIG_DIR_ROOT}" && -f "${REAL_KUBECONFIG_DIR}/config" ]]; then
  printf '%s\n' " ${REAL_USER}: ${REAL_KUBECONFIG_DIR}/config"
fi
printf '%s\n' \
  "" \
  "Worker join command saved to:" \
  " ${JOIN_COMMAND_FILE}" \
  "" \
  "To view it:" \
  " sudo cat ${JOIN_COMMAND_FILE}" \
  ""

if [[ "${INSTALL_RANCHER}" == "true" ]]; then
  printf '%s\n' \
    "Rancher install completed." \
    "" \
    "Rancher URL:" \
    " https://${RANCHER_HOSTNAME}" \
    "" \
    "Rancher bootstrap password saved to:" \
    " /root/rancher-bootstrap-password.txt" \
    "" \
    "To view it:" \
    " sudo cat /root/rancher-bootstrap-password.txt" \
    "" \
    "Notes:" \
    " - sslip.io is used automatically when RANCHER_HOSTNAME is not set." \
    " - Because ingress-nginx is using host networking, access Rancher directly on this node's IP over 443." \
    " - If a local firewall is enabled, ensure ports 80 and 443 are allowed." \
    ""
fi