#!/usr/bin/env bash
set -Eeuo pipefail

########################################
# Arch Linux Kubernetes Control Plane
# Fully automated master node installer
#   + Official Kubernetes binaries pinned to 1.34.x
#   + Helm
#   + ingress-nginx
#   + cert-manager
#   + Rancher
#
# Usage: run as root (for example through sudo). Every setting in the
# Config section below can be overridden with an environment variable of
# the same name.
########################################

# ---------- Config ----------
POD_CIDR="${POD_CIDR:-192.168.0.0/16}"
CALICO_VERSION="${CALICO_VERSION:-v3.31.4}"

# Rancher-compatible Kubernetes version
K8S_VERSION="${K8S_VERSION:-v1.34.6}"
K8S_SERIES_REGEX='^v1\.34\.[0-9]+$'
K8S_ARCH="${K8S_ARCH:-amd64}"

KUBECONFIG_DIR_ROOT="/root/.kube"
JOIN_COMMAND_FILE="/root/kubeadm-join-command.sh"
RANCHER_PASSWORD_FILE="/root/rancher-bootstrap-password.txt"

INSTALL_HELM="${INSTALL_HELM:-true}"
INSTALL_RANCHER="${INSTALL_RANCHER:-true}"

ALLOW_WORKLOADS_ON_CONTROL_PLANE="${ALLOW_WORKLOADS_ON_CONTROL_PLANE:-true}"

# Rancher settings
RANCHER_REPO_CHANNEL="${RANCHER_REPO_CHANNEL:-stable}" # stable | latest | alpha
RANCHER_BOOTSTRAP_PASSWORD="${RANCHER_BOOTSTRAP_PASSWORD:-}" # empty -> generated
RANCHER_HOSTNAME="${RANCHER_HOSTNAME:-}" # empty -> rancher.<node-ip>.sslip.io
RANCHER_REPLICAS="${RANCHER_REPLICAS:-1}"
RANCHER_NAMESPACE="${RANCHER_NAMESPACE:-cattle-system}"

# ingress-nginx settings
INGRESS_NAMESPACE="${INGRESS_NAMESPACE:-ingress-nginx}"
INGRESS_CLASS_NAME="${INGRESS_CLASS_NAME:-nginx}"

# cert-manager settings
CERT_MANAGER_NAMESPACE="${CERT_MANAGER_NAMESPACE:-cert-manager}"

# Binary locations
KUBEADM_BIN="/usr/local/bin/kubeadm"
KUBECTL_BIN="/usr/local/bin/kubectl"
KUBELET_BIN="/usr/local/bin/kubelet"

# Detect the real invoking user when run with sudo
REAL_USER="${SUDO_USER:-root}"
REAL_HOME="$(getent passwd "${REAL_USER}" | cut -d: -f6 || true)"
REAL_HOME="${REAL_HOME:-/root}"
REAL_KUBECONFIG_DIR="${REAL_HOME}/.kube"

# ---------- Logging ----------

# Print a banner-style informational message to stdout.
log() {
  echo
  echo "============================================================"
  echo "[INFO] $*"
  echo "============================================================"
}

# Print a warning to stderr.
warn() {
  echo
  echo "[WARN] $*" >&2
}

# Print an error to stderr and terminate the script with status 1.
die() {
  echo
  echo "[ERROR] $*" >&2
  exit 1
}

# ---------- Helpers ----------

# Abort unless the given command name (or absolute path) is executable.
#   $1 - command to look up
require_cmd() {
  command -v "$1" >/dev/null 2>&1 || die "Required command not found: $1"
}

# Run a command until it succeeds or the attempt budget is exhausted.
#   $1   - maximum number of attempts (default 10)
#   $2   - seconds to sleep between attempts (default 5)
#   $3.. - command and arguments
# Returns 0 as soon as the command succeeds, 1 after the last failed attempt.
retry() {
  local attempts="${1:-10}"
  local sleep_seconds="${2:-5}"
  shift 2 || true

  local n=1
  until "$@"; do
    if ((n >= attempts)); then
      return 1
    fi
    warn "Command failed (attempt ${n}/${attempts}): $*"
    sleep "${sleep_seconds}"
    n=$((n + 1))
  done
}

# Register a Helm repository, overwriting any existing entry of that name.
#   $1 - repository name
#   $2 - repository URL
helm_repo_add_force() {
  local name="$1"
  local url="$2"
  # --force-update handles both the "new" and the "already present" case,
  # so no prior `helm repo list` lookup is needed.
  helm repo add "${name}" "${url}" --force-update >/dev/null
}

# Create a namespace if it does not exist yet (no error when it does).
#   $1 - namespace name
kubectl_ns_apply() {
  local ns="$1"
  "${KUBECTL_BIN}" create namespace "${ns}" --dry-run=client -o yaml |
    "${KUBECTL_BIN}" apply -f -
}

# Download one Kubernetes release binary for K8S_VERSION/K8S_ARCH, verify it
# against the published SHA-256 checksum and install it to /usr/local/bin.
# The scratch directory is removed on every exit path, including failures.
#   $1 - binary name (kubeadm, kubectl or kubelet)
download_k8s_binary() {
  local name="$1"
  local base_url="https://dl.k8s.io/release/${K8S_VERSION}/bin/linux/${K8S_ARCH}"
  local tmpdir
  tmpdir="$(mktemp -d)" || die "Could not create a temporary directory"

  if ! curl -fsSL -o "${tmpdir}/${name}" "${base_url}/${name}" ||
    ! curl -fsSL -o "${tmpdir}/${name}.sha256" "${base_url}/${name}.sha256"; then
    rm -rf -- "${tmpdir}"
    die "Download failed for ${name} ${K8S_VERSION}"
  fi

  # sha256sum --check expects "<hash><two spaces><file>".
  if ! printf '%s  %s\n' "$(<"${tmpdir}/${name}.sha256")" "${tmpdir}/${name}" |
    sha256sum --check --status; then
    rm -rf -- "${tmpdir}"
    die "Checksum verification failed for ${name} ${K8S_VERSION}"
  fi

  if ! install -o root -g root -m 0755 "${tmpdir}/${name}" "/usr/local/bin/${name}"; then
    rm -rf -- "${tmpdir}"
    die "Could not install ${name} to /usr/local/bin"
  fi
  rm -rf -- "${tmpdir}"
}

# Write the kubelet systemd unit and the kubeadm drop-in that passes the
# kubeadm-generated flags to the kubelet binary in /usr/local/bin.
install_kubelet_service() {
  log "Installing kubelet systemd service"

  mkdir -p /etc/systemd/system/kubelet.service.d
  touch /etc/default/kubelet

  cat >/etc/systemd/system/kubelet.service <<'EOF'
[Unit]
Description=kubelet: The Kubernetes Node Agent
Documentation=https://kubernetes.io/docs/
After=containerd.service network-online.target
Wants=network-online.target
Requires=containerd.service

[Service]
ExecStart=/usr/local/bin/kubelet
Restart=always
StartLimitInterval=0
RestartSec=10

[Install]
WantedBy=multi-user.target
EOF

  cat >/etc/systemd/system/kubelet.service.d/10-kubeadm.conf <<'EOF'
[Service]
Environment="KUBELET_KUBECONFIG_ARGS=--bootstrap-kubeconfig=/etc/kubernetes/bootstrap-kubelet.conf --kubeconfig=/etc/kubernetes/kubelet.conf"
Environment="KUBELET_CONFIG_ARGS=--config=/var/lib/kubelet/config.yaml"
EnvironmentFile=-/var/lib/kubelet/kubeadm-flags.env
EnvironmentFile=-/etc/default/kubelet
ExecStart=
ExecStart=/usr/local/bin/kubelet $KUBELET_KUBECONFIG_ARGS $KUBELET_CONFIG_ARGS $KUBELET_KUBEADM_ARGS $KUBELET_EXTRA_ARGS
EOF
}

# Print the server version of an already-initialized cluster, or nothing
# when /etc/kubernetes/admin.conf is absent or the version is not reported.
existing_cluster_version() {
  if [[ -f /etc/kubernetes/admin.conf ]]; then
    "${KUBECTL_BIN}" --kubeconfig=/etc/kubernetes/admin.conf version -o json 2>/dev/null |
      jq -r '.serverVersion.gitVersion // empty'
  fi
}

# Abort unless K8S_VERSION is a 1.34.x release.
ensure_rancher_supported_k8s() {
  [[ "${K8S_VERSION}" =~ ${K8S_SERIES_REGEX} ]] || die \
    "Rancher is enabled, but K8S_VERSION=${K8S_VERSION} is not a 1.34.x release. Set K8S_VERSION to a supported 1.34.x patch release."
}

# Abort unless RANCHER_REPO_CHANNEL names one of the known chart channels.
ensure_valid_rancher_channel() {
  case "${RANCHER_REPO_CHANNEL}" in
    stable | latest | alpha) ;;
    *)
      die "Invalid RANCHER_REPO_CHANNEL: ${RANCHER_REPO_CHANNEL} (expected: stable, latest, alpha)"
      ;;
  esac
}

# ---------- Root check ----------
if [[ "${EUID}" -ne 0 ]]; then
  die "Run this script as root, for example: sudo $0"
fi

# ---------- Cleanup on error ----------

# ERR trap handler: report the failing line and print diagnostic hints.
#   $1 - line number on which the failing command ran
on_error() {
  local exit_code=$?
  warn "Script failed on line $1 with exit code ${exit_code}"
  warn "Useful diagnostics:"
  {
    echo " journalctl -u containerd -u kubelet -b --no-pager | tail -n 200"
    echo " systemctl status containerd kubelet --no-pager"
    echo " ${KUBECTL_BIN} get nodes -o wide"
    echo " ${KUBECTL_BIN} get pods -A"
  } >&2
  exit "${exit_code}"
}
trap 'on_error $LINENO' ERR

# ---------- Validate configuration before touching the system ----------
if [[ "${INSTALL_RANCHER}" == "true" ]]; then
  ensure_rancher_supported_k8s
fi

# The Helm repositories are required by the Rancher steps, so they are
# configured when either flag is enabled; check the channel up front rather
# than after the cluster has already been built.
if [[ "${INSTALL_HELM}" == "true" || "${INSTALL_RANCHER}" == "true" ]]; then
  ensure_valid_rancher_channel
fi

# ---------- Step 1: Disable swap ----------
log "Disabling swap immediately"
swapoff -a || true # best effort: there may be no active swap

log "Disabling swap persistently in /etc/fstab"
if [[ -f /etc/fstab ]]; then
  cp /etc/fstab "/etc/fstab.bak.$(date +%Y%m%d%H%M%S)"
  # Skip lines that are already comments so reruns do not stack prefixes.
  sed -ri '/^\s*#/!{/\sswap\s/s/^/# DISABLED FOR KUBERNETES: /;}' /etc/fstab
fi

# ---------- Step 2: Update system ----------
log "Updating package databases and system packages"
pacman -Syu --noconfirm

# ---------- Step 3: Resolve iptables conflict automatically ----------
log "Resolving iptables backend for Kubernetes"
if pacman -Q iptables >/dev/null 2>&1; then
  log "Removing legacy iptables package so iptables-nft can be installed"
  pacman -Rdd --noconfirm iptables || true
fi

# ---------- Step 4: Install required Arch packages ----------
log "Installing runtime and support packages from Arch"
pacman -S --needed --noconfirm \
  ca-certificates \
  curl \
  containerd \
  cni-plugins \
  crictl \
  ethtool \
  iptables-nft \
  conntrack-tools \
  socat \
  tar \
  gzip \
  jq \
  openssl \
  helm

# ---------- Step 5: Remove Arch Kubernetes packages if present ----------
log "Removing Arch-provided kubeadm/kubectl/kubelet if present"
for pkg in kubeadm kubectl kubelet; do
  if pacman -Q "${pkg}" >/dev/null 2>&1; then
    pacman -Rdd --noconfirm "${pkg}" || true
  fi
done

# ---------- Step 6: Install pinned Kubernetes binaries ----------
log "Installing Kubernetes binaries ${K8S_VERSION}"
download_k8s_binary kubeadm
download_k8s_binary kubectl
download_k8s_binary kubelet

require_cmd "${KUBEADM_BIN}"
require_cmd "${KUBECTL_BIN}"
require_cmd "${KUBELET_BIN}"

# ---------- Step 7: Kernel modules ----------
log "Configuring required kernel modules"
cat >/etc/modules-load.d/k8s.conf <<'EOF'
overlay
br_netfilter
EOF

modprobe overlay
modprobe br_netfilter

# ---------- Step 8: Sysctl ----------
log "Configuring Kubernetes sysctl settings"
cat >/etc/sysctl.d/99-kubernetes-cri.conf <<'EOF'
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.ipv4.ip_forward = 1
EOF

sysctl --system

# ---------- Step 9: containerd config ----------
log "Configuring containerd"
mkdir -p /etc/containerd

if [[ ! -f /etc/containerd/config.toml ]]; then
  containerd config default >/etc/containerd/config.toml
else
  cp /etc/containerd/config.toml "/etc/containerd/config.toml.bak.$(date +%Y%m%d%H%M%S)"
fi

# Switch the runtime to the systemd cgroup driver, keeping the indentation.
sed -ri 's/^(\s*)SystemdCgroup = false/\1SystemdCgroup = true/' /etc/containerd/config.toml

# ---------- Step 10: kubelet service ----------
install_kubelet_service

# ---------- Step 11: Enable services ----------
log "Enabling and starting containerd and kubelet"
systemctl daemon-reload
systemctl enable containerd
# restart (not just start) so an already-running containerd re-reads the
# config.toml edited in step 9.
systemctl restart containerd
systemctl enable --now kubelet

# ---------- Step 12: Wait for containerd ----------
log "Waiting for containerd to become active"
for _ in {1..20}; do
  if systemctl is-active --quiet containerd; then
    break
  fi
  sleep 1
done
systemctl is-active --quiet containerd || die "containerd did not start successfully"

# ---------- Step 13: Handle existing cluster ----------
EXISTING_CLUSTER_VERSION="$(existing_cluster_version || true)"
if [[ -n "${EXISTING_CLUSTER_VERSION}" ]]; then
  log "Detected existing Kubernetes cluster: ${EXISTING_CLUSTER_VERSION}"
  if [[ "${EXISTING_CLUSTER_VERSION}" != "${K8S_VERSION}" ]]; then
    die "Existing cluster version is ${EXISTING_CLUSTER_VERSION}, but this script is pinned to ${K8S_VERSION}. Reset/rebuild the cluster before rerunning."
  fi
fi

# ---------- Step 14: Pre-pull Kubernetes images ----------
log "Pulling Kubernetes control-plane images"
"${KUBEADM_BIN}" config images pull --kubernetes-version="${K8S_VERSION}"

# ---------- Step 15: Initialize cluster ----------
if [[ -f /etc/kubernetes/admin.conf ]]; then
  warn "/etc/kubernetes/admin.conf already exists; skipping kubeadm init"
else
  log "Initializing Kubernetes control plane"
  "${KUBEADM_BIN}" init \
    --kubernetes-version="${K8S_VERSION}" \
    --pod-network-cidr="${POD_CIDR}"
fi

# ---------- Step 16: Configure kubectl for root ----------
log "Configuring kubectl for root"
mkdir -p "${KUBECONFIG_DIR_ROOT}"
cp -f /etc/kubernetes/admin.conf "${KUBECONFIG_DIR_ROOT}/config"
chmod 600 "${KUBECONFIG_DIR_ROOT}/config"

export KUBECONFIG=/etc/kubernetes/admin.conf

# ---------- Step 17: Configure kubectl for invoking user ----------
if [[ -n "${REAL_HOME}" && -d "${REAL_HOME}" ]]; then
  log "Configuring kubectl for user ${REAL_USER}"
  mkdir -p "${REAL_KUBECONFIG_DIR}"
  cp -f /etc/kubernetes/admin.conf "${REAL_KUBECONFIG_DIR}/config"
  # "user:" (empty group) selects the user's login group, which need not be
  # named after the user.
  chown -R "${REAL_USER}:" "${REAL_KUBECONFIG_DIR}"
  chmod 600 "${REAL_KUBECONFIG_DIR}/config"
else
  warn "Could not determine invoking user's home directory; skipping user kubeconfig setup"
fi

# ---------- Step 18: Wait for API ----------
# Must precede every other kubectl call: the version query below would abort
# the script if the API server were still starting.
log "Waiting for Kubernetes API to become responsive"
retry 60 5 "${KUBECTL_BIN}" version --request-timeout=10s >/dev/null

# ---------- Step 19: Verify cluster version ----------
log "Verifying Kubernetes server version"
SERVER_VERSION="$("${KUBECTL_BIN}" version -o json | jq -r '.serverVersion.gitVersion')"
# The 1.34.x constraint exists for Rancher, so only enforce it when Rancher
# is going to be installed (mirrors the K8S_VERSION check at startup).
if [[ "${INSTALL_RANCHER}" == "true" ]]; then
  [[ "${SERVER_VERSION}" =~ ${K8S_SERIES_REGEX} ]] || die \
    "Cluster server version ${SERVER_VERSION} is not a supported 1.34.x release for this Rancher workflow."
fi

# ---------- Step 20: Optionally allow workloads on control-plane ----------
if [[ "${ALLOW_WORKLOADS_ON_CONTROL_PLANE}" == "true" ]]; then
  log "Allowing workloads on the control-plane node (single-node/lab mode)"
  # Best effort: removing a taint that is not present returns an error.
  "${KUBECTL_BIN}" taint nodes --all node-role.kubernetes.io/control-plane- >/dev/null 2>&1 || true
  "${KUBECTL_BIN}" taint nodes --all node-role.kubernetes.io/master- >/dev/null 2>&1 || true
fi

# ---------- Step 21: Install Calico ----------
log "Installing Calico networking"
"${KUBECTL_BIN}" apply -f "https://raw.githubusercontent.com/projectcalico/calico/${CALICO_VERSION}/manifests/calico.yaml"

# ---------- Step 22: Wait for node readiness ----------
log "Waiting for node(s) to become Ready"
"${KUBECTL_BIN}" wait --for=condition=Ready node --all --timeout=10m

# ---------- Step 23: Wait for Calico ----------
log "Waiting for Calico components"
"${KUBECTL_BIN}" -n kube-system rollout status daemonset/calico-node --timeout=10m || true
"${KUBECTL_BIN}" -n kube-system rollout status deployment/calico-kube-controllers --timeout=10m || true

# ---------- Step 24: Save worker join command ----------
log "Saving worker join command"
# The join command embeds a bootstrap token: create the file under a
# restrictive umask so it is never readable by other users.
(
  umask 077
  "${KUBEADM_BIN}" token create --print-join-command >"${JOIN_COMMAND_FILE}"
)
chmod 700 "${JOIN_COMMAND_FILE}"

# ---------- Step 25: Determine node info ----------
log "Determining control-plane node information"
NODE_NAME="$("${KUBECTL_BIN}" get nodes -o jsonpath='{.items[0].metadata.name}')"
NODE_IP="$("${KUBECTL_BIN}" get node "${NODE_NAME}" -o jsonpath='{.status.addresses[?(@.type=="InternalIP")].address}')"
# jsonpath prints several matches space-separated; keep the first address.
NODE_IP="${NODE_IP%% *}"

if [[ -z "${NODE_NAME}" || -z "${NODE_IP}" ]]; then
  die "Failed to determine node name or node IP"
fi

if [[ -z "${RANCHER_HOSTNAME}" ]]; then
  RANCHER_HOSTNAME="rancher.${NODE_IP}.sslip.io"
fi

if [[ -z "${RANCHER_BOOTSTRAP_PASSWORD}" ]]; then
  RANCHER_BOOTSTRAP_PASSWORD="$(openssl rand -base64 24 | tr -d '\n' | tr '/+' 'AB' | cut -c1-20)"
fi

(
  umask 077
  printf '%s\n' "${RANCHER_BOOTSTRAP_PASSWORD}" >"${RANCHER_PASSWORD_FILE}"
)
chmod 600 "${RANCHER_PASSWORD_FILE}"

# ---------- Step 26: Install Helm repos ----------
# Also runs when only INSTALL_RANCHER is set: the Rancher steps below need
# these repositories and RANCHER_CHART.
if [[ "${INSTALL_HELM}" == "true" || "${INSTALL_RANCHER}" == "true" ]]; then
  log "Configuring Helm repositories"
  helm_repo_add_force ingress-nginx https://kubernetes.github.io/ingress-nginx
  helm_repo_add_force jetstack https://charts.jetstack.io

  # Channel was validated at startup; repo name and URL both derive from it.
  helm_repo_add_force "rancher-${RANCHER_REPO_CHANNEL}" \
    "https://releases.rancher.com/server-charts/${RANCHER_REPO_CHANNEL}"
  RANCHER_CHART="rancher-${RANCHER_REPO_CHANNEL}/rancher"

  helm repo update
fi

# ---------- Step 27: Install ingress-nginx ----------
if [[ "${INSTALL_RANCHER}" == "true" ]]; then
  log "Installing ingress-nginx"

  kubectl_ns_apply "${INGRESS_NAMESPACE}"

  helm upgrade --install ingress-nginx ingress-nginx/ingress-nginx \
    --namespace "${INGRESS_NAMESPACE}" \
    --create-namespace \
    --set controller.kind=DaemonSet \
    --set controller.hostNetwork=true \
    --set controller.dnsPolicy=ClusterFirstWithHostNet \
    --set controller.service.type=ClusterIP \
    --set controller.ingressClass="${INGRESS_CLASS_NAME}" \
    --set controller.ingressClassResource.name="${INGRESS_CLASS_NAME}" \
    --set controller.ingressClassResource.default=true \
    --set controller.watchIngressWithoutClass=true \
    --set controller.reportNodeInternalIp=true \
    --wait \
    --timeout 15m

  log "Waiting for ingress-nginx controller"
  "${KUBECTL_BIN}" -n "${INGRESS_NAMESPACE}" rollout status daemonset/ingress-nginx-controller --timeout=15m
fi

# ---------- Step 28: Install cert-manager ----------
if [[ "${INSTALL_RANCHER}" == "true" ]]; then
  log "Installing cert-manager"

  kubectl_ns_apply "${CERT_MANAGER_NAMESPACE}"

  helm upgrade --install cert-manager jetstack/cert-manager \
    --namespace "${CERT_MANAGER_NAMESPACE}" \
    --create-namespace \
    --set crds.enabled=true \
    --wait \
    --timeout 15m

  log "Waiting for cert-manager deployments"
  "${KUBECTL_BIN}" -n "${CERT_MANAGER_NAMESPACE}" rollout status deployment/cert-manager --timeout=15m
  "${KUBECTL_BIN}" -n "${CERT_MANAGER_NAMESPACE}" rollout status deployment/cert-manager-cainjector --timeout=15m
  "${KUBECTL_BIN}" -n "${CERT_MANAGER_NAMESPACE}" rollout status deployment/cert-manager-webhook --timeout=15m
fi

# ---------- Step 29: Install Rancher ----------
if [[ "${INSTALL_RANCHER}" == "true" ]]; then
  log "Installing Rancher"

  kubectl_ns_apply "${RANCHER_NAMESPACE}"

  # NOTE(review): the bootstrap password is passed on the helm command line
  # and is therefore visible in the process list while helm runs.
  helm upgrade --install rancher "${RANCHER_CHART}" \
    --namespace "${RANCHER_NAMESPACE}" \
    --create-namespace \
    --set hostname="${RANCHER_HOSTNAME}" \
    --set bootstrapPassword="${RANCHER_BOOTSTRAP_PASSWORD}" \
    --set replicas="${RANCHER_REPLICAS}" \
    --set ingress.ingressClassName="${INGRESS_CLASS_NAME}" \
    --set ingress.tls.source=rancher \
    --wait \
    --timeout 20m

  log "Waiting for Rancher rollout"
  "${KUBECTL_BIN}" -n "${RANCHER_NAMESPACE}" rollout status deployment/rancher --timeout=20m || true
fi

# ---------- Step 30: Show cluster status ----------
log "Cluster status"
"${KUBECTL_BIN}" get nodes -o wide || true
echo
"${KUBECTL_BIN}" get pods -A || true
echo
"${KUBECTL_BIN}" get ingress -A || true

# ---------- Final output ----------
echo
echo "Kubernetes control plane installation is complete."
echo
echo "Pinned Kubernetes version:"
echo " ${K8S_VERSION}"
echo
echo "kubectl configured for:"
echo " root: ${KUBECONFIG_DIR_ROOT}/config"
echo " ${REAL_USER}: ${REAL_KUBECONFIG_DIR}/config"
echo
echo "Worker join command saved to:"
echo " ${JOIN_COMMAND_FILE}"
echo
echo "To view it:"
echo " sudo cat ${JOIN_COMMAND_FILE}"
echo

if [[ "${INSTALL_RANCHER}" == "true" ]]; then
  echo "Rancher install completed."
  echo
  echo "Rancher URL:"
  echo " https://${RANCHER_HOSTNAME}"
  echo
  echo "Rancher bootstrap password saved to:"
  echo " ${RANCHER_PASSWORD_FILE}"
  echo
  echo "To view it:"
  echo " sudo cat ${RANCHER_PASSWORD_FILE}"
  echo
  echo "Notes:"
  echo " - Rancher is using a 1.34.x Kubernetes control plane on purpose for compatibility."
  echo " - ingress-nginx is using host networking, so access Rancher directly on this node's IP over 443."
  echo " - If a local firewall is enabled, ensure ports 80 and 443 are allowed."
  echo " - Rancher-generated TLS will usually produce a browser warning until you trust the cert."
  echo
fi