From fd1293903733cfbf22b1162627be3d961af41ec9 Mon Sep 17 00:00:00 2001 From: RomanNum3ral Date: Fri, 27 Mar 2026 17:56:12 +0000 Subject: [PATCH] Update master_node_install.sh --- master_node_install.sh | 431 ++++++++++++++++++++++++++--------------- 1 file changed, 277 insertions(+), 154 deletions(-) diff --git a/master_node_install.sh b/master_node_install.sh index a749512..1ac151a 100644 --- a/master_node_install.sh +++ b/master_node_install.sh @@ -4,40 +4,34 @@ set -Eeuo pipefail ######################################## # Arch Linux Rancher Management Cluster # RKE2 + Rancher + optional Longhorn -# -# PURPOSE -# - Replaces kubeadm with RKE2 so Rancher sits on a Rancher-native distro -# - Creates a dedicated management cluster -# - Installs Rancher in a way that fits Rancher lifecycle management better -# - Optionally installs Longhorn for persistent volumes -# -# IMPORTANT -# - Use a REAL DNS name for RANCHER_HOSTNAME -# - This script is for the Rancher management cluster itself -# - Create/import downstream clusters from Rancher afterward -# - Avoid running general app workloads on this local Rancher cluster +# + fixed RKE2 config generation +# + cloudflared-friendly Rancher hostname handling +# + bootstrap cleanup on failed first start ######################################## -# ---------- User-configurable defaults ---------- +# ---------- Config ---------- -# RKE2 / Kubernetes +# RKE2 RKE2_CHANNEL="${RKE2_CHANNEL:-stable}" RKE2_VERSION="${RKE2_VERSION:-v1.34.5+rke2r1}" RKE2_TOKEN="${RKE2_TOKEN:-}" CLUSTER_CIDR="${CLUSTER_CIDR:-192.168.0.0/16}" SERVICE_CIDR="${SERVICE_CIDR:-10.43.0.0/16}" CLUSTER_DNS="${CLUSTER_DNS:-10.43.0.10}" -CNI_PLUGIN="${CNI_PLUGIN:-canal}" # canal | calico | cilium | flannel +CNI_PLUGIN="${CNI_PLUGIN:-canal}" # canal | calico | cilium | flannel DISABLE_RKE2_INGRESS="${DISABLE_RKE2_INGRESS:-true}" +ALLOW_SCHEDULING_ON_SERVER="${ALLOW_SCHEDULING_ON_SERVER:-true}" +RESET_FAILED_BOOTSTRAP="${RESET_FAILED_BOOTSTRAP:-true}" # Rancher INSTALL_RANCHER="${INSTALL_RANCHER:-true}" -RANCHER_HOSTNAME="${RANCHER_HOSTNAME:-}" # REQUIRED, e.g. rancher.example.com +RANCHER_HOSTNAME="${RANCHER_HOSTNAME:-}" # REQUIRED, e.g. rancher.example.com RANCHER_NAMESPACE="${RANCHER_NAMESPACE:-cattle-system}" +RANCHER_REPO_CHANNEL="${RANCHER_REPO_CHANNEL:-stable}" # stable | latest | alpha RANCHER_CHART_VERSION="${RANCHER_CHART_VERSION:-2.13.4}" RANCHER_BOOTSTRAP_PASSWORD="${RANCHER_BOOTSTRAP_PASSWORD:-}" RANCHER_REPLICAS="${RANCHER_REPLICAS:-1}" -RANCHER_TLS_SOURCE="${RANCHER_TLS_SOURCE:-rancher}" # rancher | letsEncrypt | secret +RANCHER_TLS_SOURCE="${RANCHER_TLS_SOURCE:-rancher}" # rancher | letsEncrypt | secret RANCHER_PRIVATE_CA="${RANCHER_PRIVATE_CA:-false}" LETSENCRYPT_EMAIL="${LETSENCRYPT_EMAIL:-}" @@ -52,7 +46,7 @@ CERT_MANAGER_NAMESPACE="${CERT_MANAGER_NAMESPACE:-cert-manager}" CERT_MANAGER_CHART_VERSION="${CERT_MANAGER_CHART_VERSION:-v1.18.3}" # Longhorn -INSTALL_LONGHORN="${INSTALL_LONGHORN:-true}" +INSTALL_LONGHORN="${INSTALL_LONGHORN:-false}" LONGHORN_NAMESPACE="${LONGHORN_NAMESPACE:-longhorn-system}" LONGHORN_CHART_VERSION="${LONGHORN_CHART_VERSION:-1.11.0}" LONGHORN_DEFAULT_REPLICA_COUNT="${LONGHORN_DEFAULT_REPLICA_COUNT:-1}" @@ -61,8 +55,11 @@ LONGHORN_DEFAULT_REPLICA_COUNT="${LONGHORN_DEFAULT_REPLICA_COUNT:-1}" INSTALL_HELM="${INSTALL_HELM:-true}" HELM_VERSION="${HELM_VERSION:-v3.18.4}" -# Misc -ALLOW_SCHEDULING_ON_SERVER="${ALLOW_SCHEDULING_ON_SERVER:-true}" +# cloudflared helper file only; does not install cloudflared +WRITE_CLOUDFLARED_EXAMPLE="${WRITE_CLOUDFLARED_EXAMPLE:-true}" +CLOUDFLARED_SERVICE_TARGET="${CLOUDFLARED_SERVICE_TARGET:-https://127.0.0.1}" + +# User detection REAL_USER="${SUDO_USER:-root}" REAL_HOME="$(getent passwd "${REAL_USER}" | cut -d: -f6 || true)" REAL_HOME="${REAL_HOME:-/root}" @@ -91,7 +88,7 @@ die() { # ---------- Helpers ---------- require_root() { - [[ "${EUID}" -eq 0 ]] || die "Run as root: sudo ./rancher_rke2_management_install.sh" + [[ "${EUID}" -eq 0 ]] || die "Run as root: sudo ./master_node_install.sh" } require_cmd() { @@ -99,9 +96,9 @@ require_cmd() { } retry() { - local attempts="${1:-10}" - local sleep_seconds="${2:-5}" - shift 2 || true + local attempts="$1" + local sleep_seconds="$2" + shift 2 local n=1 until "$@"; do @@ -126,7 +123,7 @@ helm_repo_add_force() { kubectl_ns_apply() { local ns="$1" - kubectl create namespace "${ns}" --dry-run=client -o yaml | kubectl apply -f - + "${KUBECTL_BIN}" create namespace "${ns}" --dry-run=client -o yaml | "${KUBECTL_BIN}" apply -f - } write_file_if_changed() { @@ -142,41 +139,89 @@ write_file_if_changed() { rm -f "${tmp}" } +# ---------- Tool paths ---------- + +RKE2_BIN_DIR="/var/lib/rancher/rke2/bin" +KUBECTL_BIN="${RKE2_BIN_DIR}/kubectl" +CRICTL_BIN="${RKE2_BIN_DIR}/crictl" +KUBECONFIG_SYSTEM="/etc/rancher/rke2/rke2.yaml" +RKE2_CONFIG="/etc/rancher/rke2/config.yaml" +RKE2_SERVER_STATE_DIR="/var/lib/rancher/rke2/server" +RKE2_AGENT_LOG_DIR="/var/lib/rancher/rke2/agent/logs" +CRICTL_RUNTIME_ENDPOINT="unix:///run/k3s/containerd/containerd.sock" +CRICTL_IMAGE_ENDPOINT="unix:///run/k3s/containerd/containerd.sock" + # ---------- Validation ---------- validate_inputs() { if [[ "${INSTALL_RANCHER}" == "true" && -z "${RANCHER_HOSTNAME}" ]]; then - die "RANCHER_HOSTNAME must be set to a real DNS name, e.g. rancher.example.com" + die "RANCHER_HOSTNAME must be set, for example: export RANCHER_HOSTNAME=rancher.example.com" + fi + + if [[ -z "${RKE2_TOKEN}" ]]; then + RKE2_TOKEN="$(openssl rand -hex 32)" fi if [[ "${INSTALL_RANCHER}" == "true" && -z "${RANCHER_BOOTSTRAP_PASSWORD}" ]]; then RANCHER_BOOTSTRAP_PASSWORD="$(openssl rand -base64 24 | tr -d '\n' | tr '/+' 'AB' | cut -c1-20)" fi - if [[ "${INSTALL_RANCHER}" == "true" && "${RANCHER_TLS_SOURCE}" == "letsEncrypt" && -z "${LETSENCRYPT_EMAIL}" ]]; then + if [[ "${RANCHER_TLS_SOURCE}" == "letsEncrypt" && -z "${LETSENCRYPT_EMAIL}" ]]; then die "LETSENCRYPT_EMAIL must be set when RANCHER_TLS_SOURCE=letsEncrypt" fi - if [[ -z "${RKE2_TOKEN}" ]]; then - RKE2_TOKEN="$(openssl rand -hex 32)" - fi + case "${RANCHER_REPO_CHANNEL}" in + stable|latest|alpha) ;; + *) + die "Invalid RANCHER_REPO_CHANNEL=${RANCHER_REPO_CHANNEL}. Use stable, latest, or alpha." + ;; + esac } -# ---------- Step 1: base packages ---------- +# ---------- Error trap ---------- + +on_error() { + local exit_code=$? + warn "Script failed on line $1 with exit code ${exit_code}" + warn "Useful diagnostics:" + echo " sudo systemctl status rke2-server -l --no-pager" + echo " sudo journalctl -u rke2-server -n 200 --no-pager" + echo " sudo tail -n 200 ${RKE2_AGENT_LOG_DIR}/kubelet.log" + echo " sudo ${CRICTL_BIN} --runtime-endpoint ${CRICTL_RUNTIME_ENDPOINT} ps -a" + echo " sudo ${CRICTL_BIN} --runtime-endpoint ${CRICTL_RUNTIME_ENDPOINT} pods" + exit "${exit_code}" +} +trap 'on_error $LINENO' ERR + +# ---------- Step 1: Install base packages ---------- install_base_packages() { log "Installing required Arch packages" pacman -Sy --noconfirm --needed \ - curl tar gzip jq openssl unzip wget \ - iptables nftables conntrack-tools socat ethtool \ - iproute2 ca-certificates gnupg bash-completion \ - open-iscsi nfs-utils cni-plugins + ca-certificates \ + curl \ + tar \ + gzip \ + jq \ + openssl \ + unzip \ + wget \ + iptables-nft \ + nftables \ + conntrack-tools \ + socat \ + ethtool \ + iproute2 \ + bash-completion \ + open-iscsi \ + nfs-utils \ + cni-plugins systemctl enable --now iscsid || true } -# ---------- Step 2: disable swap ---------- +# ---------- Step 2: Disable swap ---------- disable_swap() { log "Disabling swap" @@ -185,11 +230,11 @@ disable_swap() { if [[ -f /etc/fstab ]]; then cp /etc/fstab "/etc/fstab.bak.$(date +%Y%m%d%H%M%S)" - sed -ri '/\sswap\s/s/^/# /' /etc/fstab + sed -ri '/\sswap\s/s/^/# DISABLED FOR KUBERNETES: /' /etc/fstab fi } -# ---------- Step 3: kernel modules / sysctl ---------- +# ---------- Step 3: Kernel modules and sysctl ---------- configure_kernel_networking() { log "Configuring kernel modules and sysctl" @@ -216,21 +261,20 @@ EOF sysctl --system } -# ---------- Step 4: firewall note ---------- +# ---------- Step 4: firewalld ---------- handle_firewall() { log "Checking for firewalld" if systemctl list-unit-files 2>/dev/null | grep -q '^firewalld\.service'; then if systemctl is-enabled firewalld >/dev/null 2>&1 || systemctl is-active firewalld >/dev/null 2>&1; then - warn "firewalld appears enabled/active. RKE2 docs warn that firewalld conflicts with the default Canal networking stack." - warn "Disabling firewalld on this node." + warn "firewalld is active or enabled; disabling it for RKE2 compatibility" systemctl disable --now firewalld || true fi fi } -# ---------- Step 5: install Helm ---------- +# ---------- Step 5: Install Helm ---------- install_helm() { [[ "${INSTALL_HELM}" == "true" ]] || return 0 @@ -257,7 +301,7 @@ install_helm() { helm version } -# ---------- Step 6: install RKE2 ---------- +# ---------- Step 6: Install RKE2 ---------- install_rke2() { log "Installing RKE2 ${RKE2_VERSION}" @@ -269,54 +313,82 @@ install_rke2() { curl -sfL https://get.rke2.io | sh - } -# ---------- Step 7: configure RKE2 ---------- +# ---------- Step 7: Detect node info ---------- + +detect_node_name() { + local name="" + name="$(hostnamectl --static 2>/dev/null || true)" + [[ -n "${name}" ]] || name="$(uname -n)" + [[ -n "${name}" ]] || die "Failed to determine node name" + printf '%s\n' "${name}" +} + +detect_node_ip() { + local ip="" + ip="$(ip -4 route get 1.1.1.1 2>/dev/null | awk '{for(i=1;i<=NF;i++) if ($i=="src") {print $(i+1); exit}}')" + [[ -n "${ip}" ]] || ip="$(hostname -I 2>/dev/null | awk '{print $1}')" + [[ -n "${ip}" ]] || die "Failed to determine node IP" + printf '%s\n' "${ip}" +} + +# ---------- Step 8: Configure RKE2 ---------- configure_rke2() { - log "Writing /etc/rancher/rke2/config.yaml" + log "Writing ${RKE2_CONFIG}" mkdir -p /etc/rancher/rke2 local node_ip node_name - node_ip="$(ip -4 route get 1.1.1.1 2>/dev/null | awk '{for(i=1;i<=NF;i++) if ($i=="src") {print $(i+1); exit}}')" - [[ -n "${node_ip}" ]] || node_ip="$(hostnamectl --static 2>/dev/null || true)" - [[ -n "${node_ip}" ]] || node_ip="$(hostname -I 2>/dev/null | awk '{print $1}')" + node_ip="$(detect_node_ip)" + node_name="$(detect_node_name)" - node_name="$(hostnamectl --static 2>/dev/null || true)" - [[ -n "${node_name}" ]] || node_name="$(uname -n)" + { + echo "token: ${RKE2_TOKEN}" + echo 'write-kubeconfig-mode: "0644"' + echo "node-name: ${node_name}" + echo "tls-san:" + echo " - ${node_ip}" + echo " - 127.0.0.1" + echo "cluster-cidr: ${CLUSTER_CIDR}" + echo "service-cidr: ${SERVICE_CIDR}" + echo "cluster-dns: ${CLUSTER_DNS}" + echo "cni: ${CNI_PLUGIN}" + echo "etcd-expose-metrics: true" + if [[ "${DISABLE_RKE2_INGRESS}" == "true" ]]; then + echo "disable:" + echo " - rke2-ingress-nginx" + fi + } > "${RKE2_CONFIG}" +} - [[ -n "${node_ip}" ]] || die "Failed to determine node IP" - [[ -n "${node_name}" ]] || die "Failed to determine node name" +# ---------- Step 9: Write crictl config ---------- - cat >/etc/rancher/rke2/config.yaml </etc/crictl.yaml <> /etc/rancher/rke2/config.yaml - fi +# ---------- Step 10: Reset failed bootstrap if needed ---------- - if [[ "${DISABLE_RKE2_INGRESS}" == "true" ]]; then - cat >> /etc/rancher/rke2/config.yaml <<'EOF' -disable: - - rke2-ingress-nginx -EOF +reset_failed_rke2_bootstrap() { + [[ "${RESET_FAILED_BOOTSTRAP}" == "true" ]] || return 0 + + if [[ -d "${RKE2_SERVER_STATE_DIR}/db" ]]; then + warn "Removing previous failed RKE2 bootstrap state" + systemctl stop rke2-server || true + rm -rf "${RKE2_SERVER_STATE_DIR}/db" + rm -f "${RKE2_AGENT_LOG_DIR}/kubelet.log" || true fi } -# ---------- Step 8: start RKE2 ---------- +# ---------- Step 11: Start RKE2 ---------- start_rke2() { log "Starting rke2-server" @@ -324,73 +396,100 @@ start_rke2() { systemctl daemon-reload systemctl enable --now rke2-server - log "Waiting for RKE2 server to become active" - retry 60 5 systemctl is-active --quiet rke2-server || { + log "Waiting for rke2-server service to reach active state" + retry 90 5 systemctl is-active --quiet rke2-server || { journalctl -u rke2-server --no-pager -n 200 || true - die "rke2-server did not start successfully" + [[ -f "${RKE2_AGENT_LOG_DIR}/kubelet.log" ]] && tail -n 200 "${RKE2_AGENT_LOG_DIR}/kubelet.log" || true + die "rke2-server did not become active" } - export PATH="/var/lib/rancher/rke2/bin:${PATH}" - export KUBECONFIG=/etc/rancher/rke2/rke2.yaml + export PATH="${RKE2_BIN_DIR}:${PATH}" + export KUBECONFIG="${KUBECONFIG_SYSTEM}" log "Waiting for Kubernetes API" - retry 60 5 kubectl get nodes >/dev/null 2>&1 || { - kubectl get pods -A || true + retry 90 5 "${KUBECTL_BIN}" get nodes >/dev/null 2>&1 || { journalctl -u rke2-server --no-pager -n 200 || true + [[ -f "${RKE2_AGENT_LOG_DIR}/kubelet.log" ]] && tail -n 200 "${RKE2_AGENT_LOG_DIR}/kubelet.log" || true die "Kubernetes API did not become ready" } } -# ---------- Step 9: configure kubectl for root and real user ---------- +# ---------- Step 12: Configure kubeconfig ---------- configure_kubeconfig() { - log "Configuring kubeconfig" + log "Configuring kubeconfig for root and user" mkdir -p /root/.kube - cp -f /etc/rancher/rke2/rke2.yaml /root/.kube/config + cp -f "${KUBECONFIG_SYSTEM}" /root/.kube/config chmod 600 /root/.kube/config if [[ -n "${REAL_HOME}" && -d "${REAL_HOME}" ]]; then mkdir -p "${REAL_KUBECONFIG_DIR}" - cp -f /etc/rancher/rke2/rke2.yaml "${REAL_KUBECONFIG_DIR}/config" + cp -f "${KUBECONFIG_SYSTEM}" "${REAL_KUBECONFIG_DIR}/config" chown -R "${REAL_USER}:${REAL_USER}" "${REAL_KUBECONFIG_DIR}" chmod 600 "${REAL_KUBECONFIG_DIR}/config" - else - warn "Could not determine invoking user's home directory; skipping user kubeconfig setup" fi } -# ---------- Step 10: allow scheduling on single-node server if requested ---------- +# ---------- Step 13: Allow scheduling on server ---------- allow_server_scheduling() { if [[ "${ALLOW_SCHEDULING_ON_SERVER}" == "true" ]]; then - log "Removing control-plane scheduling taints for single-node use" - kubectl taint nodes --all node-role.kubernetes.io/control-plane- || true - kubectl taint nodes --all node-role.kubernetes.io/master- || true + log "Removing control-plane taints for single-node scheduling" + "${KUBECTL_BIN}" taint nodes --all node-role.kubernetes.io/control-plane- || true + "${KUBECTL_BIN}" taint nodes --all node-role.kubernetes.io/master- || true fi } -# ---------- Step 11: wait for core system ---------- +# ---------- Step 14: Wait for core system ---------- wait_for_core_system() { log "Waiting for core system pods" - retry 60 5 kubectl -n kube-system rollout status deployment/coredns --timeout=15s || true - kubectl get nodes -o wide + retry 90 5 "${KUBECTL_BIN}" get nodes >/dev/null 2>&1 + retry 90 5 "${KUBECTL_BIN}" -n kube-system get pods >/dev/null 2>&1 + + "${KUBECTL_BIN}" get nodes -o wide echo - kubectl get pods -A + "${KUBECTL_BIN}" get pods -A } -# ---------- Step 12: install ingress-nginx ---------- +# ---------- Step 15: Helm repos ---------- + +configure_helm_repos() { + [[ "${INSTALL_HELM}" == "true" ]] || return 0 + + log "Configuring Helm repositories" + + helm_repo_add_force ingress-nginx https://kubernetes.github.io/ingress-nginx + helm_repo_add_force jetstack https://charts.jetstack.io + helm_repo_add_force longhorn https://charts.longhorn.io + + case "${RANCHER_REPO_CHANNEL}" in + stable) + helm_repo_add_force rancher-stable https://releases.rancher.com/server-charts/stable + RANCHER_CHART="rancher-stable/rancher" + ;; + latest) + helm_repo_add_force rancher-latest https://releases.rancher.com/server-charts/latest + RANCHER_CHART="rancher-latest/rancher" + ;; + alpha) + helm_repo_add_force rancher-alpha https://releases.rancher.com/server-charts/alpha + RANCHER_CHART="rancher-alpha/rancher" + ;; + esac + + helm repo update +} + +# ---------- Step 16: Install ingress-nginx ---------- install_ingress_nginx() { [[ "${INSTALL_INGRESS_NGINX}" == "true" ]] || return 0 log "Installing ingress-nginx" - helm_repo_add_force ingress-nginx https://kubernetes.github.io/ingress-nginx - helm repo update - kubectl_ns_apply "${INGRESS_NAMESPACE}" helm upgrade --install ingress-nginx ingress-nginx/ingress-nginx \ @@ -408,10 +507,10 @@ install_ingress_nginx() { --wait \ --timeout 20m - kubectl -n "${INGRESS_NAMESPACE}" rollout status daemonset/ingress-nginx-controller --timeout=20m + "${KUBECTL_BIN}" -n "${INGRESS_NAMESPACE}" rollout status daemonset/ingress-nginx-controller --timeout=20m } -# ---------- Step 13: install cert-manager ---------- +# ---------- Step 17: Install cert-manager ---------- install_cert_manager() { [[ "${INSTALL_CERT_MANAGER}" == "true" ]] || return 0 @@ -420,9 +519,6 @@ install_cert_manager() { log "Installing cert-manager" - helm_repo_add_force jetstack https://charts.jetstack.io - helm repo update - kubectl_ns_apply "${CERT_MANAGER_NAMESPACE}" helm upgrade --install cert-manager jetstack/cert-manager \ @@ -433,12 +529,12 @@ install_cert_manager() { --wait \ --timeout 20m - kubectl -n "${CERT_MANAGER_NAMESPACE}" rollout status deployment/cert-manager --timeout=20m - kubectl -n "${CERT_MANAGER_NAMESPACE}" rollout status deployment/cert-manager-cainjector --timeout=20m - kubectl -n "${CERT_MANAGER_NAMESPACE}" rollout status deployment/cert-manager-webhook --timeout=20m + "${KUBECTL_BIN}" -n "${CERT_MANAGER_NAMESPACE}" rollout status deployment/cert-manager --timeout=20m + "${KUBECTL_BIN}" -n "${CERT_MANAGER_NAMESPACE}" rollout status deployment/cert-manager-cainjector --timeout=20m + "${KUBECTL_BIN}" -n "${CERT_MANAGER_NAMESPACE}" rollout status deployment/cert-manager-webhook --timeout=20m } -# ---------- Step 14: install Rancher ---------- +# ---------- Step 18: Install Rancher ---------- install_rancher() { [[ "${INSTALL_RANCHER}" == "true" ]] || return 0 @@ -448,9 +544,6 @@ install_rancher() { echo "${RANCHER_BOOTSTRAP_PASSWORD}" >/root/rancher-bootstrap-password.txt chmod 600 /root/rancher-bootstrap-password.txt - helm_repo_add_force rancher-stable https://releases.rancher.com/server-charts/stable - helm repo update - kubectl_ns_apply "${RANCHER_NAMESPACE}" local -a rancher_args=( @@ -474,20 +567,20 @@ install_rancher() { rancher_args+=( --set letsEncrypt.email="${LETSENCRYPT_EMAIL}" ) fi - helm upgrade --install rancher rancher-stable/rancher "${rancher_args[@]}" + helm upgrade --install rancher "${RANCHER_CHART}" "${rancher_args[@]}" - kubectl -n "${RANCHER_NAMESPACE}" rollout status deployment/rancher --timeout=30m + "${KUBECTL_BIN}" -n "${RANCHER_NAMESPACE}" rollout status deployment/rancher --timeout=30m || true - if kubectl -n "${RANCHER_NAMESPACE}" get deployment rancher-webhook >/dev/null 2>&1; then - kubectl -n "${RANCHER_NAMESPACE}" rollout status deployment/rancher-webhook --timeout=30m + if "${KUBECTL_BIN}" -n "${RANCHER_NAMESPACE}" get deployment rancher-webhook >/dev/null 2>&1; then + "${KUBECTL_BIN}" -n "${RANCHER_NAMESPACE}" rollout status deployment/rancher-webhook --timeout=30m || true fi - if kubectl -n "${RANCHER_NAMESPACE}" get deployment cattle-cluster-agent >/dev/null 2>&1; then - kubectl -n "${RANCHER_NAMESPACE}" rollout status deployment/cattle-cluster-agent --timeout=30m || true + if "${KUBECTL_BIN}" -n "${RANCHER_NAMESPACE}" get deployment cattle-cluster-agent >/dev/null 2>&1; then + "${KUBECTL_BIN}" -n "${RANCHER_NAMESPACE}" rollout status deployment/cattle-cluster-agent --timeout=30m || true fi } -# ---------- Step 15: install Longhorn ---------- +# ---------- Step 19: Install Longhorn ---------- install_longhorn() { [[ "${INSTALL_LONGHORN}" == "true" ]] || return 0 @@ -496,9 +589,6 @@ install_longhorn() { systemctl enable --now iscsid || true - helm_repo_add_force longhorn https://charts.longhorn.io - helm repo update - kubectl_ns_apply "${LONGHORN_NAMESPACE}" helm upgrade --install longhorn longhorn/longhorn \ @@ -509,34 +599,60 @@ install_longhorn() { --wait \ --timeout 30m - kubectl -n "${LONGHORN_NAMESPACE}" get pods + "${KUBECTL_BIN}" -n "${LONGHORN_NAMESPACE}" get pods } -# ---------- Step 16: validation ---------- +# ---------- Step 20: Write cloudflared example ---------- + +write_cloudflared_example() { + [[ "${WRITE_CLOUDFLARED_EXAMPLE}" == "true" ]] || return 0 + [[ -n "${RANCHER_HOSTNAME}" ]] || return 0 + + log "Writing example cloudflared ingress file" + + mkdir -p /root/rancher-install-artifacts + + cat >/root/rancher-install-artifacts/cloudflared-config-example.yml </dev/null 2>&1 || command -v b2sum >/dev/null 2>&1 || die "Neither sha256sum nor b2sum is installed" validate_inputs install_base_packages @@ -588,22 +716,23 @@ main() { install_helm install_rke2 configure_rke2 + configure_crictl + reset_failed_rke2_bootstrap start_rke2 configure_kubeconfig allow_server_scheduling wait_for_core_system + configure_helm_repos install_ingress_nginx install_cert_manager install_rancher install_longhorn + write_cloudflared_example validate_install save_cluster_info echo - echo "RKE2 management cluster installation is complete." - echo - echo "RKE2 version:" - echo " ${RKE2_VERSION}" + echo "Installation complete." echo echo "Rancher URL:" echo " https://${RANCHER_HOSTNAME}" @@ -611,19 +740,13 @@ main() { echo "Bootstrap password file:" echo " /root/rancher-bootstrap-password.txt" echo - echo "RKE2 server token:" - echo " /var/lib/rancher/rke2/server/token" + echo "Cloudflared example file:" + echo " /root/rancher-install-artifacts/cloudflared-config-example.yml" echo - echo "kubectl configured for:" - echo " root: /root/.kube/config" - echo " ${REAL_USER}: ${REAL_KUBECONFIG_DIR}/config" - echo - if [[ "${INSTALL_LONGHORN}" == "true" ]]; then - echo "Longhorn is installed for persistent volumes." - echo - fi - echo "Next step:" - echo " Log into Rancher and create/import downstream clusters there." + echo "Next:" + echo " 1. Point your Cloudflare Tunnel hostname at Rancher." + echo " 2. Log into Rancher." + echo " 3. Create or import downstream clusters from Rancher." echo }