Update master_node_install.sh

This commit is contained in:
RomanNum3ral 2026-03-27 21:24:54 +00:00
parent 6b263a8b83
commit 2b6c45b951
1 changed file with 230 additions and 628 deletions

View File

@ -3,69 +3,37 @@ set -Eeuo pipefail
########################################
# Arch Linux Rancher Management Cluster
# RKE2 + Rancher + optional Longhorn
# + fixed RKE2 config generation
# + cloudflared-friendly Rancher hostname handling
# + bootstrap cleanup on failed first start
# Single-node RKE2 server + Rancher
#
# What this script does:
# - Disables swap
# - Installs required Arch packages
# - Ensures kernel modules/sysctl are set for Kubernetes
# - Configures NetworkManager to ignore CNI interfaces
# - Installs RKE2 server pinned to a Rancher-friendly 1.34 release
# - Waits for Kubernetes to become healthy
# - Installs cert-manager
# - Installs Rancher via Helm
# - Prints the Rancher URL and bootstrap password
#
# Optional environment variables:
# RKE2_VERSION=v1.34.5+rke2r1
# RANCHER_HOSTNAME=rancher.example.com
# BOOTSTRAP_PASSWORD=changeme
# RKE2_TOKEN=my-shared-secret
# INSTALL_RANCHER=true
########################################
# ---------- Config ----------
# Every setting below uses the "${NAME:-default}" form: a value already present
# in the environment wins, otherwise the default written here is used.
# RKE2
RKE2_CHANNEL="${RKE2_CHANNEL:-stable}"
RKE2_VERSION="${RKE2_VERSION:-v1.34.5+rke2r1}"
# Empty by default; validate_inputs generates a random token when it is empty.
RKE2_TOKEN="${RKE2_TOKEN:-}"
CLUSTER_CIDR="${CLUSTER_CIDR:-192.168.0.0/16}"
SERVICE_CIDR="${SERVICE_CIDR:-10.43.0.0/16}"
CLUSTER_DNS="${CLUSTER_DNS:-10.43.0.10}"
CNI_PLUGIN="${CNI_PLUGIN:-canal}" # canal | calico | cilium | flannel
DISABLE_RKE2_INGRESS="${DISABLE_RKE2_INGRESS:-true}"
ALLOW_SCHEDULING_ON_SERVER="${ALLOW_SCHEDULING_ON_SERVER:-true}"
RESET_FAILED_BOOTSTRAP="${RESET_FAILED_BOOTSTRAP:-true}"
# Rancher
INSTALL_RANCHER="${INSTALL_RANCHER:-true}"
RANCHER_HOSTNAME="${RANCHER_HOSTNAME:-}" # REQUIRED, e.g. rancher.example.com
RANCHER_NAMESPACE="${RANCHER_NAMESPACE:-cattle-system}"
RANCHER_REPO_CHANNEL="${RANCHER_REPO_CHANNEL:-stable}" # stable | latest | alpha
RANCHER_CHART_VERSION="${RANCHER_CHART_VERSION:-2.13.4}"
# Empty by default; validate_inputs generates one when Rancher is being installed.
RANCHER_BOOTSTRAP_PASSWORD="${RANCHER_BOOTSTRAP_PASSWORD:-}"
RANCHER_REPLICAS="${RANCHER_REPLICAS:-1}"
RANCHER_TLS_SOURCE="${RANCHER_TLS_SOURCE:-rancher}" # rancher | letsEncrypt | secret
RANCHER_PRIVATE_CA="${RANCHER_PRIVATE_CA:-false}"
# Must be non-empty when RANCHER_TLS_SOURCE=letsEncrypt (enforced by validate_inputs).
LETSENCRYPT_EMAIL="${LETSENCRYPT_EMAIL:-}"
# ingress-nginx
INSTALL_INGRESS_NGINX="${INSTALL_INGRESS_NGINX:-true}"
INGRESS_NAMESPACE="${INGRESS_NAMESPACE:-ingress-nginx}"
INGRESS_CLASS_NAME="${INGRESS_CLASS_NAME:-nginx}"
# cert-manager
INSTALL_CERT_MANAGER="${INSTALL_CERT_MANAGER:-true}"
CERT_MANAGER_NAMESPACE="${CERT_MANAGER_NAMESPACE:-cert-manager}"
CERT_MANAGER_CHART_VERSION="${CERT_MANAGER_CHART_VERSION:-v1.18.3}"
# Longhorn
INSTALL_LONGHORN="${INSTALL_LONGHORN:-true}"
LONGHORN_NAMESPACE="${LONGHORN_NAMESPACE:-longhorn-system}"
LONGHORN_CHART_VERSION="${LONGHORN_CHART_VERSION:-1.11.0}"
LONGHORN_DEFAULT_REPLICA_COUNT="${LONGHORN_DEFAULT_REPLICA_COUNT:-1}"
# Helm
INSTALL_HELM="${INSTALL_HELM:-true}"
HELM_VERSION="${HELM_VERSION:-v3.18.4}"
# cloudflared helper file only; does not install cloudflared
WRITE_CLOUDFLARED_EXAMPLE="${WRITE_CLOUDFLARED_EXAMPLE:-true}"
CLOUDFLARED_SERVICE_TARGET="${CLOUDFLARED_SERVICE_TARGET:-https://127.0.0.1}"
# User detection
# SUDO_USER names the invoking account when the script runs through sudo;
# without it the script treats root as the "real" user.
REAL_USER="${SUDO_USER:-root}"
# Field 6 of the passwd entry is the home directory. "|| true" keeps a failed
# lookup from aborting under errexit/pipefail; the next line then falls back to /root.
REAL_HOME="$(getent passwd "${REAL_USER}" | cut -d: -f6 || true)"
REAL_HOME="${REAL_HOME:-/root}"
REAL_KUBECONFIG_DIR="${REAL_HOME}/.kube"
# ---------- Logging ----------
# Initial Rancher admin password, overridable from the environment.
# NOTE(review): the fallback is a fixed, publicly visible string; consider
# requiring an explicit value or generating one at run time.
BOOTSTRAP_PASSWORD="${BOOTSTRAP_PASSWORD:-adminadminadmin}"
# Empty means "not provided"; resolve_hostname later derives an sslip.io name.
RANCHER_HOSTNAME="${RANCHER_HOSTNAME:-}"
# RKE2 configuration directory and the files kept inside it.
RKE2_CONFIG_DIR="/etc/rancher/rke2"
RKE2_CONFIG_FILE="${RKE2_CONFIG_DIR}/config.yaml"
RKE2_TOKEN_FILE="${RKE2_CONFIG_DIR}/server-token"
# Kubeconfig path handed to kubectl and helm by the kubectl_rke2 / helm_rke2 wrappers.
KUBECONFIG_FILE="/etc/rancher/rke2/rke2.yaml"
# Helm repository alias and URL used for the Rancher chart.
RANCHER_REPO_NAME="rancher-stable"
RANCHER_REPO_URL="https://releases.rancher.com/server-charts/stable"
log() {
echo
@ -85,143 +53,61 @@ die() {
exit 1
}
# ---------- Helpers ----------
#######################################
# Refuse to continue unless running with effective UID 0.
# Globals:   EUID (read)
# Returns:   0 when root; otherwise hands a usage hint to die.
#######################################
require_root() {
  if [[ "${EUID}" -ne 0 ]]; then
    die "Run as root: sudo ./master_node_install.sh"
  fi
}
#######################################
# Verify that a command can be resolved by the shell.
# Arguments: $1 - command name to look up
# Returns:   0 when found; otherwise hands an error message to die.
#######################################
require_cmd() {
  if ! command -v "$1" >/dev/null 2>&1; then
    die "Required command not found: $1"
  fi
}
#######################################
# Run a command repeatedly until it succeeds or the attempt budget is spent.
# Arguments: $1 - maximum number of attempts
#            $2 - seconds to sleep between attempts
#            $3.. - command and its arguments
# Returns:   0 once the command succeeds, 1 after the final failed attempt.
#######################################
retry() {
  local max_tries="$1"
  local pause="$2"
  shift 2

  local attempt=1
  while ! "$@"; do
    # Give up without sleeping once the last permitted attempt has failed.
    if (( attempt >= max_tries )); then
      return 1
    fi
    warn "Command failed (attempt ${attempt}/${max_tries}): $*"
    sleep "${pause}"
    attempt=$((attempt + 1))
  done
}
#######################################
# Register a Helm repository, refreshing it when the alias already exists.
# Arguments: $1 - repository alias
#            $2 - repository URL
# Outputs:   nothing on stdout (helm's stdout is discarded)
# Returns:   exit status of "helm repo add".
#######################################
helm_repo_add_force() {
  local name="$1"
  local url="$2"
  local -a add_args=("${name}" "${url}")

  # awk compares the alias as a literal string (a name such as "foo.bar" must
  # not match "fooxbar") and always consumes its whole input, so the producer
  # is never killed by SIGPIPE, which would fail the pipeline under pipefail.
  # Appending "" forces a string comparison even for numeric-looking names.
  if helm repo list 2>/dev/null \
    | awk -v want="${name}" 'NR > 1 && ($1 "") == (want "") { found = 1 } END { exit !found }'; then
    add_args+=(--force-update)
  fi

  helm repo add "${add_args[@]}" >/dev/null
}
#######################################
# Ensure a namespace exists by rendering its manifest client-side and
# piping that manifest into "apply".
# Globals:   KUBECTL_BIN (read)
# Arguments: $1 - namespace name
# Returns:   exit status of the pipeline.
#######################################
kubectl_ns_apply() {
  local namespace="$1"

  "${KUBECTL_BIN}" create namespace "${namespace}" --dry-run=client -o yaml \
    | "${KUBECTL_BIN}" apply -f -
}
#######################################
# Write stdin to a file only when the content differs from what is there.
# Arguments: $1 - destination path (parent directories are created)
# Inputs:    desired file content on stdin
# Returns:   0 when the file is already current or was written with mode
#            0644; non-zero when staging or installing the content failed.
#######################################
write_file_if_changed() {
  local path="$1"
  local tmp
  tmp="$(mktemp)" || return 1

  # Stage stdin first so the destination is never left half-written.
  if ! cat >"${tmp}"; then
    rm -f -- "${tmp}"
    return 1
  fi

  if [[ -f "${path}" ]] && cmp -s -- "${tmp}" "${path}"; then
    rm -f -- "${tmp}"
    return 0
  fi

  # Capture the status so the staged file is removed on every path and an
  # install failure is reported instead of being masked by the cleanup.
  local rc=0
  install -D -m 0644 -- "${tmp}" "${path}" || rc=$?
  rm -f -- "${tmp}"
  return "${rc}"
}
# ---------- Tool paths ----------
# Fixed locations of the RKE2-bundled binaries, RKE2 files and the containerd
# socket that the functions and diagnostics in this script refer to.
RKE2_BIN_DIR="/var/lib/rancher/rke2/bin"
KUBECTL_BIN="${RKE2_BIN_DIR}/kubectl"
CRICTL_BIN="${RKE2_BIN_DIR}/crictl"
# Kubeconfig that is copied into the root and user ~/.kube/config files.
KUBECONFIG_SYSTEM="/etc/rancher/rke2/rke2.yaml"
# Server configuration file written by configure_rke2.
RKE2_CONFIG="/etc/rancher/rke2/config.yaml"
RKE2_SERVER_STATE_DIR="/var/lib/rancher/rke2/server"
RKE2_AGENT_LOG_DIR="/var/lib/rancher/rke2/agent/logs"
# Both crictl endpoints point at the same containerd socket.
CRICTL_RUNTIME_ENDPOINT="unix:///run/k3s/containerd/containerd.sock"
CRICTL_IMAGE_ENDPOINT="unix:///run/k3s/containerd/containerd.sock"
# ---------- Validation ----------
#######################################
# Check the user-supplied settings and fill in generated secrets.
# Globals:   INSTALL_RANCHER, RANCHER_HOSTNAME, RANCHER_TLS_SOURCE,
#            LETSENCRYPT_EMAIL, RANCHER_REPO_CHANNEL (read)
#            RKE2_TOKEN, RANCHER_BOOTSTRAP_PASSWORD (written when empty)
# Returns:   0 when everything is acceptable; otherwise calls die.
#######################################
validate_inputs() {
  local want_rancher="${INSTALL_RANCHER}"

  if [[ "${want_rancher}" == "true" ]]; then
    [[ -n "${RANCHER_HOSTNAME}" ]] \
      || die "RANCHER_HOSTNAME must be set, for example: export RANCHER_HOSTNAME=rancher.example.com"
  fi

  # Generate a 64-hex-character cluster token when none was supplied.
  [[ -n "${RKE2_TOKEN}" ]] || RKE2_TOKEN="$(openssl rand -hex 32)"

  # Generate a 20-character password; '/' and '+' are mapped to letters.
  if [[ "${want_rancher}" == "true" ]] && [[ -z "${RANCHER_BOOTSTRAP_PASSWORD}" ]]; then
    RANCHER_BOOTSTRAP_PASSWORD="$(openssl rand -base64 24 | tr -d '\n' | tr '/+' 'AB' | cut -c1-20)"
  fi

  if [[ "${RANCHER_TLS_SOURCE}" == "letsEncrypt" ]] && [[ -z "${LETSENCRYPT_EMAIL}" ]]; then
    die "LETSENCRYPT_EMAIL must be set when RANCHER_TLS_SOURCE=letsEncrypt"
  fi

  case "${RANCHER_REPO_CHANNEL}" in
    stable) ;;
    latest) ;;
    alpha) ;;
    *) die "Invalid RANCHER_REPO_CHANNEL=${RANCHER_REPO_CHANNEL}. Use stable, latest, or alpha." ;;
  esac
}
# ---------- Error trap ----------
on_error() {
local exit_code=$?
warn "Script failed on line $1 with exit code ${exit_code}"
local line_no=$1
warn "Script failed on line ${line_no} with exit code ${exit_code}"
warn "Useful diagnostics:"
echo " sudo systemctl status rke2-server -l --no-pager"
echo " sudo journalctl -u rke2-server -n 200 --no-pager"
echo " sudo tail -n 200 ${RKE2_AGENT_LOG_DIR}/kubelet.log"
echo " sudo ${CRICTL_BIN} --runtime-endpoint ${CRICTL_RUNTIME_ENDPOINT} ps -a"
echo " sudo ${CRICTL_BIN} --runtime-endpoint ${CRICTL_RUNTIME_ENDPOINT} pods"
echo " sudo tail -n 200 /var/lib/rancher/rke2/agent/logs/kubelet.log"
echo " sudo /var/lib/rancher/rke2/bin/crictl --runtime-endpoint unix:///run/k3s/containerd/containerd.sock ps -a"
echo " sudo /var/lib/rancher/rke2/bin/crictl --runtime-endpoint unix:///run/k3s/containerd/containerd.sock pods"
exit "${exit_code}"
}
trap 'on_error $LINENO' ERR
# ---------- Step 1: Install base packages ----------
install_base_packages() {
log "Installing required Arch packages"
pacman -Sy --noconfirm --needed \
ca-certificates \
curl \
tar \
gzip \
jq \
openssl \
unzip \
wget \
iptables-nft \
nftables \
conntrack-tools \
socat \
ethtool \
iproute2 \
bash-completion \
open-iscsi \
nfs-utils \
cni-plugins
systemctl enable --now iscsid || true
#######################################
# Refuse to continue unless running with effective UID 0.
# Globals:   EUID (read)
# Returns:   0 when root; otherwise hands a usage hint naming $0 to die.
#######################################
require_root() {
  if [[ "${EUID}" -ne 0 ]]; then
    die "Run this script as root: sudo $0"
  fi
}
# ---------- Step 2: Disable swap ----------
#######################################
# Poll every two seconds until a regular file exists.
# Arguments: $1 - path to wait for
#            $2 - timeout in seconds (default 300)
# Returns:   0 once the file exists; calls die when the timeout is reached.
#######################################
wait_for_file() {
  local target="$1"
  local limit="${2:-300}"
  local elapsed=0

  while [[ ! -f "${target}" ]]; do
    sleep 2
    elapsed=$((elapsed + 2))
    if (( elapsed >= limit )); then
      die "Timed out waiting for file: ${target}"
    fi
  done
}
#######################################
# Re-evaluate a shell command string until it succeeds.
# Arguments: $1 - command string, evaluated with eval (output discarded)
#            $2 - timeout in seconds (default 600)
#            $3 - seconds between attempts (default 5)
# Returns:   0 once the command succeeds; calls die when the timeout is reached.
#######################################
wait_for_cmd() {
  local probe="$1"
  local limit="${2:-600}"
  local step="${3:-5}"
  local elapsed=0

  # eval is required: callers pass compound expressions such as '[[ ... ]]'.
  while ! eval "${probe}" >/dev/null 2>&1; do
    sleep "${step}"
    elapsed=$((elapsed + step))
    if (( elapsed >= limit )); then
      die "Timed out waiting for command to succeed: ${probe}"
    fi
  done
}
#######################################
# Run the RKE2-bundled kubectl against the cluster kubeconfig.
# Globals:   KUBECONFIG_FILE (read)
# Arguments: passed through to kubectl unchanged
# Returns:   kubectl's exit status.
#######################################
kubectl_rke2() {
  local -r kubectl_bin="/var/lib/rancher/rke2/bin/kubectl"

  "${kubectl_bin}" --kubeconfig "${KUBECONFIG_FILE}" "$@"
}
#######################################
# Run helm against the cluster kubeconfig.
# Globals:   KUBECONFIG_FILE (read)
# Arguments: passed through to helm unchanged
# Returns:   helm's exit status.
#######################################
helm_rke2() {
  local -a kubeconfig_opt=(--kubeconfig "${KUBECONFIG_FILE}")

  helm "${kubeconfig_opt[@]}" "$@"
}
disable_swap() {
log "Disabling swap"
@ -230,524 +116,240 @@ disable_swap() {
if [[ -f /etc/fstab ]]; then
cp /etc/fstab "/etc/fstab.bak.$(date +%Y%m%d%H%M%S)"
sed -ri '/\sswap\s/s/^/# DISABLED FOR KUBERNETES: /' /etc/fstab
sed -Ei '/^[^#].+\s+swap\s+/ s/^/# disabled-by-rancher-script /' /etc/fstab
fi
}
# ---------- Step 3: Kernel modules and sysctl ----------
install_packages() {
log "Installing required Arch packages"
configure_kernel_networking() {
pacman -Sy --noconfirm archlinux-keyring
if pacman -Q iptables >/dev/null 2>&1; then
pacman -Rdd --noconfirm iptables || true
fi
pacman -Syu --noconfirm
pacman -S --needed --noconfirm \
bash-completion \
ca-certificates \
cni-plugins \
conntrack-tools \
curl \
ethtool \
gzip \
helm \
iproute2 \
iptables-nft \
jq \
nftables \
nfs-utils \
open-iscsi \
openssl \
socat \
tar \
unzip \
wget
}
configure_kernel() {
log "Configuring kernel modules and sysctl"
write_file_if_changed /etc/modules-load.d/rke2.conf <<'EOF'
cat >/etc/modules-load.d/k8s.conf <<'EOF'
overlay
br_netfilter
nf_conntrack
EOF
modprobe overlay
modprobe br_netfilter
modprobe nf_conntrack || true
write_file_if_changed /etc/sysctl.d/90-rke2.conf <<'EOF'
net.ipv4.ip_forward = 1
cat >/etc/sysctl.d/90-kubernetes.conf <<'EOF'
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-ip6tables = 1
fs.inotify.max_user_instances = 8192
fs.inotify.max_user_watches = 1048576
vm.max_map_count = 262144
net.ipv4.ip_forward = 1
EOF
sysctl --system
sysctl --system >/dev/null
}
# ---------- Step 4: firewalld ----------
configure_networkmanager() {
if systemctl is-enabled NetworkManager >/dev/null 2>&1 || systemctl is-active NetworkManager >/dev/null 2>&1; then
log "Configuring NetworkManager to ignore CNI interfaces"
handle_firewall() {
log "Checking for firewalld"
mkdir -p /etc/NetworkManager/conf.d
cat >/etc/NetworkManager/conf.d/rke2-cni.conf <<'EOF'
[keyfile]
unmanaged-devices=interface-name:cali*;interface-name:flannel*;interface-name:cni*;interface-name:vxlan.calico;interface-name:kube-ipvs0;interface-name:nodelocaldns;interface-name:tunl*
EOF
if systemctl list-unit-files 2>/dev/null | grep -q '^firewalld\.service'; then
if systemctl is-enabled firewalld >/dev/null 2>&1 || systemctl is-active firewalld >/dev/null 2>&1; then
warn "firewalld is active or enabled; disabling it for RKE2 compatibility"
systemctl disable --now firewalld || true
fi
systemctl restart NetworkManager
fi
}
# ---------- Step 5: Install Helm ----------
enable_support_services() {
log "Enabling support services"
install_helm() {
[[ "${INSTALL_HELM}" == "true" ]] || return 0
log "Installing Helm ${HELM_VERSION}"
local tmpdir arch
tmpdir="$(mktemp -d)"
arch="$(uname -m)"
case "${arch}" in
x86_64) arch="amd64" ;;
aarch64) arch="arm64" ;;
*) die "Unsupported architecture for Helm: ${arch}" ;;
esac
curl -fsSL -o "${tmpdir}/helm.tar.gz" \
"https://get.helm.sh/helm-${HELM_VERSION}-linux-${arch}.tar.gz"
tar -xzf "${tmpdir}/helm.tar.gz" -C "${tmpdir}"
install -m 0755 "${tmpdir}/linux-${arch}/helm" /usr/local/bin/helm
rm -rf "${tmpdir}"
helm version
systemctl enable --now iscsid.service || true
systemctl enable --now nftables.service || true
}
# ---------- Step 6: Install RKE2 ----------
install_rke2() {
log "Installing RKE2 ${RKE2_VERSION}"
log "Installing RKE2 server ${RKE2_VERSION}"
export INSTALL_RKE2_CHANNEL="${RKE2_CHANNEL}"
export INSTALL_RKE2_METHOD="tar"
export INSTALL_RKE2_VERSION="${RKE2_VERSION}"
mkdir -p "${RKE2_CONFIG_DIR}"
curl -sfL https://get.rke2.io | sh -
}
# ---------- Step 7: Detect node info ----------
#######################################
# Work out the node name: the static hostname from hostnamectl, falling
# back to "uname -n".
# Outputs:   the node name on stdout
# Returns:   0 on success; calls die when both sources yield nothing.
#######################################
detect_node_name() {
  local candidate=""

  # hostnamectl may be missing or fail; treat that as "no answer".
  candidate="$(hostnamectl --static 2>/dev/null || true)"
  if [[ -z "${candidate}" ]]; then
    candidate="$(uname -n)"
  fi
  if [[ -z "${candidate}" ]]; then
    die "Failed to determine node name"
  fi

  printf '%s\n' "${candidate}"
}
#######################################
# Work out the node's primary IPv4 address.
# Tries the source address of the route towards 1.1.1.1 first, then the
# first address reported by "hostname -I".
# Outputs:   the address on stdout
# Returns:   0 on success; calls die when neither source yields an address.
#######################################
detect_node_ip() {
  local addr=""

  # "|| true" matters under errexit + pipefail: without it a failing
  # "ip route get" (for example no default route) aborts the script at this
  # assignment, so neither the fallback nor the die message would be reached.
  addr="$(ip -4 route get 1.1.1.1 2>/dev/null \
    | awk '{for(i=1;i<=NF;i++) if ($i=="src") {print $(i+1); exit}}' || true)"

  if [[ -z "${addr}" ]]; then
    addr="$(hostname -I 2>/dev/null | awk '{print $1}' || true)"
  fi

  [[ -n "${addr}" ]] || die "Failed to determine node IP"
  printf '%s\n' "${addr}"
}
# ---------- Step 8: Configure RKE2 ----------
#######################################
# Generate the RKE2 server configuration file.
# Globals:   RKE2_CONFIG, RKE2_TOKEN, CLUSTER_CIDR, SERVICE_CIDR, CLUSTER_DNS,
#            CNI_PLUGIN, DISABLE_RKE2_INGRESS (read)
# Outputs:   writes ${RKE2_CONFIG}, readable by its owner only
# Returns:   0 on success.
#######################################
configure_rke2() {
  log "Writing ${RKE2_CONFIG}"

  # Derive the directory from RKE2_CONFIG so the two can never disagree.
  local config_dir
  config_dir="$(dirname -- "${RKE2_CONFIG}")"
  mkdir -p -- "${config_dir}"

  local node_ip node_name
  node_ip="$(detect_node_ip)"
  node_name="$(detect_node_name)"

  # The file carries the cluster join token, so it must not be
  # world-readable. Create it empty with mode 0600 before any secret is
  # written; the redirection below keeps that mode.
  install -m 0600 /dev/null "${RKE2_CONFIG}"

  {
    echo "token: ${RKE2_TOKEN}"
    echo 'write-kubeconfig-mode: "0644"'
    echo "node-name: ${node_name}"
    echo "tls-san:"
    echo " - ${node_ip}"
    echo " - 127.0.0.1"
    echo "cluster-cidr: ${CLUSTER_CIDR}"
    echo "service-cidr: ${SERVICE_CIDR}"
    echo "cluster-dns: ${CLUSTER_DNS}"
    echo "cni: ${CNI_PLUGIN}"
    echo "etcd-expose-metrics: true"
    # Optionally switch off the bundled ingress controller.
    if [[ "${DISABLE_RKE2_INGRESS}" == "true" ]]; then
      echo "disable:"
      echo " - rke2-ingress-nginx"
    fi
  } >"${RKE2_CONFIG}"
}
# ---------- Step 9: Write crictl config ----------
#######################################
# Write the crictl client configuration.
# Globals:   CRICTL_RUNTIME_ENDPOINT, CRICTL_IMAGE_ENDPOINT (read)
# Outputs:   overwrites /etc/crictl.yaml
#######################################
configure_crictl() {
  log "Writing /etc/crictl.yaml"

  printf '%s\n' \
    "runtime-endpoint: ${CRICTL_RUNTIME_ENDPOINT}" \
    "image-endpoint: ${CRICTL_IMAGE_ENDPOINT}" \
    "timeout: 10" \
    "debug: false" \
    "pull-image-on-create: false" \
    >/etc/crictl.yaml
}
# ---------- Step 10: Reset failed bootstrap if needed ----------
reset_failed_rke2_bootstrap() {
[[ "${RESET_FAILED_BOOTSTRAP}" == "true" ]] || return 0
if [[ -d "${RKE2_SERVER_STATE_DIR}/db" ]]; then
warn "Removing previous failed RKE2 bootstrap state"
systemctl stop rke2-server || true
rm -rf "${RKE2_SERVER_STATE_DIR}/db"
rm -f "${RKE2_AGENT_LOG_DIR}/kubelet.log" || true
if [[ -n "${RKE2_TOKEN:-}" ]]; then
printf '%s\n' "${RKE2_TOKEN}" > "${RKE2_TOKEN_FILE}"
chmod 600 "${RKE2_TOKEN_FILE}"
elif [[ ! -f "${RKE2_TOKEN_FILE}" ]]; then
openssl rand -hex 24 > "${RKE2_TOKEN_FILE}"
chmod 600 "${RKE2_TOKEN_FILE}"
fi
}
# ---------- Step 11: Start RKE2 ----------
local token
token="$(<"${RKE2_TOKEN_FILE}")"
start_rke2() {
log "Starting rke2-server"
cat >"${RKE2_CONFIG_FILE}" <<EOF
token: ${token}
write-kubeconfig-mode: "0644"
tls-san:
- ${RANCHER_HOSTNAME:-127.0.0.1}
EOF
curl -sfL https://get.rke2.io | INSTALL_RKE2_TYPE=server INSTALL_RKE2_VERSION="${RKE2_VERSION}" sh -
mkdir -p /etc/profile.d
cat >/etc/profile.d/rke2-path.sh <<'EOF'
export PATH=$PATH:/var/lib/rancher/rke2/bin:/usr/local/bin
export KUBECONFIG=/etc/rancher/rke2/rke2.yaml
EOF
systemctl daemon-reload
systemctl enable --now rke2-server
log "Waiting for rke2-server service to reach active state"
retry 90 5 systemctl is-active --quiet rke2-server || {
journalctl -u rke2-server --no-pager -n 200 || true
[[ -f "${RKE2_AGENT_LOG_DIR}/kubelet.log" ]] && tail -n 200 "${RKE2_AGENT_LOG_DIR}/kubelet.log" || true
die "rke2-server did not become active"
}
export PATH="${RKE2_BIN_DIR}:${PATH}"
export KUBECONFIG="${KUBECONFIG_SYSTEM}"
log "Waiting for Kubernetes API"
retry 90 5 "${KUBECTL_BIN}" get nodes >/dev/null 2>&1 || {
journalctl -u rke2-server --no-pager -n 200 || true
[[ -f "${RKE2_AGENT_LOG_DIR}/kubelet.log" ]] && tail -n 200 "${RKE2_AGENT_LOG_DIR}/kubelet.log" || true
die "Kubernetes API did not become ready"
}
systemctl enable rke2-server.service
systemctl restart rke2-server.service
}
# ---------- Step 12: Configure kubeconfig ----------
wait_for_rke2() {
log "Waiting for RKE2 and Kubernetes API"
configure_kubeconfig() {
log "Configuring kubeconfig for root and user"
wait_for_file "${KUBECONFIG_FILE}" 600
wait_for_file "/var/lib/rancher/rke2/bin/kubectl" 600
mkdir -p /root/.kube
cp -f "${KUBECONFIG_SYSTEM}" /root/.kube/config
chmod 600 /root/.kube/config
wait_for_cmd "kubectl_rke2 get --raw=/readyz" 900 5
if [[ -n "${REAL_HOME}" && -d "${REAL_HOME}" ]]; then
mkdir -p "${REAL_KUBECONFIG_DIR}"
cp -f "${KUBECONFIG_SYSTEM}" "${REAL_KUBECONFIG_DIR}/config"
chown -R "${REAL_USER}:${REAL_USER}" "${REAL_KUBECONFIG_DIR}"
chmod 600 "${REAL_KUBECONFIG_DIR}/config"
log "Waiting for local node to become Ready"
wait_for_cmd '[[ "$(kubectl_rke2 get node "$(hostname -s)" -o jsonpath="{.status.conditions[?(@.type==\"Ready\")].status}" 2>/dev/null || true)" == "True" ]]' 1200 5
log "Waiting for system pods to settle"
wait_for_cmd 'kubectl_rke2 -n kube-system get pods >/dev/null 2>&1' 300 5
kubectl_rke2 -n kube-system wait --for=condition=Ready pods --all --timeout=20m || true
}
#######################################
# Make sure RANCHER_HOSTNAME has a value.
# A value supplied by the caller is kept. Otherwise the source address of
# the route towards 1.1.1.1 is detected and "<address>.sslip.io" is used.
# Globals:   RANCHER_HOSTNAME (read, written when empty)
# Returns:   0 on success; calls die when no address can be detected.
#######################################
resolve_hostname() {
  [[ -z "${RANCHER_HOSTNAME}" ]] || return 0

  local detected_ip=""
  # "|| true" matters under errexit + pipefail: without it a failing
  # "ip route get" aborts the script at this assignment and the explanatory
  # die message below is never shown.
  detected_ip="$(ip route get 1.1.1.1 2>/dev/null \
    | awk '{for (i=1; i<=NF; i++) if ($i=="src") {print $(i+1); exit}}' || true)"
  [[ -n "${detected_ip}" ]] || die "Could not auto-detect server IP. Set RANCHER_HOSTNAME manually."

  RANCHER_HOSTNAME="${detected_ip}.sslip.io"
  log "Auto-detected Rancher hostname: ${RANCHER_HOSTNAME}"
}
# ---------- Step 13: Allow scheduling on server ----------
#######################################
# Remove the control-plane and master taints from every node so that
# workloads can be scheduled on a single-node cluster.
# Globals:   ALLOW_SCHEDULING_ON_SERVER, KUBECTL_BIN (read)
# Returns:   0 always; a failed taint removal is ignored on purpose.
#######################################
allow_server_scheduling() {
  [[ "${ALLOW_SCHEDULING_ON_SERVER}" == "true" ]] || return 0

  log "Removing control-plane taints for single-node scheduling"

  local taint
  for taint in \
    node-role.kubernetes.io/control-plane- \
    node-role.kubernetes.io/master-; do
    # Best effort: kubectl's failure status is deliberately swallowed.
    "${KUBECTL_BIN}" taint nodes --all "${taint}" || true
  done
}
# ---------- Step 14: Wait for core system ----------
#######################################
# Wait until nodes and kube-system pods can be listed, then print a
# cluster overview.
# Globals:   KUBECTL_BIN (read)
# Outputs:   node list, a blank line, and the pod list for all namespaces
# Returns:   non-zero when a probe still fails after 90 attempts, 5 s apart.
#######################################
wait_for_core_system() {
  log "Waiting for core system pods"

  # Output of the probes (and of retry itself) is discarded.
  retry 90 5 "${KUBECTL_BIN}" get nodes &>/dev/null
  retry 90 5 "${KUBECTL_BIN}" -n kube-system get pods &>/dev/null

  "${KUBECTL_BIN}" get nodes -o wide
  printf '\n'
  "${KUBECTL_BIN}" get pods -A
}
# ---------- Step 15: Helm repos ----------
#######################################
# Register the Helm repositories used by this installer and refresh them.
# Globals:   INSTALL_HELM, RANCHER_REPO_CHANNEL (read)
#            RANCHER_CHART (written for the stable/latest/alpha channels)
# Returns:   0 immediately when INSTALL_HELM is not "true".
#######################################
configure_helm_repos() {
  if [[ "${INSTALL_HELM}" != "true" ]]; then
    return 0
  fi

  log "Configuring Helm repositories"

  helm_repo_add_force ingress-nginx https://kubernetes.github.io/ingress-nginx
  helm_repo_add_force jetstack https://charts.jetstack.io
  helm_repo_add_force longhorn https://charts.longhorn.io

  # The Rancher repo alias, URL and chart name all follow the channel name.
  local channel="${RANCHER_REPO_CHANNEL}"
  case "${channel}" in
    stable | latest | alpha)
      helm_repo_add_force "rancher-${channel}" "https://releases.rancher.com/server-charts/${channel}"
      RANCHER_CHART="rancher-${channel}/rancher"
      ;;
  esac

  helm repo update
}
# ---------- Step 16: Install ingress-nginx ----------
#######################################
# Install or upgrade ingress-nginx as a host-network DaemonSet and wait
# for its rollout to finish.
# Globals:   INSTALL_INGRESS_NGINX, INGRESS_NAMESPACE, INGRESS_CLASS_NAME,
#            KUBECTL_BIN (read)
# Returns:   0 immediately when INSTALL_INGRESS_NGINX is not "true".
#######################################
install_ingress_nginx() {
  if [[ "${INSTALL_INGRESS_NGINX}" != "true" ]]; then
    return 0
  fi

  log "Installing ingress-nginx"
  kubectl_ns_apply "${INGRESS_NAMESPACE}"

  # Chart values, in the order they are handed to helm.
  local -a chart_values=(
    "controller.kind=DaemonSet"
    "controller.hostNetwork=true"
    "controller.dnsPolicy=ClusterFirstWithHostNet"
    "controller.service.type=ClusterIP"
    "controller.ingressClass=${INGRESS_CLASS_NAME}"
    "controller.ingressClassResource.name=${INGRESS_CLASS_NAME}"
    "controller.ingressClassResource.default=true"
    "controller.watchIngressWithoutClass=true"
    "controller.reportNodeInternalIp=true"
  )

  local -a helm_args=(--namespace "${INGRESS_NAMESPACE}" --create-namespace)
  local value
  for value in "${chart_values[@]}"; do
    helm_args+=(--set "${value}")
  done
  helm_args+=(--wait --timeout 20m)

  helm upgrade --install ingress-nginx ingress-nginx/ingress-nginx "${helm_args[@]}"

  "${KUBECTL_BIN}" -n "${INGRESS_NAMESPACE}" rollout status daemonset/ingress-nginx-controller --timeout=20m
}
# ---------- Step 17: Install cert-manager ----------
install_cert_manager() {
[[ "${INSTALL_CERT_MANAGER}" == "true" ]] || return 0
[[ "${INSTALL_RANCHER}" == "true" ]] || return 0
[[ "${RANCHER_TLS_SOURCE}" != "secret" ]] || return 0
log "Installing cert-manager"
kubectl_ns_apply "${CERT_MANAGER_NAMESPACE}"
helm repo add jetstack https://charts.jetstack.io >/dev/null 2>&1 || true
helm repo update >/dev/null
helm upgrade --install cert-manager jetstack/cert-manager \
--namespace "${CERT_MANAGER_NAMESPACE}" \
local cert_manager_app_version=""
cert_manager_app_version="$(helm show chart jetstack/cert-manager | awk '/^appVersion:/ {print $2; exit}')"
[[ -n "${cert_manager_app_version}" ]] || die "Could not determine cert-manager appVersion"
kubectl_rke2 create namespace cert-manager --dry-run=client -o yaml | kubectl_rke2 apply -f -
kubectl_rke2 apply -f "https://github.com/cert-manager/cert-manager/releases/download/${cert_manager_app_version}/cert-manager.crds.yaml"
helm_rke2 upgrade --install cert-manager jetstack/cert-manager \
--namespace cert-manager \
--create-namespace \
--version "${CERT_MANAGER_CHART_VERSION}" \
--set crds.enabled=true \
--wait \
--timeout 20m
"${KUBECTL_BIN}" -n "${CERT_MANAGER_NAMESPACE}" rollout status deployment/cert-manager --timeout=20m
"${KUBECTL_BIN}" -n "${CERT_MANAGER_NAMESPACE}" rollout status deployment/cert-manager-cainjector --timeout=20m
"${KUBECTL_BIN}" -n "${CERT_MANAGER_NAMESPACE}" rollout status deployment/cert-manager-webhook --timeout=20m
kubectl_rke2 -n cert-manager rollout status deploy/cert-manager --timeout=20m
kubectl_rke2 -n cert-manager rollout status deploy/cert-manager-webhook --timeout=20m
kubectl_rke2 -n cert-manager rollout status deploy/cert-manager-cainjector --timeout=20m
}
# ---------- Step 18: Install Rancher ----------
install_rancher() {
[[ "${INSTALL_RANCHER}" == "true" ]] || return 0
if [[ "${INSTALL_RANCHER}" != "true" ]]; then
warn "INSTALL_RANCHER=false, skipping Rancher install"
return
fi
resolve_hostname
log "Installing Rancher"
echo "${RANCHER_BOOTSTRAP_PASSWORD}" >/root/rancher-bootstrap-password.txt
chmod 600 /root/rancher-bootstrap-password.txt
helm repo add "${RANCHER_REPO_NAME}" "${RANCHER_REPO_URL}" >/dev/null 2>&1 || true
helm repo update >/dev/null
kubectl_ns_apply "${RANCHER_NAMESPACE}"
kubectl_rke2 create namespace cattle-system --dry-run=client -o yaml | kubectl_rke2 apply -f -
local -a rancher_args=(
--namespace "${RANCHER_NAMESPACE}"
--create-namespace
--version "${RANCHER_CHART_VERSION}"
--set hostname="${RANCHER_HOSTNAME}"
--set bootstrapPassword="${RANCHER_BOOTSTRAP_PASSWORD}"
--set replicas="${RANCHER_REPLICAS}"
--set ingress.ingressClassName="${INGRESS_CLASS_NAME}"
--set ingress.tls.source="${RANCHER_TLS_SOURCE}"
--wait
--timeout 30m
)
if [[ "${RANCHER_PRIVATE_CA}" == "true" ]]; then
rancher_args+=( --set privateCA=true )
fi
if [[ "${RANCHER_TLS_SOURCE}" == "letsEncrypt" ]]; then
rancher_args+=( --set letsEncrypt.email="${LETSENCRYPT_EMAIL}" )
fi
helm upgrade --install rancher "${RANCHER_CHART}" "${rancher_args[@]}"
"${KUBECTL_BIN}" -n "${RANCHER_NAMESPACE}" rollout status deployment/rancher --timeout=30m || true
if "${KUBECTL_BIN}" -n "${RANCHER_NAMESPACE}" get deployment rancher-webhook >/dev/null 2>&1; then
"${KUBECTL_BIN}" -n "${RANCHER_NAMESPACE}" rollout status deployment/rancher-webhook --timeout=30m || true
fi
if "${KUBECTL_BIN}" -n "${RANCHER_NAMESPACE}" get deployment cattle-cluster-agent >/dev/null 2>&1; then
"${KUBECTL_BIN}" -n "${RANCHER_NAMESPACE}" rollout status deployment/cattle-cluster-agent --timeout=30m || true
fi
}
# ---------- Step 19: Install Longhorn ----------
install_longhorn() {
[[ "${INSTALL_LONGHORN}" == "true" ]] || return 0
log "Installing Longhorn"
systemctl enable --now iscsid || true
kubectl_ns_apply "${LONGHORN_NAMESPACE}"
helm upgrade --install longhorn longhorn/longhorn \
--namespace "${LONGHORN_NAMESPACE}" \
--create-namespace \
--version "${LONGHORN_CHART_VERSION}" \
--set defaultSettings.defaultReplicaCount="${LONGHORN_DEFAULT_REPLICA_COUNT}" \
helm_rke2 upgrade --install rancher "${RANCHER_REPO_NAME}/rancher" \
--namespace cattle-system \
--set hostname="${RANCHER_HOSTNAME}" \
--set bootstrapPassword="${BOOTSTRAP_PASSWORD}" \
--set replicas=1 \
--set ingress.tls.source=rancher \
--wait \
--timeout 30m
"${KUBECTL_BIN}" -n "${LONGHORN_NAMESPACE}" get pods
kubectl_rke2 -n cattle-system rollout status deploy/rancher --timeout=30m
}
# ---------- Step 20: Write cloudflared example ----------
print_summary() {
local node_token=""
local bootstrap_secret_password=""
#######################################
# Write a sample cloudflared ingress configuration for the Rancher hostname.
# Globals:   WRITE_CLOUDFLARED_EXAMPLE, RANCHER_HOSTNAME,
#            CLOUDFLARED_SERVICE_TARGET (read)
# Outputs:   /root/rancher-install-artifacts/cloudflared-config-example.yml
# Returns:   0 without writing anything when the feature is switched off or
#            no hostname is set.
#######################################
write_cloudflared_example() {
  if [[ "${WRITE_CLOUDFLARED_EXAMPLE}" != "true" || -z "${RANCHER_HOSTNAME}" ]]; then
    return 0
  fi

  log "Writing example cloudflared ingress file"

  local -r artifact_dir="/root/rancher-install-artifacts"
  mkdir -p "${artifact_dir}"

  # NOTE(review): the nesting under "ingress:" looks flattened in this
  # listing; confirm the intended YAML indentation against the real file.
  cat >"${artifact_dir}/cloudflared-config-example.yml" <<EOF
# Example only. Adjust tunnel ID, credentials path, and service target.
tunnel: YOUR_TUNNEL_ID
credentials-file: /etc/cloudflared/YOUR_TUNNEL_ID.json
ingress:
 - hostname: ${RANCHER_HOSTNAME}
 service: ${CLOUDFLARED_SERVICE_TARGET}
 originRequest:
 noTLSVerify: true
 - service: http_status:404
EOF
}
# ---------- Step 21: Validation ----------
validate_install() {
log "Validation"
"${KUBECTL_BIN}" get nodes -o wide || true
echo
"${KUBECTL_BIN}" get pods -A || true
echo
"${KUBECTL_BIN}" get ingress -A || true
echo
"${KUBECTL_BIN}" get svc -A || true
echo
"${KUBECTL_BIN}" get sc || true
if [[ "${INSTALL_RANCHER}" == "true" ]]; then
echo
"${KUBECTL_BIN}" -n "${RANCHER_NAMESPACE}" get all || true
if [[ -f /var/lib/rancher/rke2/server/node-token ]]; then
node_token="$(</var/lib/rancher/rke2/server/node-token)"
else
node_token="$(<"${RKE2_TOKEN_FILE}")"
fi
if [[ "${INSTALL_LONGHORN}" == "true" ]]; then
echo
"${KUBECTL_BIN}" -n "${LONGHORN_NAMESPACE}" get all || true
fi
bootstrap_secret_password="$(kubectl_rke2 get secret -n cattle-system bootstrap-secret -o go-template='{{ .data.bootstrapPassword|base64decode }}' 2>/dev/null || true)"
log "Installation complete"
echo "RKE2 version: ${RKE2_VERSION}"
echo "Kubeconfig: ${KUBECONFIG_FILE}"
echo "Node token: ${node_token}"
echo "Rancher URL: https://${RANCHER_HOSTNAME}"
echo "Bootstrap pw: ${bootstrap_secret_password:-${BOOTSTRAP_PASSWORD}}"
echo
echo "Useful commands:"
echo " export KUBECONFIG=${KUBECONFIG_FILE}"
echo " /var/lib/rancher/rke2/bin/kubectl get nodes -o wide"
echo " /var/lib/rancher/rke2/bin/kubectl get pods -A"
echo " sudo systemctl status rke2-server --no-pager"
}
# ---------- Step 22: Save info ----------
#######################################
# Record where the installer put its important files, plus a few handy
# commands, in a README for the operator.
# Globals:   RKE2_CONFIG, KUBECONFIG_SYSTEM, REAL_KUBECONFIG_DIR,
#            RANCHER_HOSTNAME, KUBECTL_BIN, CRICTL_BIN (read)
# Outputs:   /root/rancher-install-artifacts/README.txt
#######################################
save_cluster_info() {
  log "Saving useful cluster information"

  local -r artifact_dir="/root/rancher-install-artifacts"
  mkdir -p "${artifact_dir}"

  cat >"${artifact_dir}/README.txt" <<EOF
RKE2 server token:
/var/lib/rancher/rke2/server/token
RKE2 config:
${RKE2_CONFIG}
System kubeconfig:
${KUBECONFIG_SYSTEM}
Root kubeconfig:
/root/.kube/config
User kubeconfig:
${REAL_KUBECONFIG_DIR}/config
Rancher bootstrap password:
/root/rancher-bootstrap-password.txt
Rancher URL:
https://${RANCHER_HOSTNAME}
crictl config:
/etc/crictl.yaml
Useful commands:
systemctl status rke2-server --no-pager -l
journalctl -u rke2-server -f
${KUBECTL_BIN} get nodes -o wide
${KUBECTL_BIN} get pods -A
${CRICTL_BIN} ps -a
${CRICTL_BIN} pods
Cloudflared example:
/root/rancher-install-artifacts/cloudflared-config-example.yml
EOF
}
# ---------- Main ----------
main() {
require_root
require_cmd curl
require_cmd sed
require_cmd awk
require_cmd openssl
require_cmd ip
command -v sha256sum >/dev/null 2>&1 || command -v b2sum >/dev/null 2>&1 || die "Neither sha256sum nor b2sum is installed"
validate_inputs
install_base_packages
disable_swap
configure_kernel_networking
handle_firewall
install_helm
install_packages
configure_kernel
configure_networkmanager
enable_support_services
install_rke2
configure_rke2
configure_crictl
reset_failed_rke2_bootstrap
start_rke2
configure_kubeconfig
allow_server_scheduling
wait_for_core_system
configure_helm_repos
install_ingress_nginx
wait_for_rke2
install_cert_manager
install_rancher
install_longhorn
write_cloudflared_example
validate_install
save_cluster_info
echo
echo "Installation complete."
echo
echo "Rancher URL:"
echo " https://${RANCHER_HOSTNAME}"
echo
echo "Bootstrap password file:"
echo " /root/rancher-bootstrap-password.txt"
echo
echo "Cloudflared example file:"
echo " /root/rancher-install-artifacts/cloudflared-config-example.yml"
echo
echo "Next:"
echo " 1. Point your Cloudflare Tunnel hostname at Rancher."
echo " 2. Log into Rancher."
echo " 3. Create or import downstream clusters from Rancher."
echo
print_summary
}
main "$@"