Update master_node_install.sh

This commit is contained in:
RomanNum3ral 2026-03-27 21:40:31 +00:00
parent 2b6c45b951
commit 71f78fe9b6
1 changed file with 57 additions and 91 deletions

View File

@ -4,33 +4,14 @@ set -Eeuo pipefail
######################################## ########################################
# Arch Linux Rancher Management Cluster # Arch Linux Rancher Management Cluster
# Single-node RKE2 server + Rancher # Single-node RKE2 server + Rancher
#
# What this script does:
# - Disables swap
# - Installs required Arch packages
# - Ensures kernel modules/sysctl are set for Kubernetes
# - Configures NetworkManager to ignore CNI interfaces
# - Installs RKE2 server pinned to a Rancher-friendly 1.34 release
# - Waits for Kubernetes to become healthy
# - Installs cert-manager
# - Installs Rancher via Helm
# - Prints the Rancher URL and bootstrap password
#
# Optional environment variables:
# RKE2_VERSION=v1.34.5+rke2r1
# RANCHER_HOSTNAME=rancher.example.com
# BOOTSTRAP_PASSWORD=changeme
# RKE2_TOKEN=my-shared-secret
# INSTALL_RANCHER=true
######################################## ########################################
RKE2_VERSION="${RKE2_VERSION:-v1.34.5+rke2r1}" RKE2_VERSION="${RKE2_VERSION:-v1.34.5+rke2r1}"
INSTALL_RANCHER="${INSTALL_RANCHER:-true}" INSTALL_RANCHER="${INSTALL_RANCHER:-true}"
BOOTSTRAP_PASSWORD="${BOOTSTRAP_PASSWORD:-adminadminadmin}" BOOTSTRAP_PASSWORD="${BOOTSTRAP_PASSWORD:-V1P4F7uaqpAFHsVzLX6M}"
RANCHER_HOSTNAME="${RANCHER_HOSTNAME:-}" RANCHER_HOSTNAME="${RANCHER_HOSTNAME:-rancher.fortis-scientia.com}"
RKE2_CONFIG_DIR="/etc/rancher/rke2" RKE2_CONFIG_DIR="/etc/rancher/rke2"
RKE2_CONFIG_FILE="${RKE2_CONFIG_DIR}/config.yaml" RKE2_CONFIG_FILE="${RKE2_CONFIG_DIR}/config.yaml"
RKE2_TOKEN_FILE="${RKE2_CONFIG_DIR}/server-token"
KUBECONFIG_FILE="/etc/rancher/rke2/rke2.yaml" KUBECONFIG_FILE="/etc/rancher/rke2/rke2.yaml"
RANCHER_REPO_NAME="rancher-stable" RANCHER_REPO_NAME="rancher-stable"
RANCHER_REPO_URL="https://releases.rancher.com/server-charts/stable" RANCHER_REPO_URL="https://releases.rancher.com/server-charts/stable"
@ -56,14 +37,12 @@ die() {
on_error() { on_error() {
local exit_code=$? local exit_code=$?
local line_no=$1 local line_no=$1
warn "Script failed on line ${line_no} with exit code ${exit_code}" warn "Script failed on line ${line_no} with exit code ${exit_code}"
warn "Useful diagnostics:" warn "Useful diagnostics:"
echo " sudo systemctl status rke2-server -l --no-pager" echo " sudo systemctl status rke2-server -l --no-pager"
echo " sudo journalctl -u rke2-server -n 200 --no-pager" echo " sudo journalctl -u rke2-server -n 200 --no-pager"
echo " sudo tail -n 200 /var/lib/rancher/rke2/agent/logs/kubelet.log" echo " sudo /var/lib/rancher/rke2/bin/kubectl --kubeconfig ${KUBECONFIG_FILE} get nodes -o wide"
echo " sudo /var/lib/rancher/rke2/bin/crictl --runtime-endpoint unix:///run/k3s/containerd/containerd.sock ps -a" echo " sudo /var/lib/rancher/rke2/bin/kubectl --kubeconfig ${KUBECONFIG_FILE} get pods -A"
echo " sudo /var/lib/rancher/rke2/bin/crictl --runtime-endpoint unix:///run/k3s/containerd/containerd.sock pods"
exit "${exit_code}" exit "${exit_code}"
} }
trap 'on_error $LINENO' ERR trap 'on_error $LINENO' ERR
@ -72,35 +51,6 @@ require_root() {
[[ "${EUID}" -eq 0 ]] || die "Run this script as root: sudo $0" [[ "${EUID}" -eq 0 ]] || die "Run this script as root: sudo $0"
} }
#######################################
# Block until a regular file exists, polling every 2 seconds.
# Arguments:
#   $1 - path of the file to wait for
#   $2 - timeout in seconds (optional, defaults to 300)
# Returns:
#   0 once the file exists. When the accumulated wait reaches the
#   timeout, calls die (defined elsewhere in this script) with a
#   "Timed out waiting for file" message.
#######################################
wait_for_file() {
  local path="$1"
  local limit="${2:-300}"
  local -r poll_seconds=2
  local elapsed=0

  while [[ ! -f "${path}" ]]; do
    sleep "${poll_seconds}"
    # elapsed is always >= 2 after this, so the arithmetic command
    # never evaluates to 0 and cannot return a failing status.
    (( elapsed += poll_seconds ))
    if (( elapsed >= limit )); then
      die "Timed out waiting for file: ${path}"
    fi
  done
}
#######################################
# Re-run a shell command string until it succeeds.
# The string is evaluated with eval in the current shell, with its
# stdout and stderr discarded, so callers must only pass trusted text.
# Arguments:
#   $1 - command string to evaluate
#   $2 - timeout in seconds (optional, defaults to 600)
#   $3 - seconds to sleep between attempts (optional, defaults to 5)
# Returns:
#   0 once the command succeeds. When the accumulated sleep time
#   reaches the timeout, calls die (defined elsewhere in this script)
#   with a "Timed out waiting for command to succeed" message.
#######################################
wait_for_cmd() {
  # These local names are visible to the eval'd command through dynamic
  # scoping, so they are deliberately left as-is.
  local cmd="$1" timeout="${2:-600}" interval="${3:-5}"
  local waited=0

  while ! eval "${cmd}" >/dev/null 2>&1; do
    sleep "${interval}"
    # Plain assignment (not an arithmetic command) so an interval of 0
    # cannot produce a failing status here.
    waited=$(( waited + interval ))
    if (( waited >= timeout )); then
      die "Timed out waiting for command to succeed: ${cmd}"
    fi
  done
}
kubectl_rke2() { kubectl_rke2() {
/var/lib/rancher/rke2/bin/kubectl --kubeconfig "${KUBECONFIG_FILE}" "$@" /var/lib/rancher/rke2/bin/kubectl --kubeconfig "${KUBECONFIG_FILE}" "$@"
} }
@ -111,9 +61,7 @@ helm_rke2() {
disable_swap() { disable_swap() {
log "Disabling swap" log "Disabling swap"
swapoff -a || true swapoff -a || true
if [[ -f /etc/fstab ]]; then if [[ -f /etc/fstab ]]; then
cp /etc/fstab "/etc/fstab.bak.$(date +%Y%m%d%H%M%S)" cp /etc/fstab "/etc/fstab.bak.$(date +%Y%m%d%H%M%S)"
sed -Ei '/^[^#].+\s+swap\s+/ s/^/# disabled-by-rancher-script /' /etc/fstab sed -Ei '/^[^#].+\s+swap\s+/ s/^/# disabled-by-rancher-script /' /etc/fstab
@ -122,7 +70,6 @@ disable_swap() {
install_packages() { install_packages() {
log "Installing required Arch packages" log "Installing required Arch packages"
pacman -Sy --noconfirm archlinux-keyring pacman -Sy --noconfirm archlinux-keyring
if pacman -Q iptables >/dev/null 2>&1; then if pacman -Q iptables >/dev/null 2>&1; then
@ -184,11 +131,17 @@ EOF
systemctl restart NetworkManager systemctl restart NetworkManager
fi fi
if systemctl list-unit-files | grep -q '^nm-cloud-setup.service'; then
systemctl disable --now nm-cloud-setup.service || true
fi
if systemctl list-unit-files | grep -q '^nm-cloud-setup.timer'; then
systemctl disable --now nm-cloud-setup.timer || true
fi
} }
enable_support_services() { enable_support_services() {
log "Enabling support services" log "Enabling support services"
systemctl enable --now iscsid.service || true systemctl enable --now iscsid.service || true
systemctl enable --now nftables.service || true systemctl enable --now nftables.service || true
} }
@ -198,19 +151,7 @@ install_rke2() {
mkdir -p "${RKE2_CONFIG_DIR}" mkdir -p "${RKE2_CONFIG_DIR}"
if [[ -n "${RKE2_TOKEN:-}" ]]; then
printf '%s\n' "${RKE2_TOKEN}" > "${RKE2_TOKEN_FILE}"
chmod 600 "${RKE2_TOKEN_FILE}"
elif [[ ! -f "${RKE2_TOKEN_FILE}" ]]; then
openssl rand -hex 24 > "${RKE2_TOKEN_FILE}"
chmod 600 "${RKE2_TOKEN_FILE}"
fi
local token
token="$(<"${RKE2_TOKEN_FILE}")"
cat >"${RKE2_CONFIG_FILE}" <<EOF cat >"${RKE2_CONFIG_FILE}" <<EOF
token: ${token}
write-kubeconfig-mode: "0644" write-kubeconfig-mode: "0644"
tls-san: tls-san:
- ${RANCHER_HOSTNAME:-127.0.0.1} - ${RANCHER_HOSTNAME:-127.0.0.1}
@ -232,16 +173,55 @@ EOF
wait_for_rke2() { wait_for_rke2() {
log "Waiting for RKE2 and Kubernetes API" log "Waiting for RKE2 and Kubernetes API"
wait_for_file "${KUBECONFIG_FILE}" 600 local waited=0
wait_for_file "/var/lib/rancher/rke2/bin/kubectl" 600 until [[ -f "${KUBECONFIG_FILE}" ]]; do
sleep 2
waited=$((waited + 2))
if (( waited >= 600 )); then
journalctl -u rke2-server -n 200 --no-pager || true
die "Timed out waiting for ${KUBECONFIG_FILE}"
fi
done
wait_for_cmd "kubectl_rke2 get --raw=/readyz" 900 5 waited=0
until /var/lib/rancher/rke2/bin/kubectl --kubeconfig "${KUBECONFIG_FILE}" get --raw=/readyz >/dev/null 2>&1; do
sleep 5
waited=$((waited + 5))
if (( waited % 30 == 0 )); then
warn "Kubernetes API not ready yet; latest rke2-server logs:"
journalctl -u rke2-server -n 40 --no-pager || true
fi
if (( waited >= 900 )); then
journalctl -u rke2-server -n 200 --no-pager || true
die "Timed out waiting for Kubernetes API readiness"
fi
done
log "Waiting for local node to become Ready" log "Waiting for any node to become Ready"
wait_for_cmd '[[ "$(kubectl_rke2 get node "$(hostname -s)" -o jsonpath="{.status.conditions[?(@.type==\"Ready\")].status}" 2>/dev/null || true)" == "True" ]]' 1200 5
waited=0
until kubectl_rke2 get nodes -o json 2>/dev/null | jq -e '
.items | length > 0 and any(.[]; any(.status.conditions[]?; .type=="Ready" and .status=="True"))
' >/dev/null; do
sleep 5
waited=$((waited + 5))
if (( waited % 30 == 0 )); then
warn "No Ready node yet; current status:"
kubectl_rke2 get nodes -o wide || true
kubectl_rke2 get pods -A || true
journalctl -u rke2-server -n 60 --no-pager || true
fi
if (( waited >= 1200 )); then
kubectl_rke2 get nodes -o wide || true
kubectl_rke2 get pods -A || true
journalctl -u rke2-server -n 200 --no-pager || true
die "Timed out waiting for a Ready node"
fi
done
log "Waiting for system pods to settle" log "Waiting for system pods to settle"
wait_for_cmd 'kubectl_rke2 -n kube-system get pods >/dev/null 2>&1' 300 5 kubectl_rke2 get nodes -o wide || true
kubectl_rke2 get pods -A || true
kubectl_rke2 -n kube-system wait --for=condition=Ready pods --all --timeout=20m || true kubectl_rke2 -n kube-system wait --for=condition=Ready pods --all --timeout=20m || true
} }
@ -252,7 +232,6 @@ resolve_hostname() {
local detected_ip="" local detected_ip=""
detected_ip="$(ip route get 1.1.1.1 2>/dev/null | awk '{for (i=1; i<=NF; i++) if ($i=="src") {print $(i+1); exit}}')" detected_ip="$(ip route get 1.1.1.1 2>/dev/null | awk '{for (i=1; i<=NF; i++) if ($i=="src") {print $(i+1); exit}}')"
[[ -n "${detected_ip}" ]] || die "Could not auto-detect server IP. Set RANCHER_HOSTNAME manually." [[ -n "${detected_ip}" ]] || die "Could not auto-detect server IP. Set RANCHER_HOSTNAME manually."
RANCHER_HOSTNAME="${detected_ip}.sslip.io" RANCHER_HOSTNAME="${detected_ip}.sslip.io"
@ -270,7 +249,6 @@ install_cert_manager() {
[[ -n "${cert_manager_app_version}" ]] || die "Could not determine cert-manager appVersion" [[ -n "${cert_manager_app_version}" ]] || die "Could not determine cert-manager appVersion"
kubectl_rke2 create namespace cert-manager --dry-run=client -o yaml | kubectl_rke2 apply -f - kubectl_rke2 create namespace cert-manager --dry-run=client -o yaml | kubectl_rke2 apply -f -
kubectl_rke2 apply -f "https://github.com/cert-manager/cert-manager/releases/download/${cert_manager_app_version}/cert-manager.crds.yaml" kubectl_rke2 apply -f "https://github.com/cert-manager/cert-manager/releases/download/${cert_manager_app_version}/cert-manager.crds.yaml"
helm_rke2 upgrade --install cert-manager jetstack/cert-manager \ helm_rke2 upgrade --install cert-manager jetstack/cert-manager \
@ -285,10 +263,7 @@ install_cert_manager() {
} }
install_rancher() { install_rancher() {
if [[ "${INSTALL_RANCHER}" != "true" ]]; then [[ "${INSTALL_RANCHER}" == "true" ]] || return
warn "INSTALL_RANCHER=false, skipping Rancher install"
return
fi
resolve_hostname resolve_hostname
@ -317,25 +292,16 @@ print_summary() {
if [[ -f /var/lib/rancher/rke2/server/node-token ]]; then if [[ -f /var/lib/rancher/rke2/server/node-token ]]; then
node_token="$(</var/lib/rancher/rke2/server/node-token)" node_token="$(</var/lib/rancher/rke2/server/node-token)"
else
node_token="$(<"${RKE2_TOKEN_FILE}")"
fi fi
bootstrap_secret_password="$(kubectl_rke2 get secret -n cattle-system bootstrap-secret -o go-template='{{ .data.bootstrapPassword|base64decode }}' 2>/dev/null || true)" bootstrap_secret_password="$(kubectl_rke2 get secret -n cattle-system bootstrap-secret -o go-template='{{ .data.bootstrapPassword|base64decode }}' 2>/dev/null || true)"
log "Installation complete" log "Installation complete"
echo "RKE2 version: ${RKE2_VERSION}" echo "RKE2 version: ${RKE2_VERSION}"
echo "Kubeconfig: ${KUBECONFIG_FILE}" echo "Kubeconfig: ${KUBECONFIG_FILE}"
echo "Node token: ${node_token}" echo "Node token: ${node_token}"
echo "Rancher URL: https://${RANCHER_HOSTNAME}" echo "Rancher URL: https://${RANCHER_HOSTNAME}"
echo "Bootstrap pw: ${bootstrap_secret_password:-${BOOTSTRAP_PASSWORD}}" echo "Bootstrap pw: ${bootstrap_secret_password:-${BOOTSTRAP_PASSWORD}}"
echo
echo "Useful commands:"
echo " export KUBECONFIG=${KUBECONFIG_FILE}"
echo " /var/lib/rancher/rke2/bin/kubectl get nodes -o wide"
echo " /var/lib/rancher/rke2/bin/kubectl get pods -A"
echo " sudo systemctl status rke2-server --no-pager"
} }
main() { main() {