Update master_node_install.sh
This commit is contained in:
parent
13eaabe24d
commit
fd5987e06e
|
|
@ -4,14 +4,32 @@ set -Eeuo pipefail
|
||||||
########################################
|
########################################
|
||||||
# Arch Linux Rancher Management Cluster
|
# Arch Linux Rancher Management Cluster
|
||||||
# Single-node RKE2 server + Rancher
|
# Single-node RKE2 server + Rancher
|
||||||
|
#
|
||||||
|
# What this script does:
|
||||||
|
# - Disables swap
|
||||||
|
# - Installs required Arch packages
|
||||||
|
# - Configures kernel modules and sysctl for Kubernetes
|
||||||
|
# - Configures NetworkManager to ignore CNI interfaces
|
||||||
|
# - Installs RKE2 server
|
||||||
|
# - Waits for Kubernetes and bundled RKE2 addons to become healthy
|
||||||
|
# - Installs cert-manager
|
||||||
|
# - Installs Rancher
|
||||||
|
#
|
||||||
|
# Optional environment variables:
|
||||||
|
# RKE2_VERSION=v1.34.5+rke2r1
|
||||||
|
# RANCHER_HOSTNAME=rancher.example.com
|
||||||
|
# BOOTSTRAP_PASSWORD=changeme
|
||||||
|
# RKE2_TOKEN=my-shared-secret
|
||||||
|
# INSTALL_RANCHER=true
|
||||||
########################################
|
########################################
|
||||||
|
|
||||||
RKE2_VERSION="${RKE2_VERSION:-v1.34.5+rke2r1}"
|
RKE2_VERSION="${RKE2_VERSION:-v1.34.5+rke2r1}"
|
||||||
INSTALL_RANCHER="${INSTALL_RANCHER:-true}"
|
INSTALL_RANCHER="${INSTALL_RANCHER:-true}"
|
||||||
BOOTSTRAP_PASSWORD="${BOOTSTRAP_PASSWORD:-admin}"
|
BOOTSTRAP_PASSWORD="${BOOTSTRAP_PASSWORD:-adminadminadmin}"
|
||||||
RANCHER_HOSTNAME="${RANCHER_HOSTNAME:-}"
|
RANCHER_HOSTNAME="${RANCHER_HOSTNAME:-}"
|
||||||
RKE2_CONFIG_DIR="/etc/rancher/rke2"
|
RKE2_CONFIG_DIR="/etc/rancher/rke2"
|
||||||
RKE2_CONFIG_FILE="${RKE2_CONFIG_DIR}/config.yaml"
|
RKE2_CONFIG_FILE="${RKE2_CONFIG_DIR}/config.yaml"
|
||||||
|
RKE2_TOKEN_FILE="${RKE2_CONFIG_DIR}/server-token"
|
||||||
KUBECONFIG_FILE="/etc/rancher/rke2/rke2.yaml"
|
KUBECONFIG_FILE="/etc/rancher/rke2/rke2.yaml"
|
||||||
RANCHER_REPO_NAME="rancher-stable"
|
RANCHER_REPO_NAME="rancher-stable"
|
||||||
RANCHER_REPO_URL="https://releases.rancher.com/server-charts/stable"
|
RANCHER_REPO_URL="https://releases.rancher.com/server-charts/stable"
|
||||||
|
|
@ -37,6 +55,7 @@ die() {
|
||||||
on_error() {
|
on_error() {
|
||||||
local exit_code=$?
|
local exit_code=$?
|
||||||
local line_no=$1
|
local line_no=$1
|
||||||
|
|
||||||
warn "Script failed on line ${line_no} with exit code ${exit_code}"
|
warn "Script failed on line ${line_no} with exit code ${exit_code}"
|
||||||
warn "Useful diagnostics:"
|
warn "Useful diagnostics:"
|
||||||
echo " sudo systemctl status rke2-server -l --no-pager"
|
echo " sudo systemctl status rke2-server -l --no-pager"
|
||||||
|
|
@ -59,9 +78,25 @@ helm_rke2() {
|
||||||
helm --kubeconfig "${KUBECONFIG_FILE}" "$@"
|
helm --kubeconfig "${KUBECONFIG_FILE}" "$@"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
wait_for_file() {
|
||||||
|
local file="$1"
|
||||||
|
local timeout="${2:-300}"
|
||||||
|
local waited=0
|
||||||
|
|
||||||
|
until [[ -f "${file}" ]]; do
|
||||||
|
sleep 2
|
||||||
|
waited=$((waited + 2))
|
||||||
|
if (( waited >= timeout )); then
|
||||||
|
die "Timed out waiting for file: ${file}"
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
}
|
||||||
|
|
||||||
disable_swap() {
|
disable_swap() {
|
||||||
log "Disabling swap"
|
log "Disabling swap"
|
||||||
|
|
||||||
swapoff -a || true
|
swapoff -a || true
|
||||||
|
|
||||||
if [[ -f /etc/fstab ]]; then
|
if [[ -f /etc/fstab ]]; then
|
||||||
cp /etc/fstab "/etc/fstab.bak.$(date +%Y%m%d%H%M%S)"
|
cp /etc/fstab "/etc/fstab.bak.$(date +%Y%m%d%H%M%S)"
|
||||||
sed -Ei '/^[^#].+\s+swap\s+/ s/^/# disabled-by-rancher-script /' /etc/fstab
|
sed -Ei '/^[^#].+\s+swap\s+/ s/^/# disabled-by-rancher-script /' /etc/fstab
|
||||||
|
|
@ -70,6 +105,7 @@ disable_swap() {
|
||||||
|
|
||||||
install_packages() {
|
install_packages() {
|
||||||
log "Installing required Arch packages"
|
log "Installing required Arch packages"
|
||||||
|
|
||||||
pacman -Sy --noconfirm archlinux-keyring
|
pacman -Sy --noconfirm archlinux-keyring
|
||||||
|
|
||||||
if pacman -Q iptables >/dev/null 2>&1; then
|
if pacman -Q iptables >/dev/null 2>&1; then
|
||||||
|
|
@ -89,7 +125,6 @@ install_packages() {
|
||||||
iproute2 \
|
iproute2 \
|
||||||
iptables-nft \
|
iptables-nft \
|
||||||
jq \
|
jq \
|
||||||
nftables \
|
|
||||||
nfs-utils \
|
nfs-utils \
|
||||||
open-iscsi \
|
open-iscsi \
|
||||||
openssl \
|
openssl \
|
||||||
|
|
@ -142,8 +177,14 @@ EOF
|
||||||
|
|
||||||
enable_support_services() {
|
enable_support_services() {
|
||||||
log "Enabling support services"
|
log "Enabling support services"
|
||||||
|
|
||||||
systemctl enable --now iscsid.service || true
|
systemctl enable --now iscsid.service || true
|
||||||
systemctl enable --now nftables.service || true
|
|
||||||
|
# Do NOT enable nftables.service here.
|
||||||
|
# On this Arch + RKE2 setup it broke service routing for the cluster IP range.
|
||||||
|
systemctl stop nftables.service >/dev/null 2>&1 || true
|
||||||
|
systemctl disable nftables.service >/dev/null 2>&1 || true
|
||||||
|
nft flush ruleset >/dev/null 2>&1 || true
|
||||||
}
|
}
|
||||||
|
|
||||||
install_rke2() {
|
install_rke2() {
|
||||||
|
|
@ -151,7 +192,19 @@ install_rke2() {
|
||||||
|
|
||||||
mkdir -p "${RKE2_CONFIG_DIR}"
|
mkdir -p "${RKE2_CONFIG_DIR}"
|
||||||
|
|
||||||
|
if [[ -n "${RKE2_TOKEN:-}" ]]; then
|
||||||
|
printf '%s\n' "${RKE2_TOKEN}" > "${RKE2_TOKEN_FILE}"
|
||||||
|
chmod 600 "${RKE2_TOKEN_FILE}"
|
||||||
|
elif [[ ! -f "${RKE2_TOKEN_FILE}" ]]; then
|
||||||
|
openssl rand -hex 24 > "${RKE2_TOKEN_FILE}"
|
||||||
|
chmod 600 "${RKE2_TOKEN_FILE}"
|
||||||
|
fi
|
||||||
|
|
||||||
|
local token
|
||||||
|
token="$(<"${RKE2_TOKEN_FILE}")"
|
||||||
|
|
||||||
cat >"${RKE2_CONFIG_FILE}" <<EOF
|
cat >"${RKE2_CONFIG_FILE}" <<EOF
|
||||||
|
token: ${token}
|
||||||
write-kubeconfig-mode: "0644"
|
write-kubeconfig-mode: "0644"
|
||||||
tls-san:
|
tls-san:
|
||||||
- ${RANCHER_HOSTNAME:-127.0.0.1}
|
- ${RANCHER_HOSTNAME:-127.0.0.1}
|
||||||
|
|
@ -170,47 +223,45 @@ EOF
|
||||||
systemctl restart rke2-server.service
|
systemctl restart rke2-server.service
|
||||||
}
|
}
|
||||||
|
|
||||||
wait_for_rke2() {
|
wait_for_api() {
|
||||||
log "Waiting for RKE2 and Kubernetes API"
|
log "Waiting for RKE2 and Kubernetes API"
|
||||||
|
|
||||||
local waited=0
|
wait_for_file "${KUBECONFIG_FILE}" 600
|
||||||
until [[ -f "${KUBECONFIG_FILE}" ]]; do
|
wait_for_file "/var/lib/rancher/rke2/bin/kubectl" 600
|
||||||
sleep 2
|
|
||||||
waited=$((waited + 2))
|
|
||||||
if (( waited >= 600 )); then
|
|
||||||
journalctl -u rke2-server -n 200 --no-pager || true
|
|
||||||
die "Timed out waiting for ${KUBECONFIG_FILE}"
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
|
|
||||||
waited=0
|
local waited=0
|
||||||
until /var/lib/rancher/rke2/bin/kubectl --kubeconfig "${KUBECONFIG_FILE}" get --raw=/readyz >/dev/null 2>&1; do
|
until kubectl_rke2 get --raw=/readyz >/dev/null 2>&1; do
|
||||||
sleep 5
|
sleep 5
|
||||||
waited=$((waited + 5))
|
waited=$((waited + 5))
|
||||||
|
|
||||||
if (( waited % 30 == 0 )); then
|
if (( waited % 30 == 0 )); then
|
||||||
warn "Kubernetes API not ready yet; latest rke2-server logs:"
|
warn "Kubernetes API not ready yet; recent rke2-server logs:"
|
||||||
journalctl -u rke2-server -n 40 --no-pager || true
|
journalctl -u rke2-server -n 40 --no-pager || true
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if (( waited >= 900 )); then
|
if (( waited >= 900 )); then
|
||||||
journalctl -u rke2-server -n 200 --no-pager || true
|
journalctl -u rke2-server -n 200 --no-pager || true
|
||||||
die "Timed out waiting for Kubernetes API readiness"
|
die "Timed out waiting for Kubernetes API readiness"
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
|
}
|
||||||
|
|
||||||
|
wait_for_ready_node() {
|
||||||
log "Waiting for any node to become Ready"
|
log "Waiting for any node to become Ready"
|
||||||
|
|
||||||
waited=0
|
local waited=0
|
||||||
until kubectl_rke2 get nodes -o json 2>/dev/null | jq -e '
|
until kubectl_rke2 get nodes -o json 2>/dev/null | jq -e '
|
||||||
.items | length > 0 and any(.[]; any(.status.conditions[]?; .type=="Ready" and .status=="True"))
|
.items | length > 0 and any(.[]; any(.status.conditions[]?; .type=="Ready" and .status=="True"))
|
||||||
' >/dev/null; do
|
' >/dev/null; do
|
||||||
sleep 5
|
sleep 5
|
||||||
waited=$((waited + 5))
|
waited=$((waited + 5))
|
||||||
|
|
||||||
if (( waited % 30 == 0 )); then
|
if (( waited % 30 == 0 )); then
|
||||||
warn "No Ready node yet; current status:"
|
warn "No Ready node yet; current status:"
|
||||||
kubectl_rke2 get nodes -o wide || true
|
kubectl_rke2 get nodes -o wide || true
|
||||||
kubectl_rke2 get pods -A || true
|
kubectl_rke2 get pods -A || true
|
||||||
journalctl -u rke2-server -n 60 --no-pager || true
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if (( waited >= 1200 )); then
|
if (( waited >= 1200 )); then
|
||||||
kubectl_rke2 get nodes -o wide || true
|
kubectl_rke2 get nodes -o wide || true
|
||||||
kubectl_rke2 get pods -A || true
|
kubectl_rke2 get pods -A || true
|
||||||
|
|
@ -218,11 +269,64 @@ wait_for_rke2() {
|
||||||
die "Timed out waiting for a Ready node"
|
die "Timed out waiting for a Ready node"
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
|
}
|
||||||
|
|
||||||
|
wait_for_system_pods() {
|
||||||
|
log "Waiting for core system pods"
|
||||||
|
|
||||||
|
local waited=0
|
||||||
|
until kubectl_rke2 -n kube-system get pod -l k8s-app=kube-dns >/dev/null 2>&1; do
|
||||||
|
sleep 5
|
||||||
|
waited=$((waited + 5))
|
||||||
|
if (( waited >= 600 )); then
|
||||||
|
kubectl_rke2 get pods -A || true
|
||||||
|
die "Timed out waiting for kube-system pods to appear"
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
log "Waiting for system pods to settle"
|
|
||||||
kubectl_rke2 get nodes -o wide || true
|
kubectl_rke2 get nodes -o wide || true
|
||||||
kubectl_rke2 get pods -A || true
|
kubectl_rke2 get pods -A || true
|
||||||
kubectl_rke2 -n kube-system wait --for=condition=Ready pods --all --timeout=20m || true
|
|
||||||
|
# Wait for Canal first because service routing depends on it.
|
||||||
|
waited=0
|
||||||
|
until kubectl_rke2 -n kube-system get pods -l app=rke2-canal -o json 2>/dev/null | jq -e '
|
||||||
|
.items | length > 0 and all(.[]; .status.phase=="Running")
|
||||||
|
' >/dev/null; do
|
||||||
|
sleep 5
|
||||||
|
waited=$((waited + 5))
|
||||||
|
if (( waited % 30 == 0 )); then
|
||||||
|
warn "Canal not fully ready yet"
|
||||||
|
kubectl_rke2 -n kube-system get pods -o wide || true
|
||||||
|
fi
|
||||||
|
if (( waited >= 900 )); then
|
||||||
|
kubectl_rke2 -n kube-system get pods -o wide || true
|
||||||
|
die "Timed out waiting for Canal"
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
|
# Give kube-proxy and service routing a moment to settle.
|
||||||
|
sleep 20
|
||||||
|
|
||||||
|
# Wait for bundled addons that Rancher depends on.
|
||||||
|
waited=0
|
||||||
|
until kubectl_rke2 -n kube-system get deploy rke2-ingress-nginx-controller rke2-metrics-server rke2-snapshot-controller >/dev/null 2>&1; do
|
||||||
|
sleep 5
|
||||||
|
waited=$((waited + 5))
|
||||||
|
if (( waited >= 900 )); then
|
||||||
|
kubectl_rke2 -n kube-system get pods -o wide || true
|
||||||
|
die "Timed out waiting for bundled RKE2 addon deployments"
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
|
||||||
|
kubectl_rke2 -n kube-system rollout status deploy/rke2-ingress-nginx-controller --timeout=20m
|
||||||
|
kubectl_rke2 -n kube-system rollout status deploy/rke2-metrics-server --timeout=20m
|
||||||
|
kubectl_rke2 -n kube-system rollout status deploy/rke2-snapshot-controller --timeout=20m
|
||||||
|
|
||||||
|
# CoreDNS can be slightly slower; wait for it too.
|
||||||
|
kubectl_rke2 -n kube-system rollout status deploy/rke2-coredns-rke2-coredns --timeout=20m || true
|
||||||
|
|
||||||
|
log "System pods are settled"
|
||||||
|
kubectl_rke2 get pods -A || true
|
||||||
}
|
}
|
||||||
|
|
||||||
resolve_hostname() {
|
resolve_hostname() {
|
||||||
|
|
@ -232,6 +336,7 @@ resolve_hostname() {
|
||||||
|
|
||||||
local detected_ip=""
|
local detected_ip=""
|
||||||
detected_ip="$(ip route get 1.1.1.1 2>/dev/null | awk '{for (i=1; i<=NF; i++) if ($i=="src") {print $(i+1); exit}}')"
|
detected_ip="$(ip route get 1.1.1.1 2>/dev/null | awk '{for (i=1; i<=NF; i++) if ($i=="src") {print $(i+1); exit}}')"
|
||||||
|
|
||||||
[[ -n "${detected_ip}" ]] || die "Could not auto-detect server IP. Set RANCHER_HOSTNAME manually."
|
[[ -n "${detected_ip}" ]] || die "Could not auto-detect server IP. Set RANCHER_HOSTNAME manually."
|
||||||
|
|
||||||
RANCHER_HOSTNAME="${detected_ip}.sslip.io"
|
RANCHER_HOSTNAME="${detected_ip}.sslip.io"
|
||||||
|
|
@ -263,7 +368,10 @@ install_cert_manager() {
|
||||||
}
|
}
|
||||||
|
|
||||||
install_rancher() {
|
install_rancher() {
|
||||||
[[ "${INSTALL_RANCHER}" == "true" ]] || return
|
if [[ "${INSTALL_RANCHER}" != "true" ]]; then
|
||||||
|
warn "INSTALL_RANCHER=false, skipping Rancher install"
|
||||||
|
return
|
||||||
|
fi
|
||||||
|
|
||||||
resolve_hostname
|
resolve_hostname
|
||||||
|
|
||||||
|
|
@ -292,16 +400,25 @@ print_summary() {
|
||||||
|
|
||||||
if [[ -f /var/lib/rancher/rke2/server/node-token ]]; then
|
if [[ -f /var/lib/rancher/rke2/server/node-token ]]; then
|
||||||
node_token="$(</var/lib/rancher/rke2/server/node-token)"
|
node_token="$(</var/lib/rancher/rke2/server/node-token)"
|
||||||
|
else
|
||||||
|
node_token="$(<"${RKE2_TOKEN_FILE}")"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
bootstrap_secret_password="$(kubectl_rke2 get secret -n cattle-system bootstrap-secret -o go-template='{{ .data.bootstrapPassword|base64decode }}' 2>/dev/null || true)"
|
bootstrap_secret_password="$(kubectl_rke2 get secret -n cattle-system bootstrap-secret -o go-template='{{ .data.bootstrapPassword|base64decode }}' 2>/dev/null || true)"
|
||||||
|
|
||||||
log "Installation complete"
|
log "Installation complete"
|
||||||
|
|
||||||
echo "RKE2 version: ${RKE2_VERSION}"
|
echo "RKE2 version: ${RKE2_VERSION}"
|
||||||
echo "Kubeconfig: ${KUBECONFIG_FILE}"
|
echo "Kubeconfig: ${KUBECONFIG_FILE}"
|
||||||
echo "Node token: ${node_token}"
|
echo "Node token: ${node_token}"
|
||||||
echo "Rancher URL: https://${RANCHER_HOSTNAME}"
|
echo "Rancher URL: https://${RANCHER_HOSTNAME}"
|
||||||
echo "Bootstrap pw: ${bootstrap_secret_password:-${BOOTSTRAP_PASSWORD}}"
|
echo "Bootstrap pw: ${bootstrap_secret_password:-${BOOTSTRAP_PASSWORD}}"
|
||||||
|
echo
|
||||||
|
echo "Useful commands:"
|
||||||
|
echo " export KUBECONFIG=${KUBECONFIG_FILE}"
|
||||||
|
echo " /var/lib/rancher/rke2/bin/kubectl get nodes -o wide"
|
||||||
|
echo " /var/lib/rancher/rke2/bin/kubectl get pods -A"
|
||||||
|
echo " sudo systemctl status rke2-server --no-pager"
|
||||||
}
|
}
|
||||||
|
|
||||||
main() {
|
main() {
|
||||||
|
|
@ -312,7 +429,9 @@ main() {
|
||||||
configure_networkmanager
|
configure_networkmanager
|
||||||
enable_support_services
|
enable_support_services
|
||||||
install_rke2
|
install_rke2
|
||||||
wait_for_rke2
|
wait_for_api
|
||||||
|
wait_for_ready_node
|
||||||
|
wait_for_system_pods
|
||||||
install_cert_manager
|
install_cert_manager
|
||||||
install_rancher
|
install_rancher
|
||||||
print_summary
|
print_summary
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue