#!/bin/bash
#
# Fairport bootstrap installer: installs (or upgrades) RKE2 on this machine and,
# on the first server node, deploys the Fairport Helm chart.
#
# Usage (as root):
#   <script>                     install / upgrade
#   <script> stop                stop all rke2 processes (rke2-killall.sh)
#   <script> uninstall           remove rke2 and the helper scripts
#   <script> update-components   re-apply the Fairport HelmChart, then install
#
# Environment variables read:
#   FP_DEBUG=true                enable 'set -x' tracing
#   FP_MODE                      copied to MODE
#   RKE2_TYPE                    'server' or 'agent' to join an existing
#                                cluster; unset bootstraps a new cluster
#   RKE2_SERVER, RKE2_TOKEN      required when RKE2_TYPE is set
#   RKE2_CNI                     default: cilium
#   RKE2_CLUSTER_CIDR, RKE2_SERVICE_CIDR
#   FAIRPORT_CHART_VERSION, FAIRPORT_CHART_SOURCE, FAIRPORT_CHART_NAMESPACE
#   FAIRPORT_CONFIG_FILE         optional Helm values file for the chart

# Set at file level so crit() can print the link even when it fires before
# check_variables has run.
DOCS_PAGE="https://github.com/fairport-io/get-fairport-io"

# Print one log line: "[timestamp] [LEVEL] (calling function) message".
# Arguments: $1 - level label, remaining - message words.
# Newlines inside the message are flattened to spaces so that every log entry
# occupies exactly one line.
log_line () {
  local LEVEL="$1"
  shift
  # FUNCNAME[0] is log_line, [1] is info/warn/crit, [2] is the function that
  # actually asked for the message to be logged.
  echo "[$(date +"%Y-%m-%dT%H:%M:%S%z")] [${LEVEL}] (${FUNCNAME[2]}) $*" | tr '\n' ' '
  echo ""
}

# Log an informational message.
info () {
  log_line "INFO " "$@"
}

# Log a warning message.
warn () {
  log_line "WARNING " "$@"
}

# Log a critical message followed by the documentation link, then exit 1.
crit () {
  log_line "CRITICAL" "$@" "| Docs: $DOCS_PAGE"
  exit 1
}

# Run a command line through eval, log every output line via info, and abort
# the script (crit) when the command exits non-zero.
# Arguments: the command and its arguments; they are joined with spaces and
#            re-parsed by eval, so a single quoted string may carry shell
#            syntax such as '||' or redirections.
# Globals:   EXIT_CODE (written) - exit status of the command.
# Returns:   0 when the command succeeded (never returns on failure).
run_cmd () {
  local CMD="$*"
  local LINE
  info "Running: ${CMD}"
  # '|| [ -n "$LINE" ]' keeps a last output line that has no trailing newline.
  (eval "$CMD" 2>&1) | while IFS= read -r LINE || [ -n "$LINE" ]; do
    info "$LINE"
  done
  # Must be read immediately after the pipeline; [0] is the eval'd command.
  EXIT_CODE=${PIPESTATUS[0]}
  info "Command completed with exit code: ${EXIT_CODE}"
  if [ "${EXIT_CODE}" -ne 0 ]; then
    crit "Failed to run, see above for details"
  fi
  return 0
}

# Enable shell tracing when FP_DEBUG is "true".
check_debug () {
  if [ "${FP_DEBUG}" = "true" ]; then
    set -x
  fi
}

# Abort unless running as root with curl available.
check_dependencies () {
  [ "$EUID" = "0" ] || crit "Re-run as root or with sudo"
  command -v curl &> /dev/null || crit "Requirement 'curl' is not installed on this machine, please install and re-run."
}

# Abort unless the host is Linux on amd64/arm64 with systemd, the required
# tools, enough memory and enough free disk space on /.
# Globals: OS_NAME, ARCH_NAME, MEM_KB, ROOT_AVAIL_KB (written).
check_system_requirements () {
  local TOOL
  # MemTotal in /proc/meminfo is the usable RAM, which is always somewhat less
  # than the physical RAM, so a "4GiB" machine reports fewer than 4194304 kB.
  # Accept anything from 3.5 GiB so that such machines pass the check.
  local MIN_MEM_KB=3670016
  local MIN_DISK_KB=10485760

  info "Checking system requirements"

  OS_NAME="$(uname | tr '[:upper:]' '[:lower:]')"
  [ "$OS_NAME" = "linux" ] || crit "Unsupported OS '$OS_NAME'. Linux is required."

  ARCH_NAME="$(uname -m | tr '[:upper:]' '[:lower:]')"
  echo "$ARCH_NAME" | grep -Eq "amd64|x86_64|arm64|aarch64" || crit "Unsupported architecture '$ARCH_NAME'. Required: amd64 or arm64."

  for TOOL in systemctl ip timeout; do
    command -v "$TOOL" &> /dev/null || crit "Requirement '$TOOL' is not installed on this machine"
  done
  [ -d /run/systemd/system ] || crit "systemd is required"

  MEM_KB=$(awk '/MemTotal/{print $2}' /proc/meminfo 2>/dev/null)
  [ "$MEM_KB" ] || crit "Unable to read total memory from /proc/meminfo"
  if [ "$MEM_KB" -lt "$MIN_MEM_KB" ]; then
    crit "Minimum recommended memory is >= 4GiB"
  fi

  ROOT_AVAIL_KB=$(df -Pk / | awk 'NR==2{print $4}')
  [ "$ROOT_AVAIL_KB" ] || crit "Unable to read free disk space"
  if [ "$ROOT_AVAIL_KB" -lt "$MIN_DISK_KB" ]; then
    crit "Minimum free disk is >= 10GiB on /"
  fi
}

# Validate the user-supplied environment and derive every global used by the
# install steps (download URLs, paths, chart settings, CIDRs, node addresses).
# Globals: reads RKE2_TYPE, RKE2_SERVER, RKE2_TOKEN, FP_MODE and the optional
#          FAIRPORT_* / RKE2_* overrides; writes the RKE2_*, FAIRPORT_*, OS,
#          ARCH, MODE and DEFAULT_IPV4/DEFAULT_IPV6 variables.
check_variables () {
  #if ! [ "$FP_CLUSTER_TOKEN" ]; then
  #  warn "Environment variable 'FP_CLUSTER_TOKEN' is not set."
  #  warn "Setup will use 'unmanaged' mode"
  #  warn "You can obtain a token from https://fairport.io"
  #  warn "If you have a token you can set it with 'export FP_CLUSTER_TOKEN=TOKEN'"
  #else
  #  info "Setup will use 'managed' mode"
  #fi
  local HAS_IPV4_ROUTE HAS_IPV6_ROUTE

  case "$RKE2_TYPE" in
    server|agent)
      # An explicit type means "join an existing cluster".
      export INSTALL_RKE2_TYPE="$RKE2_TYPE"
      [ "$RKE2_SERVER" ] || crit "Environment variable 'RKE2_SERVER' must be set when RKE2_TYPE is '$RKE2_TYPE'. It should contain the IP of an existing server node, e.g. 'https://SERVER_IP:9345'"
      [ "$RKE2_TOKEN" ] || crit "Environment variable 'RKE2_TOKEN' must be set when RKE2_TYPE is '$RKE2_TYPE'. It can be retrieved from an existing server node with 'cat /var/lib/rancher/rke2/server/node-token'"
      timeout 2 curl -ks "$RKE2_SERVER/ping" 2>/dev/null | grep -Eq "Status|pong" || crit "Cannot reach RKE2 server at '$RKE2_SERVER', please check the address and ensure the server is running and reachable from this machine"
      ;;
    "")
      # No type given: bootstrap the first server of a new cluster.
      RKE2_TYPE="server"
      RKE2_BOOTSTRAP="true"
      ;;
    *)
      crit "Unsupported RKE2_TYPE '$RKE2_TYPE', expected 'server' or 'agent' (leave it unset to bootstrap a new cluster)"
      ;;
  esac

  MODE="$FP_MODE"
  OS="$(uname | tr '[:upper:]' '[:lower:]')"
  ARCH="$(uname -m | tr '[:upper:]' '[:lower:]')"
  # Normalise the machine name to the architecture labels used in the
  # release asset file names built below.
  if echo "$ARCH" | grep -Eq "amd64|x86_64"; then
    ARCH="amd64"
  fi
  if echo "$ARCH" | grep -Eq "arm64|aarch64"; then
    ARCH="arm64"
  fi

  RKE2_VERSION="v1.33.3+rke2r1"
  # '+' must be percent-encoded inside the download URL.
  RKE2_VERSION_URLSAFE=$(echo "$RKE2_VERSION" | sed 's/+/%2B/g')
  RKE2_IMAGE_URL="https://github.com/rancher/rke2/releases/download/${RKE2_VERSION_URLSAFE}/rke2-images.${OS}-${ARCH}.tar.zst"
  RKE2_IMAGE_DIR="/var/lib/rancher/rke2/agent/images"
  RKE2_IMAGE_FILE="${RKE2_IMAGE_DIR}/rke2-images.${OS}-${ARCH}.tar.zst"
  RKE2_BINARY_URL="https://github.com/rancher/rke2/releases/download/${RKE2_VERSION_URLSAFE}/rke2.${OS}-${ARCH}.tar.gz"
  RKE2_BINARIES_DIR="/var/lib/rancher/rke2/bin"
  RKE2_BINARIES_BUNDLE_FILE="${RKE2_BINARIES_DIR}/rke2.${OS}-${ARCH}-${RKE2_VERSION}.tar.gz"
  RKE2_BINARY_FILE="/usr/local/bin/rke2"
  RKE2_KUBECONFIG="/etc/rancher/rke2/rke2.yaml"
  if [ "$RKE2_TYPE" = "agent" ]; then
    RKE2_KUBECONFIG="/var/lib/rancher/rke2/agent/rke2controller.kubeconfig"
  fi

  # Defaults apply when the variable is unset or empty.
  : "${FAIRPORT_CHART_VERSION:=0.3.5}"
  : "${FAIRPORT_CHART_SOURCE:=oci://gcr.io/fairport-io/charts/fairport}"
  : "${FAIRPORT_CHART_NAMESPACE:=fairport}"
  : "${RKE2_CNI:=cilium}"

  HAS_IPV4_ROUTE="$(ip -4 route show default 2>/dev/null)"
  HAS_IPV6_ROUTE="$(ip -6 route show default 2>/dev/null)"

  # Default CIDRs follow the address families that have a default route.
  # The ':+' expansion adds the separating comma only when an IPv4 range is
  # already present, so an IPv6-only host does not get a leading comma.
  if [ ! "$RKE2_CLUSTER_CIDR" ]; then
    if [ "$HAS_IPV4_ROUTE" ]; then
      RKE2_CLUSTER_CIDR="100.64.0.0/12"
    fi
    if [ "$HAS_IPV6_ROUTE" ]; then
      RKE2_CLUSTER_CIDR="${RKE2_CLUSTER_CIDR:+${RKE2_CLUSTER_CIDR},}fd00:10:42::/48"
    fi
  fi
  if [ ! "$RKE2_SERVICE_CIDR" ]; then
    if [ "$HAS_IPV4_ROUTE" ]; then
      RKE2_SERVICE_CIDR="100.80.0.0/12"
    fi
    if [ "$HAS_IPV6_ROUTE" ]; then
      RKE2_SERVICE_CIDR="${RKE2_SERVICE_CIDR:+${RKE2_SERVICE_CIDR},}fd00:10:43::/108"
    fi
  fi

  # Source address the kernel would use for outbound traffic.
  DEFAULT_IPV4=$(ip route get 1 2>/dev/null | sed -n '/src/{s/.*src *\([^ ]*\).*/\1/p;q}')
  if echo "$RKE2_CLUSTER_CIDR" | grep -q ":"; then
    DEFAULT_IPV6=$(ip route get 2001:4860:4860::8888 2>/dev/null | sed -n '/src/{s/.*src *\([^ ]*\).*/\1/p;q}')
  fi
}

# Make sure /usr/local/bin (where fpk and rke2 are installed) is on PATH, both
# for this process and, via root's .bashrc, for future shells.
ensure_local_bin_in_path () {
  info "Ensure /usr/local/bin is in PATH"
  case ":${PATH}:" in
    *":/usr/local/bin:"*)
      return 0
      ;;
  esac
  echo 'export PATH=$PATH:/usr/local/bin' >> "$HOME/.bashrc"
  export PATH="$PATH:/usr/local/bin"
}

# Succeeds when the installed rke2 binary reports exactly $RKE2_VERSION.
k8s_same_version () {
  local EXISTING_RKE2_VERSION
  EXISTING_RKE2_VERSION=$(rke2 --version 2>&1 | grep -o "v[0-9]\S*" | head -n 1)
  [ "$EXISTING_RKE2_VERSION" = "$RKE2_VERSION" ]
}

# Succeeds when the cluster looks healthy from this node.
# On agents: the node named in config.yaml must be listed as Ready.
# Otherwise: every pod in kube-system must be Running, Completed or
# Terminating. The check counts the kubectl output lines that do NOT match
# those patterns; zero such lines means healthy.
k8s_is_healthy () {
  local NODE_NAME CMD STATUS
  NODE_NAME=$(grep node-name /etc/rancher/rke2/config.yaml 2>/dev/null | cut -d ' ' -f2 | tr -d "\"'")
  if [ "$RKE2_TYPE" = "agent" ]; then
    CMD="fpk get node $NODE_NAME 2>&1 | grep -Ev 'NAME| Ready ' | wc -l"
  else
    CMD='fpk -n kube-system get pods 2>&1 | grep -Ev "NAME|Running|Completed|Terminating" | wc -l'
  fi
  # stderr of the helper shell is dropped so that only the line count ends up
  # in STATUS; fpk's own errors are already folded into the count by '2>&1'.
  STATUS=$(timeout 10 bash -c "$CMD" 2>/dev/null)
  # An empty STATUS (e.g. the 10s timeout fired) counts as not healthy.
  if [ "${STATUS//[[:space:]]/}" = "0" ]; then
    info "Cluster health: OK"
    return 0
  fi
  info "Cluster health: NOT OK"
  return 1
}

# Print the installer's progress lines from the most recent run recorded in
# the fairport-installer-0 log (everything after the last "Detected changes"
# line), two log lines joined per output line.
get_installer_state () {
  fpk -n fairport logs fairport-installer-0 2>&1 \
    | tac | sed '/Detected changes/q' | tac | sed 1d \
    | grep -E "NAME:|STATUS|helmfile apply succeeded" \
    | paste - -
}

# On server nodes, wait for the Fairport installer pod to start and then for
# it to report success, logging progress changes along the way. Aborts (crit)
# on failure or timeout. Does nothing on agent nodes.
check_application_install_status () {
  [ "$RKE2_TYPE" != "server" ] && return 0

  local MAX_RETRIES I
  local CHART_OK="" NS_OK="" POD_OK=""
  local CURRENT_INSTALLER_STATE PREVIOUS_INSTALLER_STATE="" NEW_STATE_LINES

  info "Wait for fairport installer to start..."
  MAX_RETRIES=300
  for ((I = 1; I <= MAX_RETRIES; I++)); do
    fpk -n kube-system get pod 2>&1 | grep -q "fairport.*Completed" && CHART_OK=true
    fpk get ns 2>&1 | grep -q "fairport" && NS_OK=true
    fpk -n fairport get pods 2>&1 | grep -q "fairport-installer-0.* Running .*" && POD_OK=true
    if [ "$CHART_OK" ] && [ "$NS_OK" ] && [ "$POD_OK" ]; then
      info "Fairport installer has started"
      sleep 5
      break
    fi
    if [ "$I" -ge "$MAX_RETRIES" ]; then
      [ "$CHART_OK" ] || crit "Fairport chart installation did not complete, check 'fpk -n kube-system get pods' for details"
      [ "$NS_OK" ] || crit "Fairport installer namespace 'fairport' not found, check 'fpk get ns' for details"
      [ "$POD_OK" ] || crit "Fairport installer pod 'fairport-installer-0' not running, check 'fpk -n fairport get pods' for details"
    fi
    sleep 1
  done

  info "Installing applications, this may take a few minutes..."
  MAX_RETRIES=600
  for ((I = 1; I <= MAX_RETRIES; I++)); do
    CURRENT_INSTALLER_STATE=$(get_installer_state)
    if echo "$CURRENT_INSTALLER_STATE" | grep -q "helmfile .* succeeded"; then
      info "Installation complete, you can now use 'fpk' to interact with your cluster and/or add nodes with /usr/local/bin/fp-add-[server|agent]"
      return 0
    fi
    if echo "$CURRENT_INSTALLER_STATE" | grep -q "NAME: fairport, STATUS: failed"; then
      crit "Fairport installation failed, check 'fpk -n fairport logs fairport-installer-0' for details"
    fi
    if [ "$I" -ge "$MAX_RETRIES" ]; then
      crit "Fairport installation is taking too long, check 'fpk -n fairport logs fairport-installer-0' for details"
    fi
    if [ "$CURRENT_INSTALLER_STATE" != "$PREVIOUS_INSTALLER_STATE" ]; then
      if [ "$PREVIOUS_INSTALLER_STATE" ]; then
        # Report only the lines that were not part of the previous state.
        # -F/-x compare whole lines literally; an empty pattern would match
        # (and therefore hide) every line, hence the separate first-run case.
        NEW_STATE_LINES=$(echo "$CURRENT_INSTALLER_STATE" | grep -vxF -e "$PREVIOUS_INSTALLER_STATE")
      else
        NEW_STATE_LINES="$CURRENT_INSTALLER_STATE"
      fi
      if [ "$NEW_STATE_LINES" ]; then
        info "$(echo "$NEW_STATE_LINES" | tr -s '[:space:]' ' ')"
      fi
    fi
    PREVIOUS_INSTALLER_STATE="$CURRENT_INSTALLER_STATE"
    sleep 1
  done
}

# Poll k8s_is_healthy once per second until it succeeds.
# Arguments: $1 - number of retries before giving up (crit).
wait_for_k8s_ready () {
  local MAX_RETRIES=$1
  while ! k8s_is_healthy; do
    if [ "$MAX_RETRIES" -le 0 ]; then
      crit "Containers in kube-system didn't start in time"
    fi
    if (( MAX_RETRIES % 5 == 0 )); then
      info "Wait for all containers in kube-system to start ($MAX_RETRIES retries left)"
    fi
    MAX_RETRIES=$((MAX_RETRIES - 1))
    sleep 1
  done
}

# Write the kernel settings used for k8s and apply them.
update_sysctl () {
  info "Updating sysctl settings for k8s"
  cat << EOF > /etc/sysctl.d/90-k8s.conf
net.ipv4.ip_forward = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
fs.inotify.max_user_watches=2099999999
fs.inotify.max_user_instances=2099999999
fs.inotify.max_queued_events=2099999999
vm.max_map_count=262144
EOF
  # Only sysctl's stdout (the list of applied settings) is discarded. Errors
  # and the failure message from run_cmd stay visible, so a failing sysctl no
  # longer ends the script without any output.
  run_cmd "sysctl --system > /dev/null"
}

# Write /etc/rancher/rke2/config.yaml for this node.
# Globals: reads DEFAULT_IPV4, DEFAULT_IPV6, RKE2_CLUSTER_CIDR,
#          RKE2_SERVICE_CIDR, RKE2_CNI, RKE2_SERVER, RKE2_TOKEN.
generate_rke2_config () {
  local NODE_NAME NODE_IP
  local CONFIG_FILE="/etc/rancher/rke2/config.yaml"

  info "Generating rke2 config file"
  NODE_NAME="$(echo "$DEFAULT_IPV4" | tr '.' '-')-$(date +%s)"
  # Join the addresses with a comma only when both families are present.
  NODE_IP="${DEFAULT_IPV4}${DEFAULT_IPV6:+${DEFAULT_IPV4:+,}${DEFAULT_IPV6}}"

  mkdir -p /etc/rancher/rke2
  cat << EOF > "$CONFIG_FILE"
write-kubeconfig-mode: "0600"
node-name: "$NODE_NAME"
node-ip: "$NODE_IP"
cluster-cidr: "$RKE2_CLUSTER_CIDR"
service-cidr: "$RKE2_SERVICE_CIDR"
etcd-expose-metrics: true
etcd-arg: "quota-backend-bytes=4294967296"
cni: "$RKE2_CNI"
disable-kube-proxy: true
disable:
  - rke2-ingress-nginx
ingress-controller: traefik
EOF
  # Restrict access before the join token is appended below.
  chmod 600 "$CONFIG_FILE"
  if [ "$RKE2_SERVER" ]; then
    echo "server: $RKE2_SERVER" >> "$CONFIG_FILE"
  fi
  if [ "$RKE2_TOKEN" ]; then
    echo "token: $RKE2_TOKEN" >> "$CONFIG_FILE"
  fi
  if systemctl is-active --quiet systemd-resolved && [ -f "/run/systemd/resolve/resolv.conf" ]; then
    echo "kubelet-arg:" >> "$CONFIG_FILE"
    echo "- resolv-conf=/run/systemd/resolve/resolv.conf" >> "$CONFIG_FILE"
  fi
}

# Download a URL to a file, aborting (via run_cmd) on any failure.
# Arguments: $1 - URL, $2 - destination path.
# The data is written to "<destination>.part" and renamed only after curl
# succeeded, so an interrupted or failed download is never mistaken for a
# complete file by the "already exists" checks on a later run. 'curl -f'
# turns HTTP errors into a non-zero exit instead of saving the error page.
download_file () {
  local URL="$1"
  local DEST="$2"
  run_cmd rm -f "${DEST}.part"
  run_cmd curl -fsSL "$URL" -o "${DEST}.part"
  run_cmd mv "${DEST}.part" "$DEST"
}

# Write /usr/local/bin/fp-add-<type>: a script that prints the one-line
# command to run on another machine in order to join it to this cluster.
# Arguments: $1 - node type, 'server' or 'agent'.
write_add_node_script () {
  local NODE_TYPE="$1"
  local TARGET="/usr/local/bin/fp-add-${NODE_TYPE}"
  echo -e '#!/bin/bash\n\n[ $(whoami) != root ] && echo "Failure, must be root" && exit 1\necho "[ \$(whoami) != root ] && echo \"Failure, must be root\" && exit 1 ; export RKE2_TYPE='"${NODE_TYPE}"' ; export RKE2_SERVER=https://$(ip route get 1 | sed -n '\''/src/{s/.*src *\([^ ]*\).*/\\1/p;q}'\''):9345 ; export RKE2_TOKEN=$(cat /var/lib/rancher/rke2/server/node-token) ; curl -s https://get.fairport.io | bash - || exit 1 ; history -d \$HISTCMD || true" ; exit 0' > "$TARGET" \
    || crit "Failed to create $TARGET"
}

# Write the HelmChartConfig manifests that customise the cilium and coredns
# charts. Only meaningful on server nodes.
write_server_manifests () {
  local MANIFESTS_DIR="/var/lib/rancher/rke2/server/manifests"
  run_cmd mkdir -p "$MANIFESTS_DIR"

  info "Update cilium config"
  cat << EOF > "${MANIFESTS_DIR}/rke2-cilium-config.yaml" || crit "Failed to write ${MANIFESTS_DIR}/rke2-cilium-config.yaml"
---
apiVersion: helm.cattle.io/v1
kind: HelmChartConfig
metadata:
  name: rke2-cilium
  namespace: kube-system
spec:
  valuesContent: |-
    kubeProxyReplacement: true
    k8sServiceHost: "${DEFAULT_IPV4}"
    k8sServicePort: "6443"
    operator:
      replicas: 1
    encryption:
      enabled: true
      type: wireguard
      nodeEncryption: true
EOF

  info "Update coredns config"
  # NOTE(review): nodeSelector is written as a top-level chart value here; the
  # nesting could not be confirmed from the flattened source. Verify that it
  # was not meant to sit under 'autoscaler'.
  cat << EOF > "${MANIFESTS_DIR}/rke2-coredns-config.yaml" || crit "Failed to write ${MANIFESTS_DIR}/rke2-coredns-config.yaml"
---
apiVersion: helm.cattle.io/v1
kind: HelmChartConfig
metadata:
  name: rke2-coredns
  namespace: kube-system
spec:
  valuesContent: |-
    autoscaler:
      coresPerReplica: 0
      nodesPerReplica: 1
      min: 1
      max: 3
    nodeSelector:
      node-role.kubernetes.io/control-plane: "true"
EOF
}

# Install or upgrade the rke2 binaries when the installed version differs from
# $RKE2_VERSION, (re)start the rke2 service, install the fpk / fp-add-* helper
# scripts, write the server manifests and wait for the cluster to be healthy.
install_kubernetes () {
  local VERSION_DIR="${RKE2_BINARIES_DIR}/${RKE2_VERSION}"
  local SERVICE="rke2-${RKE2_TYPE}.service"
  local SERVICE_ACTION="start"

  if ! k8s_same_version; then
    if [ -f "$RKE2_BINARY_FILE" ]; then
      info "Upgrading rke2 ($RKE2_VERSION) from $RKE2_BINARY_URL"
    else
      info "Installing rke2 ($RKE2_VERSION) from $RKE2_BINARY_URL"
    fi
    run_cmd mkdir -p "$VERSION_DIR"
    if [ ! -f "$RKE2_BINARIES_BUNDLE_FILE" ]; then
      download_file "$RKE2_BINARY_URL" "$RKE2_BINARIES_BUNDLE_FILE"
    fi
    run_cmd tar -xf "$RKE2_BINARIES_BUNDLE_FILE" -C "$VERSION_DIR"
    # -f lets cp replace a binary that is currently being executed (a plain
    # overwrite fails with "Text file busy" during an upgrade).
    run_cmd "cp -f ${VERSION_DIR}/bin/* /usr/local/bin/"
    run_cmd "cp -f ${VERSION_DIR}/lib/systemd/system/rke2* /etc/systemd/system/"
    generate_rke2_config
    # 'systemctl start' does nothing for a service that is already running, so
    # new binaries only take effect with a restart. On a fresh install a
    # restart simply starts the service.
    SERVICE_ACTION="restart"
  fi

  run_cmd systemctl daemon-reload
  run_cmd systemctl enable "$SERVICE"
  run_cmd systemctl "$SERVICE_ACTION" "$SERVICE"

  info "Install fpk"
  run_cmd mkdir -p /usr/local/bin
  # fpk wraps kubectl with this node's kubeconfig. "$@" (rather than $*) keeps
  # arguments containing spaces intact.
  printf '#!/bin/bash\n\n/var/lib/rancher/rke2/bin/kubectl --kubeconfig=%s "$@"\n' "$RKE2_KUBECONFIG" > /usr/local/bin/fpk \
    || crit "Failed to create /usr/local/bin/fpk"
  run_cmd chmod +x /usr/local/bin/fpk

  info "Add script to simplify adding more nodes to the cluster"
  write_add_node_script server
  write_add_node_script agent
  run_cmd chmod +x /usr/local/bin/fp-add-server /usr/local/bin/fp-add-agent

  # The manifests directory belongs to server nodes; agents have no use for it.
  if [ "$RKE2_TYPE" = "server" ]; then
    write_server_manifests
  fi

  info "Waiting for k8s to be ready"
  wait_for_k8s_ready 180
}

# Apply the Fairport HelmChart resource, optionally embedding the Helm values
# from $FAIRPORT_CONFIG_FILE.
# Globals: FAIRPORT_CONFIG_CONTENT (written when a config file is given).
# Returns: 0 when the resource was applied, 1 otherwise.
install_fp_helm_chart () {
  info "Installing Fairport components"
  if [ "$FAIRPORT_CONFIG_FILE" ]; then
    [ -r "$FAIRPORT_CONFIG_FILE" ] || crit "Fairport config file '$FAIRPORT_CONFIG_FILE' does not exist or is not readable"
    # Indent by four spaces so the values nest inside the 'valuesContent: |-'
    # block scalar, which itself is indented by two.
    FAIRPORT_CONFIG_CONTENT=$(sed 's/^/    /' "$FAIRPORT_CONFIG_FILE")
    info "Using Fairport config from $FAIRPORT_CONFIG_FILE"
  fi
  cat << EOF | fpk apply -f -
apiVersion: helm.cattle.io/v1
kind: HelmChart
metadata:
  name: fairport
  namespace: kube-system
spec:
  chart: $FAIRPORT_CHART_SOURCE
  version: $FAIRPORT_CHART_VERSION
  targetNamespace: $FAIRPORT_CHART_NAMESPACE
  createNamespace: true
  failurePolicy: abort
  timeout: 10m
  valuesContent: |-
$FAIRPORT_CONFIG_CONTENT
EOF
  # $? is the status of 'fpk apply', the last command of the pipeline.
  if [ $? -ne 0 ]; then
    warn "Failed to apply the Fairport HelmChart, check the output above for details"
    return 1
  fi
  return 0
}

# Deploy the Fairport chart, but only when bootstrapping a new cluster.
install_fp_components () {
  if [ "$RKE2_BOOTSTRAP" = "true" ]; then
    install_fp_helm_chart
  fi
  return 0
}

# Download the rke2 image bundle unless it is already present.
install_images () {
  if [ -f "$RKE2_IMAGE_FILE" ]; then
    info "Image file $RKE2_IMAGE_FILE already exists, skipping download"
    return 0
  fi
  info "Download images for installation to $RKE2_IMAGE_FILE"
  run_cmd mkdir -p "$RKE2_IMAGE_DIR"
  download_file "$RKE2_IMAGE_URL" "$RKE2_IMAGE_FILE"
}

# Full install flow; a no-op when a healthy cluster of the right version is
# already running.
install () {
  if k8s_is_healthy && k8s_same_version; then
    info "K8s is already installed and the correct version is running"
    return 0
  fi
  update_sysctl
  install_images
  install_kubernetes
  install_fp_components
  check_application_install_status
}

# Stop all rke2 processes and exit.
stop () {
  info "Stopping k8s"
  run_cmd /usr/local/bin/rke2-killall.sh
  exit 0
}

# Remove rke2, its data and the helper scripts created by this installer,
# then exit.
uninstall () {
  info "Uninstall k8s"
  run_cmd '/usr/local/bin/rke2-uninstall.sh || true'
  run_cmd 'rm -rf /var/lib/rancher || true'
  run_cmd 'rm -rf /etc/rancher || true'
  run_cmd 'rm -rf /usr/local/bin/rke*'
  # The wrapper and add-node scripts point at files removed above.
  run_cmd 'rm -f /usr/local/bin/fpk /usr/local/bin/fp-add-server /usr/local/bin/fp-add-agent'
  run_cmd 'rm -rf /sys/fs/bpf/cilium || true'
  run_cmd 'rm -rf /opt/cni/bin/ || true'
  run_cmd 'rm -rf /etc/cni/net.d/ || true'
  exit 0
}

# Entry point. The action is selected by looking for the words "stop",
# "uninstall" or "update-components" anywhere in the arguments; without any of
# them a plain install is performed.
main () {
  check_debug
  check_dependencies

  # stop/uninstall run before the memory and disk checks: a machine that is
  # low on resources must still be able to stop or remove the cluster.
  if echo "$@" | grep -q -- "stop"; then
    stop
  fi
  if echo "$@" | grep -q -- "uninstall"; then
    uninstall
  fi

  check_system_requirements
  check_variables
  # fpk lives in /usr/local/bin and is used by the steps below.
  ensure_local_bin_in_path

  if echo "$@" | grep -q -- "update-components"; then
    install_fp_helm_chart
  fi
  install
}

main "$@"