From e1edbf33c5293ce1574ca7d1b6937063543cf7f4 Mon Sep 17 00:00:00 2001 From: Pablo Fontanilla Date: Mon, 29 Jun 2026 12:34:58 +0200 Subject: [PATCH 01/20] Add baremetal node adoption automation for TNF Introduce adopt-baremetal.sh to onboard existing baremetal nodes into the dev-scripts deployment workflow. Parses inventory_baremetal.ini (template or wizard-generated), validates BMC credentials via Redfish, and generates ironic_nodes.json + config_baremetal_fencing.sh artifacts for NODES_PLATFORM=baremetal deployments. OCPEDGE-2774 Co-Authored-By: Claude Opus 4.6 --- deploy/Makefile | 10 + deploy/openshift-clusters/.gitignore | 4 + .../inventory_baremetal.ini.sample | 32 ++ .../scripts/adopt-baremetal.sh | 419 ++++++++++++++++++ 4 files changed, 465 insertions(+) create mode 100644 deploy/openshift-clusters/inventory_baremetal.ini.sample create mode 100755 deploy/openshift-clusters/scripts/adopt-baremetal.sh diff --git a/deploy/Makefile b/deploy/Makefile index e825e0ca..f7d20512 100644 --- a/deploy/Makefile +++ b/deploy/Makefile @@ -92,6 +92,12 @@ fencing-assisted: keep-instance: @../helpers/keep-instance.sh '$(DAYS)' +adopt-baremetal: + @./openshift-clusters/scripts/adopt-baremetal.sh + +verify-baremetal: + @./openshift-clusters/scripts/adopt-baremetal.sh --verify-only + patch-nodes: @./openshift-clusters/scripts/patch-nodes.sh get-tnf-logs: @@ -138,6 +144,10 @@ help: @echo " clean-spoke - Clean spoke cluster resources (VMs, network, auth) from assisted installer" @echo " patch-nodes - Build resource-agents RPM and patch cluster nodes (default version: 4.11)" @echo "" + @echo "Baremetal Adoption:" + @echo " adopt-baremetal - Adopt baremetal nodes: validate BMC + generate dev-scripts artifacts" + @echo " verify-baremetal - Verify BMC credentials for adopted baremetal nodes (no artifacts)" + @echo "" @echo "Cluster Utilities:" @echo " get-tnf-logs - Collect pacemaker and etcd logs from cluster nodes" diff --git a/deploy/openshift-clusters/.gitignore 
b/deploy/openshift-clusters/.gitignore index 7c145393..a77bcddb 100644 --- a/deploy/openshift-clusters/.gitignore +++ b/deploy/openshift-clusters/.gitignore @@ -1,4 +1,8 @@ inventory.ini +inventory_baremetal.ini + +# Generated adoption artifacts (contain BMC credentials) +clusters/ proxy.env kubeconfig kubeadmin-password diff --git a/deploy/openshift-clusters/inventory_baremetal.ini.sample b/deploy/openshift-clusters/inventory_baremetal.ini.sample new file mode 100644 index 00000000..88019edc --- /dev/null +++ b/deploy/openshift-clusters/inventory_baremetal.ini.sample @@ -0,0 +1,32 @@ +# Baremetal node inventory for TNF adoption +# +# NOTE: This is separate from inventory.ini, which targets the hypervisor host. +# This file describes the physical baremetal nodes to be adopted as OpenShift nodes. +# inventory.ini → hypervisor (where dev-scripts runs) +# inventory_baremetal.ini → baremetal nodes (BMC endpoints for adoption) +# +# Copy this file to inventory_baremetal.ini and fill in your node details. +# Then run: make adopt-baremetal +# +# Each node requires: +# bmc_ip - BMC/iDRAC/iLO management IP address +# bmc_user - BMC login username +# bmc_pass - BMC login password +# boot_mac - MAC address of the NIC used for PXE boot +# +# The hostname (first field) becomes the node name in ironic_nodes.json. +# For TNF, you need exactly 2 nodes (master-0 and master-1). 
+ +[baremetal_nodes] +master-0 bmc_ip=192.168.1.100 bmc_user=admin bmc_pass=changeme boot_mac=52:54:00:00:00:01 +master-1 bmc_ip=192.168.1.101 bmc_user=admin bmc_pass=changeme boot_mac=52:54:00:00:00:02 + +[baremetal_nodes:vars] +# BMC driver — only redfish is supported for TNF fencing +bmc_driver=redfish + +# Skip TLS verification for BMC endpoints (common with self-signed certs) +bmc_verify_ca=False + +# Node CPU architecture +cpu_arch=x86_64 diff --git a/deploy/openshift-clusters/scripts/adopt-baremetal.sh b/deploy/openshift-clusters/scripts/adopt-baremetal.sh new file mode 100755 index 00000000..b3473c12 --- /dev/null +++ b/deploy/openshift-clusters/scripts/adopt-baremetal.sh @@ -0,0 +1,419 @@ +#!/usr/bin/bash +# +# Adopt existing baremetal nodes for TNF deployment. +# +# Parses inventory_baremetal.ini, validates BMC credentials via Redfish, +# and generates ironic_nodes.json + config_baremetal_fencing.sh for dev-scripts. +# +# Usage: +# adopt-baremetal.sh [options] +# +# Options: +# --cluster-name NAME Cluster name for output directory (default: ostest) +# --skip-verify Skip BMC credential verification +# --verify-only Only verify BMC credentials, don't generate artifacts +# --config-base FILE Base config to derive baremetal config from +# -h, --help Show this help message + +set -o nounset +set -o errexit +set -o pipefail + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +OC_DIR="$(cd "${SCRIPT_DIR}/.." 
&& pwd)" + +CLUSTER_NAME="${CLUSTER_NAME:-ostest}" +SKIP_VERIFY=false +VERIFY_ONLY=false +CONFIG_BASE="" +INVENTORY="${OC_DIR}/inventory_baremetal.ini" + +# Node data arrays — populated by parse_inventory +declare -a NODE_NAMES=() +declare -a NODE_BMC_IPS=() +declare -a NODE_BMC_USERS=() +declare -a NODE_BMC_PASSES=() +declare -a NODE_BOOT_MACS=() + +# Group defaults +BMC_VERIFY_CA="False" +CPU_ARCH="x86_64" + +############################################################################## +# Helpers +############################################################################## + +die() { echo "Error: $*" >&2; exit 1; } + +info() { echo "==> $*"; } + +############################################################################## +# Argument parsing +############################################################################## + +parse_args() { + while [[ $# -gt 0 ]]; do + case $1 in + --cluster-name) + CLUSTER_NAME="$2" + shift 2 + ;; + --skip-verify) + SKIP_VERIFY=true + shift + ;; + --verify-only) + VERIFY_ONLY=true + shift + ;; + --config-base) + CONFIG_BASE="$2" + shift 2 + ;; + -h|--help) + head -17 "$0" | tail -12 + exit 0 + ;; + *) + die "Unknown option: $1. Run '$0 --help' for usage." 
+ ;; + esac + done +} + +############################################################################## +# Wizard — interactive inventory creation +############################################################################## + +run_wizard() { + info "No inventory_baremetal.ini found — starting interactive wizard" + echo "" + + local node_count + read -rp "Number of baremetal nodes [2]: " node_count + node_count="${node_count:-2}" + + if [[ "${node_count}" -lt 2 ]]; then + die "TNF requires at least 2 nodes" + fi + + local tmp_inventory + tmp_inventory=$(mktemp) + { + echo "# Generated by adopt-baremetal.sh wizard" + echo "" + echo "[baremetal_nodes]" + } > "${tmp_inventory}" + + for ((i = 0; i < node_count; i++)); do + local default_name="master-${i}" + echo "" + echo "--- Node $((i + 1)) of ${node_count} ---" + + local name bmc_ip bmc_user bmc_pass boot_mac + read -rp " Hostname [${default_name}]: " name + name="${name:-${default_name}}" + + read -rp " BMC IP address: " bmc_ip + [[ -z "${bmc_ip}" ]] && die "BMC IP is required" + + read -rp " BMC username [admin]: " bmc_user + bmc_user="${bmc_user:-admin}" + + read -rsp " BMC password: " bmc_pass + echo "" + [[ -z "${bmc_pass}" ]] && die "BMC password is required" + + read -rp " Boot MAC address: " boot_mac + [[ -z "${boot_mac}" ]] && die "Boot MAC is required" + + echo "${name} bmc_ip=${bmc_ip} bmc_user=${bmc_user} bmc_pass=${bmc_pass} boot_mac=${boot_mac}" >> "${tmp_inventory}" + done + + { + echo "" + echo "[baremetal_nodes:vars]" + echo "bmc_driver=redfish" + echo "bmc_verify_ca=False" + echo "cpu_arch=x86_64" + } >> "${tmp_inventory}" + + mv "${tmp_inventory}" "${INVENTORY}" + echo "" + info "Inventory written to inventory_baremetal.ini" +} + +############################################################################## +# INI parser +############################################################################## + +parse_inventory() { + [[ -f "${INVENTORY}" ]] || die "Inventory file not found: ${INVENTORY}" 
+ + local in_nodes=false + local in_vars=false + + while IFS= read -r line || [[ -n "${line}" ]]; do + # Strip comments and leading/trailing whitespace + line="${line%%#*}" + line="${line#"${line%%[![:space:]]*}"}" + line="${line%"${line##*[![:space:]]}"}" + [[ -z "${line}" ]] && continue + + if [[ "${line}" == "[baremetal_nodes]" ]]; then + in_nodes=true + in_vars=false + continue + elif [[ "${line}" == "[baremetal_nodes:vars]" ]]; then + in_nodes=false + in_vars=true + continue + elif [[ "${line}" =~ ^\[.*\] ]]; then + in_nodes=false + in_vars=false + continue + fi + + if ${in_vars}; then + local key val + key="${line%%=*}" + val="${line#*=}" + case "${key}" in + bmc_verify_ca) BMC_VERIFY_CA="${val}" ;; + cpu_arch) CPU_ARCH="${val}" ;; + esac + continue + fi + + if ${in_nodes}; then + local name rest + name="${line%% *}" + rest="${line#* }" + + local bmc_ip="" bmc_user="" bmc_pass="" boot_mac="" + for pair in ${rest}; do + local key val + key="${pair%%=*}" + val="${pair#*=}" + case "${key}" in + bmc_ip) bmc_ip="${val}" ;; + bmc_user) bmc_user="${val}" ;; + bmc_pass) bmc_pass="${val}" ;; + boot_mac) boot_mac="${val}" ;; + esac + done + + [[ -z "${bmc_ip}" ]] && die "Node '${name}': missing bmc_ip" + [[ -z "${bmc_user}" ]] && die "Node '${name}': missing bmc_user" + [[ -z "${bmc_pass}" ]] && die "Node '${name}': missing bmc_pass" + [[ -z "${boot_mac}" ]] && die "Node '${name}': missing boot_mac" + + NODE_NAMES+=("${name}") + NODE_BMC_IPS+=("${bmc_ip}") + NODE_BMC_USERS+=("${bmc_user}") + NODE_BMC_PASSES+=("${bmc_pass}") + NODE_BOOT_MACS+=("${boot_mac}") + fi + done < "${INVENTORY}" + + [[ ${#NODE_NAMES[@]} -eq 0 ]] && die "No nodes found in inventory" + info "Parsed ${#NODE_NAMES[@]} node(s) from inventory" +} + +############################################################################## +# BMC verification via Redfish +############################################################################## + +discover_redfish_system_id() { + local bmc_ip="$1" 
bmc_user="$2" bmc_pass="$3" + + local systems_json + systems_json=$(curl -sk --connect-timeout 5 --max-time 10 \ + -u "${bmc_user}:${bmc_pass}" \ + "https://${bmc_ip}/redfish/v1/Systems/" 2>/dev/null) || return 1 + + echo "${systems_json}" | jq -r '.Members[0]."@odata.id"' 2>/dev/null +} + +verify_bmc() { + local name="$1" bmc_ip="$2" bmc_user="$3" bmc_pass="$4" + local rc=0 + + printf " %-12s %-20s " "${name}" "${bmc_ip}" + + # Verify Redfish root is reachable and credentials work + local http_code + http_code=$(curl -sk --connect-timeout 5 --max-time 10 \ + -o /dev/null -w '%{http_code}' \ + -u "${bmc_user}:${bmc_pass}" \ + "https://${bmc_ip}/redfish/v1/" 2>/dev/null) || http_code="000" + + if [[ "${http_code}" == "200" ]]; then + echo "OK (HTTP ${http_code})" + elif [[ "${http_code}" == "401" ]]; then + echo "FAIL — bad credentials (HTTP 401)" + rc=1 + elif [[ "${http_code}" == "000" ]]; then + echo "FAIL — unreachable" + rc=1 + else + echo "FAIL (HTTP ${http_code})" + rc=1 + fi + + return ${rc} +} + +verify_all_bmcs() { + info "Verifying BMC credentials via Redfish" + echo "" + + local failed=0 + for ((i = 0; i < ${#NODE_NAMES[@]}; i++)); do + if ! 
verify_bmc "${NODE_NAMES[$i]}" "${NODE_BMC_IPS[$i]}" \ + "${NODE_BMC_USERS[$i]}" "${NODE_BMC_PASSES[$i]}"; then + failed=$((failed + 1)) + fi + done + echo "" + + if [[ ${failed} -gt 0 ]]; then + die "${failed} node(s) failed BMC verification" + fi + info "All BMC endpoints verified" +} + +############################################################################## +# Artifact generation +############################################################################## + +generate_ironic_nodes_json() { + local output_file="$1" + + info "Generating ironic_nodes.json" + + local nodes_json='{"nodes":[' + local first=true + + for ((i = 0; i < ${#NODE_NAMES[@]}; i++)); do + local name="${NODE_NAMES[$i]}" + local bmc_ip="${NODE_BMC_IPS[$i]}" + local bmc_user="${NODE_BMC_USERS[$i]}" + local bmc_pass="${NODE_BMC_PASSES[$i]}" + local boot_mac="${NODE_BOOT_MACS[$i]}" + + # Discover the Redfish system path from the BMC, fall back to standard + local system_id + system_id=$(discover_redfish_system_id "${bmc_ip}" "${bmc_user}" "${bmc_pass}" 2>/dev/null) || true + system_id="${system_id:-/redfish/v1/Systems/1}" + # Strip leading slash for URL construction + system_id="${system_id#/}" + + ${first} || nodes_json+="," + first=false + + nodes_json+=$(cat < "${output_file}" + info " → ${output_file}" +} + +generate_baremetal_config() { + local output_file="$1" + local nodes_file_path="$2" + + info "Generating config_baremetal_fencing.sh" + + # Find the base config to derive from + local base_config="${CONFIG_BASE}" + if [[ -z "${base_config}" ]]; then + local files_dir="${OC_DIR}/roles/dev-scripts/install-dev/files" + if [[ -f "${files_dir}/config_fencing.sh" ]]; then + base_config="${files_dir}/config_fencing.sh" + elif [[ -f "${files_dir}/config_fencing_example.sh" ]]; then + base_config="${files_dir}/config_fencing_example.sh" + else + die "No base config found. Provide one with --config-base." 
+ fi + fi + + [[ -f "${base_config}" ]] || die "Base config not found: ${base_config}" + info " Base config: ${base_config}" + + { + cat "${base_config}" + echo "" + echo "# Baremetal adoption overrides (generated by adopt-baremetal.sh)" + echo "export NODES_PLATFORM=baremetal" + echo "export NODES_FILE=\"${nodes_file_path}\"" + } > "${output_file}" + + info " → ${output_file}" +} + +############################################################################## +# Main +############################################################################## + +main() { + parse_args "$@" + + # Wizard if no inventory exists + if [[ ! -f "${INVENTORY}" ]]; then + run_wizard + fi + + parse_inventory + + # BMC verification + if ! ${SKIP_VERIFY}; then + verify_all_bmcs + fi + + if ${VERIFY_ONLY}; then + info "Verification complete (--verify-only). No artifacts generated." + exit 0 + fi + + # Create output directory + local output_dir="${OC_DIR}/clusters/${CLUSTER_NAME}" + mkdir -p "${output_dir}" + + # Generate artifacts + local nodes_file="${output_dir}/ironic_nodes.json" + generate_ironic_nodes_json "${nodes_file}" + + # NODES_FILE path on the hypervisor — resolves when dev-scripts sources the config + local remote_nodes_path="\${PWD}/ironic_nodes.json" + generate_baremetal_config "${output_dir}/config_baremetal_fencing.sh" "${remote_nodes_path}" + + echo "" + info "Adoption complete. Generated artifacts:" + echo " ${nodes_file}" + echo " ${output_dir}/config_baremetal_fencing.sh" + echo "" + echo " Next: deploy to hypervisor with the baremetal install workflow (OCPEDGE-2775)" +} + +main "$@" From 251d1277ba5d7637ea71562b254c13cf4de202e0 Mon Sep 17 00:00:00 2001 From: Pablo Fontanilla Date: Mon, 29 Jun 2026 12:59:15 +0200 Subject: [PATCH 02/20] Extract wizard into standalone script with input validation Split the interactive wizard out of adopt-baremetal.sh into its own baremetal-wizard.sh script. 
Add input validation (IPv4, MAC format, hostname), re-prompt on invalid input instead of dying, a summary table with masked passwords before confirmation, and Y/n/q flow. Rename all baremetal scripts and Make targets to use a baremetal-* prefix for consistent grouping (baremetal-adopt, baremetal-verify, baremetal-wizard). OCPEDGE-2774 Co-Authored-By: Claude Opus 4.6 --- deploy/Makefile | 16 +- ...{adopt-baremetal.sh => baremetal-adopt.sh} | 66 +--- .../scripts/baremetal-wizard.sh | 304 ++++++++++++++++++ 3 files changed, 316 insertions(+), 70 deletions(-) rename deploy/openshift-clusters/scripts/{adopt-baremetal.sh => baremetal-adopt.sh} (84%) create mode 100755 deploy/openshift-clusters/scripts/baremetal-wizard.sh diff --git a/deploy/Makefile b/deploy/Makefile index f7d20512..cd709341 100644 --- a/deploy/Makefile +++ b/deploy/Makefile @@ -92,11 +92,14 @@ fencing-assisted: keep-instance: @../helpers/keep-instance.sh '$(DAYS)' -adopt-baremetal: - @./openshift-clusters/scripts/adopt-baremetal.sh +baremetal-adopt: + @./openshift-clusters/scripts/baremetal-adopt.sh -verify-baremetal: - @./openshift-clusters/scripts/adopt-baremetal.sh --verify-only +baremetal-verify: + @./openshift-clusters/scripts/baremetal-adopt.sh --verify-only + +baremetal-wizard: + @./openshift-clusters/scripts/baremetal-wizard.sh patch-nodes: @./openshift-clusters/scripts/patch-nodes.sh @@ -145,8 +148,9 @@ help: @echo " patch-nodes - Build resource-agents RPM and patch cluster nodes (default version: 4.11)" @echo "" @echo "Baremetal Adoption:" - @echo " adopt-baremetal - Adopt baremetal nodes: validate BMC + generate dev-scripts artifacts" - @echo " verify-baremetal - Verify BMC credentials for adopted baremetal nodes (no artifacts)" + @echo " baremetal-adopt - Adopt baremetal nodes: validate BMC + generate dev-scripts artifacts" + @echo " baremetal-verify - Verify BMC credentials for adopted baremetal nodes (no artifacts)" + @echo " baremetal-wizard - Interactive wizard to create baremetal node 
inventory" @echo "" @echo "Cluster Utilities:" @echo " get-tnf-logs - Collect pacemaker and etcd logs from cluster nodes" diff --git a/deploy/openshift-clusters/scripts/adopt-baremetal.sh b/deploy/openshift-clusters/scripts/baremetal-adopt.sh similarity index 84% rename from deploy/openshift-clusters/scripts/adopt-baremetal.sh rename to deploy/openshift-clusters/scripts/baremetal-adopt.sh index b3473c12..3657ab78 100755 --- a/deploy/openshift-clusters/scripts/adopt-baremetal.sh +++ b/deploy/openshift-clusters/scripts/baremetal-adopt.sh @@ -81,68 +81,6 @@ parse_args() { done } -############################################################################## -# Wizard — interactive inventory creation -############################################################################## - -run_wizard() { - info "No inventory_baremetal.ini found — starting interactive wizard" - echo "" - - local node_count - read -rp "Number of baremetal nodes [2]: " node_count - node_count="${node_count:-2}" - - if [[ "${node_count}" -lt 2 ]]; then - die "TNF requires at least 2 nodes" - fi - - local tmp_inventory - tmp_inventory=$(mktemp) - { - echo "# Generated by adopt-baremetal.sh wizard" - echo "" - echo "[baremetal_nodes]" - } > "${tmp_inventory}" - - for ((i = 0; i < node_count; i++)); do - local default_name="master-${i}" - echo "" - echo "--- Node $((i + 1)) of ${node_count} ---" - - local name bmc_ip bmc_user bmc_pass boot_mac - read -rp " Hostname [${default_name}]: " name - name="${name:-${default_name}}" - - read -rp " BMC IP address: " bmc_ip - [[ -z "${bmc_ip}" ]] && die "BMC IP is required" - - read -rp " BMC username [admin]: " bmc_user - bmc_user="${bmc_user:-admin}" - - read -rsp " BMC password: " bmc_pass - echo "" - [[ -z "${bmc_pass}" ]] && die "BMC password is required" - - read -rp " Boot MAC address: " boot_mac - [[ -z "${boot_mac}" ]] && die "Boot MAC is required" - - echo "${name} bmc_ip=${bmc_ip} bmc_user=${bmc_user} bmc_pass=${bmc_pass} boot_mac=${boot_mac}" >> 
"${tmp_inventory}" - done - - { - echo "" - echo "[baremetal_nodes:vars]" - echo "bmc_driver=redfish" - echo "bmc_verify_ca=False" - echo "cpu_arch=x86_64" - } >> "${tmp_inventory}" - - mv "${tmp_inventory}" "${INVENTORY}" - echo "" - info "Inventory written to inventory_baremetal.ini" -} - ############################################################################## # INI parser ############################################################################## @@ -379,9 +317,9 @@ generate_baremetal_config() { main() { parse_args "$@" - # Wizard if no inventory exists + # Launch interactive wizard if no inventory exists if [[ ! -f "${INVENTORY}" ]]; then - run_wizard + "${SCRIPT_DIR}/baremetal-wizard.sh" --output "${INVENTORY}" fi parse_inventory diff --git a/deploy/openshift-clusters/scripts/baremetal-wizard.sh b/deploy/openshift-clusters/scripts/baremetal-wizard.sh new file mode 100755 index 00000000..de6de400 --- /dev/null +++ b/deploy/openshift-clusters/scripts/baremetal-wizard.sh @@ -0,0 +1,304 @@ +#!/usr/bin/bash +# +# Interactive wizard for creating a baremetal node inventory. +# +# Collects BMC credentials and network info for each node, validates input, +# displays a summary for confirmation, and writes inventory_baremetal.ini. +# +# Usage: +# wizard-baremetal.sh [options] +# +# Options: +# --output FILE Inventory output path (default: inventory_baremetal.ini) +# -h, --help Show this help message + +set -o nounset +set -o errexit +set -o pipefail + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +OC_DIR="$(cd "${SCRIPT_DIR}/.." 
&& pwd)" + +OUTPUT="${OC_DIR}/inventory_baremetal.ini" + +############################################################################## +# Helpers +############################################################################## + +die() { echo "Error: $*" >&2; exit 1; } + +info() { echo "==> $*"; } + +############################################################################## +# Argument parsing +############################################################################## + +parse_args() { + while [[ $# -gt 0 ]]; do + case $1 in + --output) + OUTPUT="$2" + shift 2 + ;; + -h|--help) + head -14 "$0" | tail -9 + exit 0 + ;; + *) + die "Unknown option: $1. Run '$0 --help' for usage." + ;; + esac + done +} + +############################################################################## +# Validators +############################################################################## + +valid_ipv4() { + local ip="$1" + local IFS='.' + local -a octets + read -ra octets <<< "${ip}" + [[ ${#octets[@]} -ne 4 ]] && return 1 + local octet + for octet in "${octets[@]}"; do + [[ "${octet}" =~ ^[0-9]+$ ]] || return 1 + (( octet > 255 )) && return 1 + done + return 0 +} + +valid_mac() { + local mac="$1" + [[ "${mac}" =~ ^([0-9a-fA-F]{2}:){5}[0-9a-fA-F]{2}$ ]] +} + +valid_hostname() { + local name="$1" + [[ -n "${name}" ]] && [[ "${name}" =~ ^[a-zA-Z0-9]([a-zA-Z0-9._-]*[a-zA-Z0-9])?$ ]] +} + +############################################################################## +# Prompt functions +# +# Each loops until valid input is received. Values go to stdout (for capture +# with val=$(...)), prompts and errors go to stderr (displayed on terminal). +############################################################################## + +prompt_node_count() { + local count + while true; do + read -rp "Number of baremetal nodes [2]: " count + count="${count:-2}" + if ! 
[[ "${count}" =~ ^[0-9]+$ ]]; then + echo " Error: must be a number" >&2 + continue + fi + if (( count < 2 )); then + echo " Error: TNF requires at least 2 nodes" >&2 + continue + fi + echo "${count}" + return + done +} + +prompt_hostname() { + local default_name="$1" + local name + while true; do + read -rp " Hostname [${default_name}]: " name + name="${name:-${default_name}}" + if ! valid_hostname "${name}"; then + echo " Error: invalid hostname (use alphanumeric, hyphens, dots)" >&2 + continue + fi + echo "${name}" + return + done +} + +prompt_bmc_ip() { + local ip + while true; do + read -rp " BMC IP address: " ip + if [[ -z "${ip}" ]]; then + echo " Error: BMC IP is required" >&2 + continue + fi + if ! valid_ipv4 "${ip}"; then + echo " Error: invalid IPv4 address (expected N.N.N.N)" >&2 + continue + fi + echo "${ip}" + return + done +} + +prompt_bmc_user() { + local user + read -rp " BMC username [admin]: " user + user="${user:-admin}" + echo "${user}" +} + +prompt_bmc_pass() { + local pass + while true; do + read -rsp " BMC password: " pass + echo "" >&2 + if [[ -z "${pass}" ]]; then + echo " Error: BMC password is required" >&2 + continue + fi + echo "${pass}" + return + done +} + +prompt_boot_mac() { + local mac + while true; do + read -rp " Boot MAC address: " mac + if [[ -z "${mac}" ]]; then + echo " Error: boot MAC is required" >&2 + continue + fi + if ! 
valid_mac "${mac}"; then + echo " Error: invalid MAC (expected XX:XX:XX:XX:XX:XX)" >&2 + continue + fi + echo "${mac}" + return + done +} + +############################################################################## +# Summary display +############################################################################## + +show_summary() { + echo "" + echo "==================================" + echo " BAREMETAL NODE SUMMARY" + echo "==================================" + printf " %-4s %-14s %-17s %-10s %-10s %-19s\n" \ + "#" "HOSTNAME" "BMC IP" "BMC USER" "PASSWORD" "BOOT MAC" + printf " %-4s %-14s %-17s %-10s %-10s %-19s\n" \ + "---" "------------" "---------------" "--------" "--------" "-----------------" + + local i + for ((i = 0; i < ${#WIZ_NAMES[@]}; i++)); do + printf " %-4s %-14s %-17s %-10s %-10s %-19s\n" \ + "$((i + 1))" \ + "${WIZ_NAMES[$i]}" \ + "${WIZ_IPS[$i]}" \ + "${WIZ_USERS[$i]}" \ + "********" \ + "${WIZ_MACS[$i]}" + done + + echo "==================================" +} + +############################################################################## +# Wizard flow +############################################################################## + +run_wizard() { + info "Baremetal node inventory wizard" + echo "" + + while true; do + local node_count + node_count=$(prompt_node_count) + + WIZ_NAMES=() + WIZ_IPS=() + WIZ_USERS=() + WIZ_PASSES=() + WIZ_MACS=() + + local i + for ((i = 0; i < node_count; i++)); do + local default_name="master-${i}" + echo "" + echo "--- Node $((i + 1)) of ${node_count} ---" + + WIZ_NAMES+=("$(prompt_hostname "${default_name}")") + WIZ_IPS+=("$(prompt_bmc_ip)") + WIZ_USERS+=("$(prompt_bmc_user)") + WIZ_PASSES+=("$(prompt_bmc_pass)") + WIZ_MACS+=("$(prompt_boot_mac)") + done + + show_summary + + local confirm + read -rp "Proceed with this configuration? 
[Y/n/q]: " confirm + confirm="${confirm:-Y}" + + case "${confirm}" in + [Yy]|[Yy]es) + break + ;; + [Qq]|[Qq]uit) + die "Wizard cancelled by user" + ;; + *) + echo "" + info "Starting over — re-enter node information" + echo "" + continue + ;; + esac + done + + write_inventory +} + +############################################################################## +# Inventory writer +############################################################################## + +write_inventory() { + local tmp_inventory + tmp_inventory=$(mktemp) + + { + echo "# Generated by wizard-baremetal.sh" + echo "" + echo "[baremetal_nodes]" + } > "${tmp_inventory}" + + local i + for ((i = 0; i < ${#WIZ_NAMES[@]}; i++)); do + echo "${WIZ_NAMES[$i]} bmc_ip=${WIZ_IPS[$i]} bmc_user=${WIZ_USERS[$i]} bmc_pass=${WIZ_PASSES[$i]} boot_mac=${WIZ_MACS[$i]}" >> "${tmp_inventory}" + done + + { + echo "" + echo "[baremetal_nodes:vars]" + echo "bmc_driver=redfish" + echo "bmc_verify_ca=False" + echo "cpu_arch=x86_64" + } >> "${tmp_inventory}" + + mv "${tmp_inventory}" "${OUTPUT}" + echo "" + info "Inventory written to ${OUTPUT}" +} + +############################################################################## +# Main +############################################################################## + +declare -a WIZ_NAMES=() +declare -a WIZ_IPS=() +declare -a WIZ_USERS=() +declare -a WIZ_PASSES=() +declare -a WIZ_MACS=() + +parse_args "$@" +run_wizard From 314088847b94af97634015aa67a5ec74e0930865 Mon Sep 17 00:00:00 2001 From: Pablo Fontanilla Date: Mon, 29 Jun 2026 15:06:18 +0200 Subject: [PATCH 03/20] Accept hostnames in BMC address wizard input The wizard only accepted IPv4 addresses for BMC endpoints, but real environments (e.g., HPE iLO) commonly use FQDNs. Accept both IPv4 and hostnames, and update prompts/labels accordingly. 
Co-Authored-By: Claude Opus 4.6 --- .../inventory_baremetal.ini.sample | 4 +-- .../scripts/baremetal-wizard.sh | 33 +++++++++++-------- 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/deploy/openshift-clusters/inventory_baremetal.ini.sample b/deploy/openshift-clusters/inventory_baremetal.ini.sample index 88019edc..e4e35e27 100644 --- a/deploy/openshift-clusters/inventory_baremetal.ini.sample +++ b/deploy/openshift-clusters/inventory_baremetal.ini.sample @@ -6,10 +6,10 @@ # inventory_baremetal.ini → baremetal nodes (BMC endpoints for adoption) # # Copy this file to inventory_baremetal.ini and fill in your node details. -# Then run: make adopt-baremetal +# Then run: make baremetal-adopt # # Each node requires: -# bmc_ip - BMC/iDRAC/iLO management IP address +# bmc_ip - BMC/iDRAC/iLO management address (IP or hostname) # bmc_user - BMC login username # bmc_pass - BMC login password # boot_mac - MAC address of the NIC used for PXE boot diff --git a/deploy/openshift-clusters/scripts/baremetal-wizard.sh b/deploy/openshift-clusters/scripts/baremetal-wizard.sh index de6de400..7bd567c6 100755 --- a/deploy/openshift-clusters/scripts/baremetal-wizard.sh +++ b/deploy/openshift-clusters/scripts/baremetal-wizard.sh @@ -79,6 +79,11 @@ valid_hostname() { [[ -n "${name}" ]] && [[ "${name}" =~ ^[a-zA-Z0-9]([a-zA-Z0-9._-]*[a-zA-Z0-9])?$ ]] } +valid_bmc_address() { + local addr="$1" + valid_ipv4 "${addr}" || valid_hostname "${addr}" +} + ############################################################################## # Prompt functions # @@ -119,19 +124,19 @@ prompt_hostname() { done } -prompt_bmc_ip() { - local ip +prompt_bmc_address() { + local addr while true; do - read -rp " BMC IP address: " ip - if [[ -z "${ip}" ]]; then - echo " Error: BMC IP is required" >&2 + read -rp " BMC address (IP or hostname): " addr + if [[ -z "${addr}" ]]; then + echo " Error: BMC address is required" >&2 continue fi - if ! 
valid_ipv4 "${ip}"; then - echo " Error: invalid IPv4 address (expected N.N.N.N)" >&2 + if ! valid_bmc_address "${addr}"; then + echo " Error: invalid address (expected IPv4 or FQDN)" >&2 continue fi - echo "${ip}" + echo "${addr}" return done } @@ -183,14 +188,14 @@ show_summary() { echo "==================================" echo " BAREMETAL NODE SUMMARY" echo "==================================" - printf " %-4s %-14s %-17s %-10s %-10s %-19s\n" \ - "#" "HOSTNAME" "BMC IP" "BMC USER" "PASSWORD" "BOOT MAC" - printf " %-4s %-14s %-17s %-10s %-10s %-19s\n" \ - "---" "------------" "---------------" "--------" "--------" "-----------------" + printf " %-4s %-14s %-40s %-10s %-10s %-19s\n" \ + "#" "HOSTNAME" "BMC ADDRESS" "BMC USER" "PASSWORD" "BOOT MAC" + printf " %-4s %-14s %-40s %-10s %-10s %-19s\n" \ + "---" "------------" "--------------------------------------" "--------" "--------" "-----------------" local i for ((i = 0; i < ${#WIZ_NAMES[@]}; i++)); do - printf " %-4s %-14s %-17s %-10s %-10s %-19s\n" \ + printf " %-4s %-14s %-40s %-10s %-10s %-19s\n" \ "$((i + 1))" \ "${WIZ_NAMES[$i]}" \ "${WIZ_IPS[$i]}" \ @@ -227,7 +232,7 @@ run_wizard() { echo "--- Node $((i + 1)) of ${node_count} ---" WIZ_NAMES+=("$(prompt_hostname "${default_name}")") - WIZ_IPS+=("$(prompt_bmc_ip)") + WIZ_IPS+=("$(prompt_bmc_address)") WIZ_USERS+=("$(prompt_bmc_user)") WIZ_PASSES+=("$(prompt_bmc_pass)") WIZ_MACS+=("$(prompt_boot_mac)") From 1a1c5e35cce2733d0a2df1c7159cd26d7dac9f0c Mon Sep 17 00:00:00 2001 From: Pablo Fontanilla Date: Mon, 29 Jun 2026 15:10:20 +0200 Subject: [PATCH 04/20] Rename bmc_ip to bmc_address, make boot_mac optional MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit bmc_address accepts both IPs and hostnames (matching wizard change). boot_mac is now optional — when omitted, the adopt script queries Redfish EthernetInterfaces for an enabled NIC's MAC. 
Falls back to a clear warning if discovery fails (e.g., firmware doesn't expose MACs). Co-Authored-By: Claude Opus 4.6 --- .../inventory_baremetal.ini.sample | 13 ++-- .../scripts/baremetal-adopt.sh | 66 ++++++++++++++----- .../scripts/baremetal-wizard.sh | 15 +++-- 3 files changed, 68 insertions(+), 26 deletions(-) diff --git a/deploy/openshift-clusters/inventory_baremetal.ini.sample b/deploy/openshift-clusters/inventory_baremetal.ini.sample index e4e35e27..3300d057 100644 --- a/deploy/openshift-clusters/inventory_baremetal.ini.sample +++ b/deploy/openshift-clusters/inventory_baremetal.ini.sample @@ -9,17 +9,18 @@ # Then run: make baremetal-adopt # # Each node requires: -# bmc_ip - BMC/iDRAC/iLO management address (IP or hostname) -# bmc_user - BMC login username -# bmc_pass - BMC login password -# boot_mac - MAC address of the NIC used for PXE boot +# bmc_address - BMC/iDRAC/iLO management address (IP or hostname) +# bmc_user - BMC login username +# bmc_pass - BMC login password +# boot_mac - (optional) MAC address of the NIC used for PXE boot +# If omitted, the adopt script attempts Redfish discovery. # # The hostname (first field) becomes the node name in ironic_nodes.json. # For TNF, you need exactly 2 nodes (master-0 and master-1). 
[baremetal_nodes] -master-0 bmc_ip=192.168.1.100 bmc_user=admin bmc_pass=changeme boot_mac=52:54:00:00:00:01 -master-1 bmc_ip=192.168.1.101 bmc_user=admin bmc_pass=changeme boot_mac=52:54:00:00:00:02 +master-0 bmc_address=192.168.1.100 bmc_user=admin bmc_pass=changeme boot_mac=52:54:00:00:00:01 +master-1 bmc_address=192.168.1.101 bmc_user=admin bmc_pass=changeme boot_mac=52:54:00:00:00:02 [baremetal_nodes:vars] # BMC driver — only redfish is supported for TNF fencing diff --git a/deploy/openshift-clusters/scripts/baremetal-adopt.sh b/deploy/openshift-clusters/scripts/baremetal-adopt.sh index 3657ab78..fc1f9f64 100755 --- a/deploy/openshift-clusters/scripts/baremetal-adopt.sh +++ b/deploy/openshift-clusters/scripts/baremetal-adopt.sh @@ -30,7 +30,7 @@ INVENTORY="${OC_DIR}/inventory_baremetal.ini" # Node data arrays — populated by parse_inventory declare -a NODE_NAMES=() -declare -a NODE_BMC_IPS=() +declare -a NODE_BMC_ADDRS=() declare -a NODE_BMC_USERS=() declare -a NODE_BMC_PASSES=() declare -a NODE_BOOT_MACS=() @@ -128,26 +128,25 @@ parse_inventory() { name="${line%% *}" rest="${line#* }" - local bmc_ip="" bmc_user="" bmc_pass="" boot_mac="" + local bmc_address="" bmc_user="" bmc_pass="" boot_mac="" for pair in ${rest}; do local key val key="${pair%%=*}" val="${pair#*=}" case "${key}" in - bmc_ip) bmc_ip="${val}" ;; + bmc_address) bmc_address="${val}" ;; bmc_user) bmc_user="${val}" ;; bmc_pass) bmc_pass="${val}" ;; boot_mac) boot_mac="${val}" ;; esac done - [[ -z "${bmc_ip}" ]] && die "Node '${name}': missing bmc_ip" + [[ -z "${bmc_address}" ]] && die "Node '${name}': missing bmc_address" [[ -z "${bmc_user}" ]] && die "Node '${name}': missing bmc_user" [[ -z "${bmc_pass}" ]] && die "Node '${name}': missing bmc_pass" - [[ -z "${boot_mac}" ]] && die "Node '${name}': missing boot_mac" NODE_NAMES+=("${name}") - NODE_BMC_IPS+=("${bmc_ip}") + NODE_BMC_ADDRS+=("${bmc_address}") NODE_BMC_USERS+=("${bmc_user}") NODE_BMC_PASSES+=("${bmc_pass}") 
NODE_BOOT_MACS+=("${boot_mac}") @@ -163,28 +162,53 @@ parse_inventory() { ############################################################################## discover_redfish_system_id() { - local bmc_ip="$1" bmc_user="$2" bmc_pass="$3" + local bmc_address="$1" bmc_user="$2" bmc_pass="$3" local systems_json systems_json=$(curl -sk --connect-timeout 5 --max-time 10 \ -u "${bmc_user}:${bmc_pass}" \ - "https://${bmc_ip}/redfish/v1/Systems/" 2>/dev/null) || return 1 + "https://${bmc_address}/redfish/v1/Systems/" 2>/dev/null) || return 1 echo "${systems_json}" | jq -r '.Members[0]."@odata.id"' 2>/dev/null } +discover_boot_mac() { + local bmc_address="$1" bmc_user="$2" bmc_pass="$3" system_id="$4" + + local ifaces_json mac + ifaces_json=$(curl -sk --connect-timeout 5 --max-time 15 \ + -u "${bmc_user}:${bmc_pass}" \ + "https://${bmc_address}/${system_id}EthernetInterfaces/" 2>/dev/null) || return 1 + + local iface_paths + iface_paths=$(echo "${ifaces_json}" | jq -r '.Members[]."@odata.id"' 2>/dev/null) || return 1 + + for iface_path in ${iface_paths}; do + mac=$(curl -sk --connect-timeout 5 --max-time 10 \ + -u "${bmc_user}:${bmc_pass}" \ + "https://${bmc_address}${iface_path}" 2>/dev/null \ + | jq -r 'select(.Status.State == "Enabled") | .MACAddress // empty' 2>/dev/null) + + if [[ -n "${mac}" && "${mac}" != "00:00:00:00:00:00" ]]; then + echo "${mac}" + return 0 + fi + done + return 1 +} + verify_bmc() { - local name="$1" bmc_ip="$2" bmc_user="$3" bmc_pass="$4" + local name="$1" bmc_address="$2" bmc_user="$3" bmc_pass="$4" local rc=0 - printf " %-12s %-20s " "${name}" "${bmc_ip}" + printf " %-12s %-20s " "${name}" "${bmc_address}" # Verify Redfish root is reachable and credentials work local http_code http_code=$(curl -sk --connect-timeout 5 --max-time 10 \ -o /dev/null -w '%{http_code}' \ -u "${bmc_user}:${bmc_pass}" \ - "https://${bmc_ip}/redfish/v1/" 2>/dev/null) || http_code="000" + "https://${bmc_address}/redfish/v1/" 2>/dev/null) || http_code="000" if [[ "${http_code}" 
== "200" ]]; then echo "OK (HTTP ${http_code})" @@ -208,7 +232,7 @@ verify_all_bmcs() { local failed=0 for ((i = 0; i < ${#NODE_NAMES[@]}; i++)); do - if ! verify_bmc "${NODE_NAMES[$i]}" "${NODE_BMC_IPS[$i]}" \ + if ! verify_bmc "${NODE_NAMES[$i]}" "${NODE_BMC_ADDRS[$i]}" \ "${NODE_BMC_USERS[$i]}" "${NODE_BMC_PASSES[$i]}"; then failed=$((failed + 1)) fi @@ -235,18 +259,30 @@ generate_ironic_nodes_json() { for ((i = 0; i < ${#NODE_NAMES[@]}; i++)); do local name="${NODE_NAMES[$i]}" - local bmc_ip="${NODE_BMC_IPS[$i]}" + local bmc_address="${NODE_BMC_ADDRS[$i]}" local bmc_user="${NODE_BMC_USERS[$i]}" local bmc_pass="${NODE_BMC_PASSES[$i]}" local boot_mac="${NODE_BOOT_MACS[$i]}" # Discover the Redfish system path from the BMC, fall back to standard local system_id - system_id=$(discover_redfish_system_id "${bmc_ip}" "${bmc_user}" "${bmc_pass}" 2>/dev/null) || true + system_id=$(discover_redfish_system_id "${bmc_address}" "${bmc_user}" "${bmc_pass}" 2>/dev/null) || true system_id="${system_id:-/redfish/v1/Systems/1}" # Strip leading slash for URL construction system_id="${system_id#/}" + # Auto-discover boot MAC via Redfish if not provided + if [[ -z "${boot_mac}" ]]; then + info " ${name}: boot_mac not set, attempting Redfish discovery..." 
+ boot_mac=$(discover_boot_mac "${bmc_address}" "${bmc_user}" "${bmc_pass}" "${system_id}" 2>/dev/null) || true + if [[ -n "${boot_mac}" ]]; then + info " ${name}: discovered boot MAC ${boot_mac}" + else + echo " WARNING: ${name}: could not discover boot MAC — set boot_mac in inventory" >&2 + boot_mac="DISCOVERY_FAILED" + fi + fi + ${first} || nodes_json+="," first=false @@ -255,7 +291,7 @@ generate_ironic_nodes_json() { "name": "${name}", "driver": "redfish", "driver_info": { - "address": "redfish://${bmc_ip}/${system_id}", + "address": "redfish://${bmc_address}/${system_id}", "username": "${bmc_user}", "password": "${bmc_pass}", "redfish_verify_ca": "${BMC_VERIFY_CA}" diff --git a/deploy/openshift-clusters/scripts/baremetal-wizard.sh b/deploy/openshift-clusters/scripts/baremetal-wizard.sh index 7bd567c6..b33d4e1c 100755 --- a/deploy/openshift-clusters/scripts/baremetal-wizard.sh +++ b/deploy/openshift-clusters/scripts/baremetal-wizard.sh @@ -165,10 +165,10 @@ prompt_bmc_pass() { prompt_boot_mac() { local mac while true; do - read -rp " Boot MAC address: " mac + read -rp " Boot MAC address (Enter to auto-discover): " mac if [[ -z "${mac}" ]]; then - echo " Error: boot MAC is required" >&2 - continue + echo "${mac}" + return fi if ! 
valid_mac "${mac}"; then echo " Error: invalid MAC (expected XX:XX:XX:XX:XX:XX)" >&2 @@ -195,13 +195,14 @@ show_summary() { local i for ((i = 0; i < ${#WIZ_NAMES[@]}; i++)); do + local display_mac="${WIZ_MACS[$i]:-auto-discover}" printf " %-4s %-14s %-40s %-10s %-10s %-19s\n" \ "$((i + 1))" \ "${WIZ_NAMES[$i]}" \ "${WIZ_IPS[$i]}" \ "${WIZ_USERS[$i]}" \ "********" \ - "${WIZ_MACS[$i]}" + "${display_mac}" done echo "==================================" @@ -279,7 +280,11 @@ write_inventory() { local i for ((i = 0; i < ${#WIZ_NAMES[@]}; i++)); do - echo "${WIZ_NAMES[$i]} bmc_ip=${WIZ_IPS[$i]} bmc_user=${WIZ_USERS[$i]} bmc_pass=${WIZ_PASSES[$i]} boot_mac=${WIZ_MACS[$i]}" >> "${tmp_inventory}" + local line="${WIZ_NAMES[$i]} bmc_address=${WIZ_IPS[$i]} bmc_user=${WIZ_USERS[$i]} bmc_pass=${WIZ_PASSES[$i]}" + if [[ -n "${WIZ_MACS[$i]}" ]]; then + line+=" boot_mac=${WIZ_MACS[$i]}" + fi + echo "${line}" >> "${tmp_inventory}" done { From 10747b27804cb9a4d778f207044b06e00588d93c Mon Sep 17 00:00:00 2001 From: Pablo Fontanilla Date: Mon, 29 Jun 2026 15:24:32 +0200 Subject: [PATCH 05/20] Use Redfish BootOptions for boot MAC discovery Replace the EthernetInterfaces-based discovery (which returned the wrong NIC) with BootOptions-based discovery. Walks the BIOS boot order, finds the first PXE IPv4 entry, and extracts the MAC from the UEFI device path. Tested against HPE iLO 5 (Edgeline e920t). 
Co-Authored-By: Claude Opus 4.6 --- .../scripts/baremetal-adopt.sh | 46 ++++++++++++++----- 1 file changed, 35 insertions(+), 11 deletions(-) diff --git a/deploy/openshift-clusters/scripts/baremetal-adopt.sh b/deploy/openshift-clusters/scripts/baremetal-adopt.sh index fc1f9f64..911deec9 100755 --- a/deploy/openshift-clusters/scripts/baremetal-adopt.sh +++ b/deploy/openshift-clusters/scripts/baremetal-adopt.sh @@ -175,22 +175,46 @@ discover_redfish_system_id() { discover_boot_mac() { local bmc_address="$1" bmc_user="$2" bmc_pass="$3" system_id="$4" - local ifaces_json mac - ifaces_json=$(curl -sk --connect-timeout 5 --max-time 15 \ + # Get boot order from the system resource + local boot_order + boot_order=$(curl -sk --connect-timeout 5 --max-time 10 \ -u "${bmc_user}:${bmc_pass}" \ - "https://${bmc_address}/${system_id}EthernetInterfaces/" 2>/dev/null) || return 1 + "https://${bmc_address}/${system_id}" 2>/dev/null \ + | jq -r '.Boot.BootOrder[]' 2>/dev/null) || return 1 - local iface_paths - iface_paths=$(echo "${ifaces_json}" | jq -r '.Members[]."@odata.id"' 2>/dev/null) || return 1 + # Fetch all boot options and index by BootOptionReference + local options_json + options_json=$(curl -sk --connect-timeout 5 --max-time 10 \ + -u "${bmc_user}:${bmc_pass}" \ + "https://${bmc_address}/${system_id}BootOptions/" 2>/dev/null) || return 1 + + local option_paths + option_paths=$(echo "${options_json}" | jq -r '.Members[]."@odata.id"' 2>/dev/null) || return 1 - for iface_path in ${iface_paths}; do - mac=$(curl -sk --connect-timeout 5 --max-time 10 \ + # Build associative arrays: ref → display_name, ref → uefi_path + declare -A opt_display opt_path + for option_url in ${option_paths}; do + local option + option=$(curl -sk --connect-timeout 5 --max-time 10 \ -u "${bmc_user}:${bmc_pass}" \ - "https://${bmc_address}${iface_path}" 2>/dev/null \ - | jq -r 'select(.Status.State == "Enabled") | .MACAddress // empty' 2>/dev/null) + "https://${bmc_address}${option_url}" 2>/dev/null) 
|| continue + + local ref + ref=$(echo "${option}" | jq -r '.BootOptionReference // empty' 2>/dev/null) + [[ -z "${ref}" ]] && continue + opt_display["${ref}"]=$(echo "${option}" | jq -r '.DisplayName // empty' 2>/dev/null) + opt_path["${ref}"]=$(echo "${option}" | jq -r '.UefiDevicePath // empty' 2>/dev/null) + done + + # Walk boot order, find the first PXE IPv4 entry + for boot_ref in ${boot_order}; do + local display_name="${opt_display[${boot_ref}]:-}" + local uefi_path="${opt_path[${boot_ref}]:-}" - if [[ -n "${mac}" && "${mac}" != "00:00:00:00:00:00" ]]; then - echo "${mac}" + if [[ "${display_name}" == *"PXE IPv4"* ]] && [[ "${uefi_path}" == *MAC* ]]; then + local raw_mac + raw_mac=$(echo "${uefi_path}" | grep -oP 'MAC\(\K[0-9A-Fa-f]+' 2>/dev/null) || continue + echo "${raw_mac}" | sed 's/\(..\)/\1:/g; s/:$//' | tr '[:lower:]' '[:upper:]' return 0 fi done From 84ebb546837d0c6a04eb3e4cc73e36ed77ef8e97 Mon Sep 17 00:00:00 2001 From: Pablo Fontanilla Date: Mon, 29 Jun 2026 16:27:46 +0200 Subject: [PATCH 06/20] Address CodeRabbit review findings on PR #88 - Fix jq null handling in Redfish system ID discovery (// empty) - Fix script names in usage text and generated comments - Fix missing / separator in BootOptions URL construction - Add --inventory flag and info messages to wizard trigger - Add warning when inventory has != 2 nodes - Honor bmc_verify_ca in curl calls via bmc_curl wrapper - Restrict permissions on generated credential artifacts (umask 077) Co-Authored-By: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> Co-Authored-By: Claude Opus 4.6 --- .../scripts/baremetal-adopt.sh | 37 ++++++++++++++----- .../scripts/baremetal-wizard.sh | 4 +- 2 files changed, 29 insertions(+), 12 deletions(-) diff --git a/deploy/openshift-clusters/scripts/baremetal-adopt.sh b/deploy/openshift-clusters/scripts/baremetal-adopt.sh index 911deec9..8f4c5a46 100755 --- a/deploy/openshift-clusters/scripts/baremetal-adopt.sh +++ 
b/deploy/openshift-clusters/scripts/baremetal-adopt.sh @@ -6,12 +6,13 @@ # and generates ironic_nodes.json + config_baremetal_fencing.sh for dev-scripts. # # Usage: -# adopt-baremetal.sh [options] +# baremetal-adopt.sh [options] # # Options: # --cluster-name NAME Cluster name for output directory (default: ostest) # --skip-verify Skip BMC credential verification # --verify-only Only verify BMC credentials, don't generate artifacts +# --inventory FILE Path to baremetal inventory (default: inventory_baremetal.ini) # --config-base FILE Base config to derive baremetal config from # -h, --help Show this help message @@ -66,6 +67,10 @@ parse_args() { VERIFY_ONLY=true shift ;; + --inventory) + INVENTORY="$2" + shift 2 + ;; --config-base) CONFIG_BASE="$2" shift 2 @@ -154,6 +159,9 @@ parse_inventory() { done < "${INVENTORY}" [[ ${#NODE_NAMES[@]} -eq 0 ]] && die "No nodes found in inventory" + if [[ ${#NODE_NAMES[@]} -ne 2 ]]; then + echo " WARNING: TNF requires exactly 2 nodes, found ${#NODE_NAMES[@]}" >&2 + fi info "Parsed ${#NODE_NAMES[@]} node(s) from inventory" } @@ -161,15 +169,21 @@ parse_inventory() { # BMC verification via Redfish ############################################################################## +bmc_curl() { + local opts=(-s --connect-timeout 5 --max-time 10) + [[ "${BMC_VERIFY_CA}" == "False" ]] && opts+=(-k) + curl "${opts[@]}" "$@" +} + discover_redfish_system_id() { local bmc_address="$1" bmc_user="$2" bmc_pass="$3" local systems_json - systems_json=$(curl -sk --connect-timeout 5 --max-time 10 \ + systems_json=$(bmc_curl \ -u "${bmc_user}:${bmc_pass}" \ "https://${bmc_address}/redfish/v1/Systems/" 2>/dev/null) || return 1 - echo "${systems_json}" | jq -r '.Members[0]."@odata.id"' 2>/dev/null + echo "${systems_json}" | jq -r '.Members[0]."@odata.id" // empty' 2>/dev/null } discover_boot_mac() { @@ -177,16 +191,16 @@ discover_boot_mac() { # Get boot order from the system resource local boot_order - boot_order=$(curl -sk --connect-timeout 5 --max-time 
10 \ + boot_order=$(bmc_curl \ -u "${bmc_user}:${bmc_pass}" \ "https://${bmc_address}/${system_id}" 2>/dev/null \ | jq -r '.Boot.BootOrder[]' 2>/dev/null) || return 1 # Fetch all boot options and index by BootOptionReference local options_json - options_json=$(curl -sk --connect-timeout 5 --max-time 10 \ + options_json=$(bmc_curl \ -u "${bmc_user}:${bmc_pass}" \ - "https://${bmc_address}/${system_id}BootOptions/" 2>/dev/null) || return 1 + "https://${bmc_address}/${system_id}/BootOptions/" 2>/dev/null) || return 1 local option_paths option_paths=$(echo "${options_json}" | jq -r '.Members[]."@odata.id"' 2>/dev/null) || return 1 @@ -195,7 +209,7 @@ discover_boot_mac() { declare -A opt_display opt_path for option_url in ${option_paths}; do local option - option=$(curl -sk --connect-timeout 5 --max-time 10 \ + option=$(bmc_curl \ -u "${bmc_user}:${bmc_pass}" \ "https://${bmc_address}${option_url}" 2>/dev/null) || continue @@ -229,7 +243,7 @@ verify_bmc() { # Verify Redfish root is reachable and credentials work local http_code - http_code=$(curl -sk --connect-timeout 5 --max-time 10 \ + http_code=$(bmc_curl \ -o /dev/null -w '%{http_code}' \ -u "${bmc_user}:${bmc_pass}" \ "https://${bmc_address}/redfish/v1/" 2>/dev/null) || http_code="000" @@ -362,7 +376,7 @@ generate_baremetal_config() { { cat "${base_config}" echo "" - echo "# Baremetal adoption overrides (generated by adopt-baremetal.sh)" + echo "# Baremetal adoption overrides (generated by baremetal-adopt.sh)" echo "export NODES_PLATFORM=baremetal" echo "export NODES_FILE=\"${nodes_file_path}\"" } > "${output_file}" @@ -379,6 +393,8 @@ main() { # Launch interactive wizard if no inventory exists if [[ ! 
-f "${INVENTORY}" ]]; then + info "No inventory found at ${INVENTORY}" + info "Launching interactive wizard (or provide --inventory PATH)" "${SCRIPT_DIR}/baremetal-wizard.sh" --output "${INVENTORY}" fi @@ -396,6 +412,7 @@ main() { # Create output directory local output_dir="${OC_DIR}/clusters/${CLUSTER_NAME}" + umask 077 mkdir -p "${output_dir}" # Generate artifacts @@ -411,7 +428,7 @@ main() { echo " ${nodes_file}" echo " ${output_dir}/config_baremetal_fencing.sh" echo "" - echo " Next: deploy to hypervisor with the baremetal install workflow (OCPEDGE-2775)" + echo " Next: deploy to the nodes using one of the baremetal-deploy* options" } main "$@" diff --git a/deploy/openshift-clusters/scripts/baremetal-wizard.sh b/deploy/openshift-clusters/scripts/baremetal-wizard.sh index b33d4e1c..fd727493 100755 --- a/deploy/openshift-clusters/scripts/baremetal-wizard.sh +++ b/deploy/openshift-clusters/scripts/baremetal-wizard.sh @@ -6,7 +6,7 @@ # displays a summary for confirmation, and writes inventory_baremetal.ini. 
# # Usage: -# wizard-baremetal.sh [options] +# baremetal-wizard.sh [options] # # Options: # --output FILE Inventory output path (default: inventory_baremetal.ini) @@ -273,7 +273,7 @@ write_inventory() { tmp_inventory=$(mktemp) { - echo "# Generated by wizard-baremetal.sh" + echo "# Generated by baremetal-wizard.sh" echo "" echo "[baremetal_nodes]" } > "${tmp_inventory}" From 388f2fe29a8b9980b49845a7e9040aefeba273c9 Mon Sep 17 00:00:00 2001 From: Pablo Fontanilla Date: Mon, 29 Jun 2026 17:01:20 +0200 Subject: [PATCH 07/20] Address Doug's poka-yoke review findings on PR #88 - Replace heredoc JSON interpolation with jq -n --arg to prevent malformed output from passwords containing quotes or backslashes - Fail hard on missing boot MAC instead of writing DISCOVERY_FAILED placeholder that silently breaks dev-scripts downstream - Gate all BMC contact (discovery + verification) behind --skip-verify so offline users don't get silent failures baked into artifacts Co-Authored-By: Claude Opus 4.6 --- .../scripts/baremetal-adopt.sh | 77 +++++++++++-------- 1 file changed, 43 insertions(+), 34 deletions(-) diff --git a/deploy/openshift-clusters/scripts/baremetal-adopt.sh b/deploy/openshift-clusters/scripts/baremetal-adopt.sh index 8f4c5a46..53ae602b 100755 --- a/deploy/openshift-clusters/scripts/baremetal-adopt.sh +++ b/deploy/openshift-clusters/scripts/baremetal-adopt.sh @@ -10,7 +10,7 @@ # # Options: # --cluster-name NAME Cluster name for output directory (default: ostest) -# --skip-verify Skip BMC credential verification +# --skip-verify Skip all BMC access (verify + discovery); requires boot_mac in inventory # --verify-only Only verify BMC credentials, don't generate artifacts # --inventory FILE Path to baremetal inventory (default: inventory_baremetal.ini) # --config-base FILE Base config to derive baremetal config from @@ -292,8 +292,8 @@ generate_ironic_nodes_json() { info "Generating ironic_nodes.json" - local nodes_json='{"nodes":[' - local first=true + local 
incomplete=false + local nodes=() for ((i = 0; i < ${#NODE_NAMES[@]}; i++)); do local name="${NODE_NAMES[$i]}" @@ -302,52 +302,61 @@ generate_ironic_nodes_json() { local bmc_pass="${NODE_BMC_PASSES[$i]}" local boot_mac="${NODE_BOOT_MACS[$i]}" - # Discover the Redfish system path from the BMC, fall back to standard + # Discover Redfish system path (requires BMC access) local system_id - system_id=$(discover_redfish_system_id "${bmc_address}" "${bmc_user}" "${bmc_pass}" 2>/dev/null) || true - system_id="${system_id:-/redfish/v1/Systems/1}" - # Strip leading slash for URL construction - system_id="${system_id#/}" + if ${SKIP_VERIFY}; then + system_id="redfish/v1/Systems/1" + else + system_id=$(discover_redfish_system_id "${bmc_address}" "${bmc_user}" "${bmc_pass}" 2>/dev/null) || true + system_id="${system_id:-/redfish/v1/Systems/1}" + system_id="${system_id#/}" + fi # Auto-discover boot MAC via Redfish if not provided if [[ -z "${boot_mac}" ]]; then + if ${SKIP_VERIFY}; then + echo " ERROR: ${name}: boot_mac required when using --skip-verify" >&2 + incomplete=true + continue + fi info " ${name}: boot_mac not set, attempting Redfish discovery..." 
boot_mac=$(discover_boot_mac "${bmc_address}" "${bmc_user}" "${bmc_pass}" "${system_id}" 2>/dev/null) || true if [[ -n "${boot_mac}" ]]; then info " ${name}: discovered boot MAC ${boot_mac}" else - echo " WARNING: ${name}: could not discover boot MAC — set boot_mac in inventory" >&2 - boot_mac="DISCOVERY_FAILED" + echo " ERROR: ${name}: could not discover boot MAC — set boot_mac in inventory" >&2 + incomplete=true + continue fi fi - ${first} || nodes_json+="," - first=false - - nodes_json+=$(cat < "${output_file}" + printf '%s\n' "${nodes[@]}" | jq -s '{nodes: .}' > "${output_file}" info " → ${output_file}" } From 4f112dee85eae67923bdc594434103804b06b0b6 Mon Sep 17 00:00:00 2001 From: Pablo Fontanilla Date: Mon, 29 Jun 2026 17:07:54 +0200 Subject: [PATCH 08/20] Fix boot MAC discovery failing on HPE iLO (trailing slash in system_id) Redfish @odata.id values include a trailing slash (/redfish/v1/Systems/1/), which produced a double-slash URL (.../Systems/1//BootOptions/) that iLO returns null content for. Strip the trailing slash after the leading one. 
Co-Authored-By: Claude Opus 4.6 --- deploy/openshift-clusters/scripts/baremetal-adopt.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/deploy/openshift-clusters/scripts/baremetal-adopt.sh b/deploy/openshift-clusters/scripts/baremetal-adopt.sh index 53ae602b..9ffa2e9c 100755 --- a/deploy/openshift-clusters/scripts/baremetal-adopt.sh +++ b/deploy/openshift-clusters/scripts/baremetal-adopt.sh @@ -310,6 +310,7 @@ generate_ironic_nodes_json() { system_id=$(discover_redfish_system_id "${bmc_address}" "${bmc_user}" "${bmc_pass}" 2>/dev/null) || true system_id="${system_id:-/redfish/v1/Systems/1}" system_id="${system_id#/}" + system_id="${system_id%/}" fi # Auto-discover boot MAC via Redfish if not provided From 9b6cefb839a750cb722e3312bfffe7a70da77146 Mon Sep 17 00:00:00 2001 From: Pablo Fontanilla Date: Tue, 30 Jun 2026 10:44:30 +0200 Subject: [PATCH 09/20] Add OS-level network config to baremetal adoption workflow Add declarative [baremetal_network] INI section for cluster-wide network vars (machine_network, gateway, api_vip, ingress_vip) and per-node node_ip field. The adoption script translates these into dev-scripts exports (EXTERNAL_SUBNET_V4, BAREMETAL_GATEWAY, BAREMETAL_API_VIP, BAREMETAL_INGRESS_VIP, BAREMETAL_IPS) and always emits bridge overrides (MANAGE_BR_BRIDGE=n, MANAGE_PRO_BRIDGE=n, MANAGE_INT_BRIDGE=n). All new fields are optional for backward compatibility. The wizard emits skipped fields as commented placeholders so users can fill them later without referencing the sample file. BAREMETAL_IPS is only emitted when ALL nodes have node_ip set. 
Co-Authored-By: Claude Opus 4.6 --- .../inventory_baremetal.ini.sample | 17 +- .../files/config_fencing_example.sh | 3 + .../scripts/baremetal-adopt.sh | 66 ++++++- .../scripts/baremetal-wizard.sh | 163 +++++++++++++++++- 4 files changed, 236 insertions(+), 13 deletions(-) diff --git a/deploy/openshift-clusters/inventory_baremetal.ini.sample b/deploy/openshift-clusters/inventory_baremetal.ini.sample index 3300d057..47b73da5 100644 --- a/deploy/openshift-clusters/inventory_baremetal.ini.sample +++ b/deploy/openshift-clusters/inventory_baremetal.ini.sample @@ -14,13 +14,15 @@ # bmc_pass - BMC login password # boot_mac - (optional) MAC address of the NIC used for PXE boot # If omitted, the adopt script attempts Redfish discovery. +# node_ip - (optional) Static IP address for this node on the machine network +# Required for baremetal ABI deployments with static IPs. # # The hostname (first field) becomes the node name in ironic_nodes.json. # For TNF, you need exactly 2 nodes (master-0 and master-1). [baremetal_nodes] -master-0 bmc_address=192.168.1.100 bmc_user=admin bmc_pass=changeme boot_mac=52:54:00:00:00:01 -master-1 bmc_address=192.168.1.101 bmc_user=admin bmc_pass=changeme boot_mac=52:54:00:00:00:02 +master-0 bmc_address=192.168.1.100 bmc_user=admin bmc_pass=changeme boot_mac=52:54:00:00:00:01 node_ip=192.168.1.10 +master-1 bmc_address=192.168.1.101 bmc_user=admin bmc_pass=changeme boot_mac=52:54:00:00:00:02 node_ip=192.168.1.11 [baremetal_nodes:vars] # BMC driver — only redfish is supported for TNF fencing @@ -31,3 +33,14 @@ bmc_verify_ca=False # Node CPU architecture cpu_arch=x86_64 + +[baremetal_network] +# Cluster-wide network config for baremetal ABI deployments (all optional). +# machine_network - Machine network CIDR (e.g. 
192.168.1.0/24) +# gateway - Default gateway IP +# api_vip - API virtual IP +# ingress_vip - Ingress virtual IP +#machine_network=192.168.1.0/24 +#gateway=192.168.1.1 +#api_vip=192.168.1.100 +#ingress_vip=192.168.1.101 diff --git a/deploy/openshift-clusters/roles/dev-scripts/install-dev/files/config_fencing_example.sh b/deploy/openshift-clusters/roles/dev-scripts/install-dev/files/config_fencing_example.sh index 2291e37c..2eacf2ee 100644 --- a/deploy/openshift-clusters/roles/dev-scripts/install-dev/files/config_fencing_example.sh +++ b/deploy/openshift-clusters/roles/dev-scripts/install-dev/files/config_fencing_example.sh @@ -36,3 +36,6 @@ export OPENSHIFT_INSTALL_EXPERIMENTAL_DISABLE_IMAGE_POLICY=true # export VBMC_IMAGE=quay.io/rh-edge-enablement/vbmc:2026-06 # export SUSHY_TOOLS_IMAGE=quay.io/rh-edge-enablement/sushy-tools:2026-06 # fi + +# Baremetal network config (node IPs, VIPs, bridge overrides) is auto-generated +# by 'make baremetal-adopt' into config_baremetal_fencing.sh — do not add here. 
diff --git a/deploy/openshift-clusters/scripts/baremetal-adopt.sh b/deploy/openshift-clusters/scripts/baremetal-adopt.sh index 9ffa2e9c..4521407d 100755 --- a/deploy/openshift-clusters/scripts/baremetal-adopt.sh +++ b/deploy/openshift-clusters/scripts/baremetal-adopt.sh @@ -35,6 +35,13 @@ declare -a NODE_BMC_ADDRS=() declare -a NODE_BMC_USERS=() declare -a NODE_BMC_PASSES=() declare -a NODE_BOOT_MACS=() +declare -a NODE_IPS=() + +# Cluster-wide network config (optional, from [baremetal_network]) +MACHINE_NETWORK="" +GATEWAY="" +API_VIP="" +INGRESS_VIP="" # Group defaults BMC_VERIFY_CA="False" @@ -95,6 +102,7 @@ parse_inventory() { local in_nodes=false local in_vars=false + local in_network=false while IFS= read -r line || [[ -n "${line}" ]]; do # Strip comments and leading/trailing whitespace @@ -111,9 +119,15 @@ parse_inventory() { in_nodes=false in_vars=true continue + elif [[ "${line}" == "[baremetal_network]" ]]; then + in_nodes=false + in_vars=false + in_network=true + continue elif [[ "${line}" =~ ^\[.*\] ]]; then in_nodes=false in_vars=false + in_network=false continue fi @@ -128,21 +142,35 @@ parse_inventory() { continue fi + if ${in_network}; then + local key val + key="${line%%=*}" + val="${line#*=}" + case "${key}" in + machine_network) MACHINE_NETWORK="${val}" ;; + gateway) GATEWAY="${val}" ;; + api_vip) API_VIP="${val}" ;; + ingress_vip) INGRESS_VIP="${val}" ;; + esac + continue + fi + if ${in_nodes}; then local name rest name="${line%% *}" rest="${line#* }" - local bmc_address="" bmc_user="" bmc_pass="" boot_mac="" + local bmc_address="" bmc_user="" bmc_pass="" boot_mac="" node_ip="" for pair in ${rest}; do local key val key="${pair%%=*}" val="${pair#*=}" case "${key}" in - bmc_address) bmc_address="${val}" ;; - bmc_user) bmc_user="${val}" ;; - bmc_pass) bmc_pass="${val}" ;; - boot_mac) boot_mac="${val}" ;; + bmc_address) bmc_address="${val}" ;; + bmc_user) bmc_user="${val}" ;; + bmc_pass) bmc_pass="${val}" ;; + boot_mac) boot_mac="${val}" ;; + 
node_ip) node_ip="${val}" ;; esac done @@ -155,6 +183,7 @@ parse_inventory() { NODE_BMC_USERS+=("${bmc_user}") NODE_BMC_PASSES+=("${bmc_pass}") NODE_BOOT_MACS+=("${boot_mac}") + NODE_IPS+=("${node_ip}") fi done < "${INVENTORY}" @@ -389,6 +418,33 @@ generate_baremetal_config() { echo "# Baremetal adoption overrides (generated by baremetal-adopt.sh)" echo "export NODES_PLATFORM=baremetal" echo "export NODES_FILE=\"${nodes_file_path}\"" + echo "export MANAGE_BR_BRIDGE=n" + echo "export MANAGE_PRO_BRIDGE=n" + echo "export MANAGE_INT_BRIDGE=n" + + if [[ -n "${MACHINE_NETWORK}" || -n "${GATEWAY}" || -n "${API_VIP}" || -n "${INGRESS_VIP}" ]]; then + echo "" + echo "# Baremetal network config" + [[ -n "${MACHINE_NETWORK}" ]] && echo "export EXTERNAL_SUBNET_V4=\"${MACHINE_NETWORK}\"" + [[ -n "${GATEWAY}" ]] && echo "export BAREMETAL_GATEWAY=\"${GATEWAY}\"" + [[ -n "${API_VIP}" ]] && echo "export BAREMETAL_API_VIP=\"${API_VIP}\"" + [[ -n "${INGRESS_VIP}" ]] && echo "export BAREMETAL_INGRESS_VIP=\"${INGRESS_VIP}\"" + fi + + # Emit BAREMETAL_IPS only if ALL nodes have node_ip set + local all_have_ips=true + local ip_list="" + for ((i = 0; i < ${#NODE_IPS[@]}; i++)); do + if [[ -z "${NODE_IPS[$i]}" ]]; then + all_have_ips=false + break + fi + [[ -n "${ip_list}" ]] && ip_list+="," + ip_list+="${NODE_IPS[$i]}" + done + if ${all_have_ips} && [[ -n "${ip_list}" ]]; then + echo "export BAREMETAL_IPS=\"${ip_list}\"" + fi } > "${output_file}" info " → ${output_file}" diff --git a/deploy/openshift-clusters/scripts/baremetal-wizard.sh b/deploy/openshift-clusters/scripts/baremetal-wizard.sh index fd727493..b21bf172 100755 --- a/deploy/openshift-clusters/scripts/baremetal-wizard.sh +++ b/deploy/openshift-clusters/scripts/baremetal-wizard.sh @@ -84,6 +84,17 @@ valid_bmc_address() { valid_ipv4 "${addr}" || valid_hostname "${addr}" } +valid_cidr() { + local cidr="$1" + local ip="${cidr%%/*}" + local prefix="${cidr##*/}" + [[ "${cidr}" == *"/"* ]] || return 1 + valid_ipv4 "${ip}" || return 1 
+ [[ "${prefix}" =~ ^[0-9]+$ ]] || return 1 + (( prefix <= 32 )) || return 1 + return 0 +} + ############################################################################## # Prompt functions # @@ -179,6 +190,91 @@ prompt_boot_mac() { done } +prompt_node_ip() { + local ip + while true; do + read -rp " Node IP address (Enter to skip): " ip + if [[ -z "${ip}" ]]; then + echo "${ip}" + return + fi + if ! valid_ipv4 "${ip}"; then + echo " Error: invalid IPv4 address" >&2 + continue + fi + echo "${ip}" + return + done +} + +prompt_machine_network() { + local cidr + while true; do + read -rp " Machine network CIDR (e.g. 192.168.1.0/24, Enter to skip): " cidr + if [[ -z "${cidr}" ]]; then + echo "${cidr}" + return + fi + if ! valid_cidr "${cidr}"; then + echo " Error: invalid CIDR (expected x.x.x.x/prefix)" >&2 + continue + fi + echo "${cidr}" + return + done +} + +prompt_gateway() { + local gw + while true; do + read -rp " Gateway IP (Enter to skip): " gw + if [[ -z "${gw}" ]]; then + echo "${gw}" + return + fi + if ! valid_ipv4 "${gw}"; then + echo " Error: invalid IPv4 address" >&2 + continue + fi + echo "${gw}" + return + done +} + +prompt_api_vip() { + local vip + while true; do + read -rp " API VIP (Enter to skip): " vip + if [[ -z "${vip}" ]]; then + echo "${vip}" + return + fi + if ! valid_ipv4 "${vip}"; then + echo " Error: invalid IPv4 address" >&2 + continue + fi + echo "${vip}" + return + done +} + +prompt_ingress_vip() { + local vip + while true; do + read -rp " Ingress VIP (Enter to skip): " vip + if [[ -z "${vip}" ]]; then + echo "${vip}" + return + fi + if ! 
valid_ipv4 "${vip}"; then + echo " Error: invalid IPv4 address" >&2 + continue + fi + echo "${vip}" + return + done +} + ############################################################################## # Summary display ############################################################################## @@ -188,23 +284,34 @@ show_summary() { echo "==================================" echo " BAREMETAL NODE SUMMARY" echo "==================================" - printf " %-4s %-14s %-40s %-10s %-10s %-19s\n" \ - "#" "HOSTNAME" "BMC ADDRESS" "BMC USER" "PASSWORD" "BOOT MAC" - printf " %-4s %-14s %-40s %-10s %-10s %-19s\n" \ - "---" "------------" "--------------------------------------" "--------" "--------" "-----------------" + printf " %-4s %-14s %-40s %-10s %-10s %-19s %-17s\n" \ + "#" "HOSTNAME" "BMC ADDRESS" "BMC USER" "PASSWORD" "BOOT MAC" "NODE IP" + printf " %-4s %-14s %-40s %-10s %-10s %-19s %-17s\n" \ + "---" "------------" "--------------------------------------" "--------" "--------" "-----------------" "---------------" local i for ((i = 0; i < ${#WIZ_NAMES[@]}; i++)); do local display_mac="${WIZ_MACS[$i]:-auto-discover}" - printf " %-4s %-14s %-40s %-10s %-10s %-19s\n" \ + local display_ip="${WIZ_NODE_IPS[$i]:---}" + printf " %-4s %-14s %-40s %-10s %-10s %-19s %-17s\n" \ "$((i + 1))" \ "${WIZ_NAMES[$i]}" \ "${WIZ_IPS[$i]}" \ "${WIZ_USERS[$i]}" \ "********" \ - "${display_mac}" + "${display_mac}" \ + "${display_ip}" done + if [[ -n "${WIZ_MACHINE_NETWORK}" || -n "${WIZ_GATEWAY}" || -n "${WIZ_API_VIP}" || -n "${WIZ_INGRESS_VIP}" ]]; then + echo "" + echo " Cluster Network:" + [[ -n "${WIZ_MACHINE_NETWORK}" ]] && echo " Machine network: ${WIZ_MACHINE_NETWORK}" + [[ -n "${WIZ_GATEWAY}" ]] && echo " Gateway: ${WIZ_GATEWAY}" + [[ -n "${WIZ_API_VIP}" ]] && echo " API VIP: ${WIZ_API_VIP}" + [[ -n "${WIZ_INGRESS_VIP}" ]] && echo " Ingress VIP: ${WIZ_INGRESS_VIP}" + fi + echo "==================================" } @@ -225,6 +332,7 @@ run_wizard() { WIZ_USERS=() WIZ_PASSES=() 
WIZ_MACS=() + WIZ_NODE_IPS=() local i for ((i = 0; i < node_count; i++)); do @@ -237,8 +345,16 @@ run_wizard() { WIZ_USERS+=("$(prompt_bmc_user)") WIZ_PASSES+=("$(prompt_bmc_pass)") WIZ_MACS+=("$(prompt_boot_mac)") + WIZ_NODE_IPS+=("$(prompt_node_ip)") done + echo "" + echo "--- Cluster Network (all optional) ---" + WIZ_MACHINE_NETWORK="$(prompt_machine_network)" + WIZ_GATEWAY="$(prompt_gateway)" + WIZ_API_VIP="$(prompt_api_vip)" + WIZ_INGRESS_VIP="$(prompt_ingress_vip)" + show_summary local confirm @@ -284,6 +400,11 @@ write_inventory() { if [[ -n "${WIZ_MACS[$i]}" ]]; then line+=" boot_mac=${WIZ_MACS[$i]}" fi + if [[ -n "${WIZ_NODE_IPS[$i]}" ]]; then + line+=" node_ip=${WIZ_NODE_IPS[$i]}" + else + line+=" #node_ip=" + fi echo "${line}" >> "${tmp_inventory}" done @@ -295,6 +416,31 @@ write_inventory() { echo "cpu_arch=x86_64" } >> "${tmp_inventory}" + { + echo "" + echo "[baremetal_network]" + if [[ -n "${WIZ_MACHINE_NETWORK}" ]]; then + echo "machine_network=${WIZ_MACHINE_NETWORK}" + else + echo "#machine_network=" + fi + if [[ -n "${WIZ_GATEWAY}" ]]; then + echo "gateway=${WIZ_GATEWAY}" + else + echo "#gateway=" + fi + if [[ -n "${WIZ_API_VIP}" ]]; then + echo "api_vip=${WIZ_API_VIP}" + else + echo "#api_vip=" + fi + if [[ -n "${WIZ_INGRESS_VIP}" ]]; then + echo "ingress_vip=${WIZ_INGRESS_VIP}" + else + echo "#ingress_vip=" + fi + } >> "${tmp_inventory}" + mv "${tmp_inventory}" "${OUTPUT}" echo "" info "Inventory written to ${OUTPUT}" @@ -309,6 +455,11 @@ declare -a WIZ_IPS=() declare -a WIZ_USERS=() declare -a WIZ_PASSES=() declare -a WIZ_MACS=() +declare -a WIZ_NODE_IPS=() +WIZ_MACHINE_NETWORK="" +WIZ_GATEWAY="" +WIZ_API_VIP="" +WIZ_INGRESS_VIP="" parse_args "$@" run_wizard From b895d7decafcb63f331f0b60bdbdbd023f7ffcd1 Mon Sep 17 00:00:00 2001 From: Pablo Fontanilla Date: Tue, 30 Jun 2026 16:14:25 +0200 Subject: [PATCH 10/20] Add [provisioning_host] prompts to baremetal wizard The wizard now collects optional provisioning host config (ssh_target, ssh_key, 
dev_scripts_path, working_dir) and writes a [provisioning_host] section to inventory_baremetal.ini. Skipped fields are commented out. Co-Authored-By: Claude Opus 4.6 --- .../scripts/baremetal-wizard.sh | 86 +++++++++++++++++++ 1 file changed, 86 insertions(+) diff --git a/deploy/openshift-clusters/scripts/baremetal-wizard.sh b/deploy/openshift-clusters/scripts/baremetal-wizard.sh index b21bf172..6d636876 100755 --- a/deploy/openshift-clusters/scripts/baremetal-wizard.sh +++ b/deploy/openshift-clusters/scripts/baremetal-wizard.sh @@ -275,6 +275,41 @@ prompt_ingress_vip() { done } +prompt_ssh_target() { + local target + while true; do + read -rp " SSH target for remote deployment (user@host, Enter to skip): " target + if [[ -z "${target}" ]]; then + echo "${target}" + return + fi + if ! [[ "${target}" == *@* ]]; then + echo " Error: expected user@host format" >&2 + continue + fi + echo "${target}" + return + done +} + +prompt_ssh_key() { + local key + read -rp " SSH key path (Enter for ssh-agent/default): " key + echo "${key}" +} + +prompt_dev_scripts_path() { + local path + read -rp " dev-scripts path on remote [~/openshift-metal3/dev-scripts]: " path + echo "${path}" +} + +prompt_working_dir() { + local dir + read -rp " Remote working directory [~/tnt-baremetal]: " dir + echo "${dir}" +} + ############################################################################## # Summary display ############################################################################## @@ -312,6 +347,15 @@ show_summary() { [[ -n "${WIZ_INGRESS_VIP}" ]] && echo " Ingress VIP: ${WIZ_INGRESS_VIP}" fi + if [[ -n "${WIZ_SSH_TARGET}" ]]; then + echo "" + echo " Provisioning Host:" + echo " SSH target: ${WIZ_SSH_TARGET}" + echo " SSH key: ${WIZ_SSH_KEY:---}" + echo " Dev-scripts: ${WIZ_DEV_SCRIPTS_PATH:-~/openshift-metal3/dev-scripts}" + echo " Working dir: ${WIZ_WORKING_DIR:-~/tnt-baremetal}" + fi + echo "==================================" } @@ -355,6 +399,19 @@ run_wizard() { 
WIZ_API_VIP="$(prompt_api_vip)" WIZ_INGRESS_VIP="$(prompt_ingress_vip)" + echo "" + echo "--- Provisioning Host (optional) ---" + WIZ_SSH_TARGET="$(prompt_ssh_target)" + if [[ -n "${WIZ_SSH_TARGET}" ]]; then + WIZ_SSH_KEY="$(prompt_ssh_key)" + WIZ_DEV_SCRIPTS_PATH="$(prompt_dev_scripts_path)" + WIZ_WORKING_DIR="$(prompt_working_dir)" + else + WIZ_SSH_KEY="" + WIZ_DEV_SCRIPTS_PATH="" + WIZ_WORKING_DIR="" + fi + show_summary local confirm @@ -441,6 +498,31 @@ write_inventory() { fi } >> "${tmp_inventory}" + { + echo "" + echo "[provisioning_host]" + if [[ -n "${WIZ_SSH_TARGET}" ]]; then + echo "ssh_target=${WIZ_SSH_TARGET}" + else + echo "#ssh_target=" + fi + if [[ -n "${WIZ_SSH_KEY}" ]]; then + echo "ssh_key=${WIZ_SSH_KEY}" + else + echo "#ssh_key=" + fi + if [[ -n "${WIZ_DEV_SCRIPTS_PATH}" ]]; then + echo "dev_scripts_path=${WIZ_DEV_SCRIPTS_PATH}" + else + echo "#dev_scripts_path=" + fi + if [[ -n "${WIZ_WORKING_DIR}" ]]; then + echo "working_dir=${WIZ_WORKING_DIR}" + else + echo "#working_dir=" + fi + } >> "${tmp_inventory}" + mv "${tmp_inventory}" "${OUTPUT}" echo "" info "Inventory written to ${OUTPUT}" @@ -460,6 +542,10 @@ WIZ_MACHINE_NETWORK="" WIZ_GATEWAY="" WIZ_API_VIP="" WIZ_INGRESS_VIP="" +WIZ_SSH_TARGET="" +WIZ_SSH_KEY="" +WIZ_DEV_SCRIPTS_PATH="" +WIZ_WORKING_DIR="" parse_args "$@" run_wizard From 101f5335f50290cc62f9f00c3527b3c5704d7a52 Mon Sep 17 00:00:00 2001 From: Pablo Fontanilla Date: Thu, 2 Jul 2026 10:30:54 +0200 Subject: [PATCH 11/20] Add BMC port to Redfish addresses for CEO fencing compatibility Without an explicit port in the Redfish URL, CEO's fence_redfish agent fails to connect to the BMC during TNF auth jobs. The port (default 443) now flows through verification, discovery, and ironic_nodes.json generation as redfish://host:port/redfish/v1/Systems/1. Adds bmc_port as a per-node field with a group default in [baremetal_nodes:vars], matching the existing inheritance pattern. 
Co-Authored-By: Claude Opus 4.6 --- .../inventory_baremetal.ini.sample | 4 +++ .../scripts/baremetal-adopt.sh | 34 +++++++++++-------- .../scripts/baremetal-wizard.sh | 30 ++++++++++++---- 3 files changed, 48 insertions(+), 20 deletions(-) diff --git a/deploy/openshift-clusters/inventory_baremetal.ini.sample b/deploy/openshift-clusters/inventory_baremetal.ini.sample index 47b73da5..f27a1d90 100644 --- a/deploy/openshift-clusters/inventory_baremetal.ini.sample +++ b/deploy/openshift-clusters/inventory_baremetal.ini.sample @@ -12,6 +12,7 @@ # bmc_address - BMC/iDRAC/iLO management address (IP or hostname) # bmc_user - BMC login username # bmc_pass - BMC login password +# bmc_port - (optional) BMC Redfish port (default: 443) # boot_mac - (optional) MAC address of the NIC used for PXE boot # If omitted, the adopt script attempts Redfish discovery. # node_ip - (optional) Static IP address for this node on the machine network @@ -28,6 +29,9 @@ master-1 bmc_address=192.168.1.101 bmc_user=admin bmc_pass=changeme boot_mac=52: # BMC driver — only redfish is supported for TNF fencing bmc_driver=redfish +# BMC Redfish port (per-node bmc_port overrides this) +bmc_port=443 + # Skip TLS verification for BMC endpoints (common with self-signed certs) bmc_verify_ca=False diff --git a/deploy/openshift-clusters/scripts/baremetal-adopt.sh b/deploy/openshift-clusters/scripts/baremetal-adopt.sh index 4521407d..d5f701c2 100755 --- a/deploy/openshift-clusters/scripts/baremetal-adopt.sh +++ b/deploy/openshift-clusters/scripts/baremetal-adopt.sh @@ -34,6 +34,7 @@ declare -a NODE_NAMES=() declare -a NODE_BMC_ADDRS=() declare -a NODE_BMC_USERS=() declare -a NODE_BMC_PASSES=() +declare -a NODE_BMC_PORTS=() declare -a NODE_BOOT_MACS=() declare -a NODE_IPS=() @@ -44,6 +45,7 @@ API_VIP="" INGRESS_VIP="" # Group defaults +BMC_PORT="443" BMC_VERIFY_CA="False" CPU_ARCH="x86_64" @@ -136,6 +138,7 @@ parse_inventory() { key="${line%%=*}" val="${line#*=}" case "${key}" in + bmc_port) BMC_PORT="${val}" ;; 
bmc_verify_ca) BMC_VERIFY_CA="${val}" ;; cpu_arch) CPU_ARCH="${val}" ;; esac @@ -160,7 +163,7 @@ parse_inventory() { name="${line%% *}" rest="${line#* }" - local bmc_address="" bmc_user="" bmc_pass="" boot_mac="" node_ip="" + local bmc_address="" bmc_user="" bmc_pass="" bmc_port="" boot_mac="" node_ip="" for pair in ${rest}; do local key val key="${pair%%=*}" @@ -169,6 +172,7 @@ parse_inventory() { bmc_address) bmc_address="${val}" ;; bmc_user) bmc_user="${val}" ;; bmc_pass) bmc_pass="${val}" ;; + bmc_port) bmc_port="${val}" ;; boot_mac) boot_mac="${val}" ;; node_ip) node_ip="${val}" ;; esac @@ -182,6 +186,7 @@ parse_inventory() { NODE_BMC_ADDRS+=("${bmc_address}") NODE_BMC_USERS+=("${bmc_user}") NODE_BMC_PASSES+=("${bmc_pass}") + NODE_BMC_PORTS+=("${bmc_port:-${BMC_PORT}}") NODE_BOOT_MACS+=("${boot_mac}") NODE_IPS+=("${node_ip}") fi @@ -205,31 +210,31 @@ bmc_curl() { } discover_redfish_system_id() { - local bmc_address="$1" bmc_user="$2" bmc_pass="$3" + local bmc_address="$1" bmc_user="$2" bmc_pass="$3" bmc_port="$4" local systems_json systems_json=$(bmc_curl \ -u "${bmc_user}:${bmc_pass}" \ - "https://${bmc_address}/redfish/v1/Systems/" 2>/dev/null) || return 1 + "https://${bmc_address}:${bmc_port}/redfish/v1/Systems/" 2>/dev/null) || return 1 echo "${systems_json}" | jq -r '.Members[0]."@odata.id" // empty' 2>/dev/null } discover_boot_mac() { - local bmc_address="$1" bmc_user="$2" bmc_pass="$3" system_id="$4" + local bmc_address="$1" bmc_user="$2" bmc_pass="$3" bmc_port="$4" system_id="$5" # Get boot order from the system resource local boot_order boot_order=$(bmc_curl \ -u "${bmc_user}:${bmc_pass}" \ - "https://${bmc_address}/${system_id}" 2>/dev/null \ + "https://${bmc_address}:${bmc_port}/${system_id}" 2>/dev/null \ | jq -r '.Boot.BootOrder[]' 2>/dev/null) || return 1 # Fetch all boot options and index by BootOptionReference local options_json options_json=$(bmc_curl \ -u "${bmc_user}:${bmc_pass}" \ - "https://${bmc_address}/${system_id}/BootOptions/" 
2>/dev/null) || return 1 + "https://${bmc_address}:${bmc_port}/${system_id}/BootOptions/" 2>/dev/null) || return 1 local option_paths option_paths=$(echo "${options_json}" | jq -r '.Members[]."@odata.id"' 2>/dev/null) || return 1 @@ -240,7 +245,7 @@ discover_boot_mac() { local option option=$(bmc_curl \ -u "${bmc_user}:${bmc_pass}" \ - "https://${bmc_address}${option_url}" 2>/dev/null) || continue + "https://${bmc_address}:${bmc_port}${option_url}" 2>/dev/null) || continue local ref ref=$(echo "${option}" | jq -r '.BootOptionReference // empty' 2>/dev/null) @@ -265,17 +270,17 @@ discover_boot_mac() { } verify_bmc() { - local name="$1" bmc_address="$2" bmc_user="$3" bmc_pass="$4" + local name="$1" bmc_address="$2" bmc_user="$3" bmc_pass="$4" bmc_port="$5" local rc=0 - printf " %-12s %-20s " "${name}" "${bmc_address}" + printf " %-12s %-20s " "${name}" "${bmc_address}:${bmc_port}" # Verify Redfish root is reachable and credentials work local http_code http_code=$(bmc_curl \ -o /dev/null -w '%{http_code}' \ -u "${bmc_user}:${bmc_pass}" \ - "https://${bmc_address}/redfish/v1/" 2>/dev/null) || http_code="000" + "https://${bmc_address}:${bmc_port}/redfish/v1/" 2>/dev/null) || http_code="000" if [[ "${http_code}" == "200" ]]; then echo "OK (HTTP ${http_code})" @@ -300,7 +305,7 @@ verify_all_bmcs() { local failed=0 for ((i = 0; i < ${#NODE_NAMES[@]}; i++)); do if ! 
verify_bmc "${NODE_NAMES[$i]}" "${NODE_BMC_ADDRS[$i]}" \ - "${NODE_BMC_USERS[$i]}" "${NODE_BMC_PASSES[$i]}"; then + "${NODE_BMC_USERS[$i]}" "${NODE_BMC_PASSES[$i]}" "${NODE_BMC_PORTS[$i]}"; then failed=$((failed + 1)) fi done @@ -329,6 +334,7 @@ generate_ironic_nodes_json() { local bmc_address="${NODE_BMC_ADDRS[$i]}" local bmc_user="${NODE_BMC_USERS[$i]}" local bmc_pass="${NODE_BMC_PASSES[$i]}" + local bmc_port="${NODE_BMC_PORTS[$i]}" local boot_mac="${NODE_BOOT_MACS[$i]}" # Discover Redfish system path (requires BMC access) @@ -336,7 +342,7 @@ generate_ironic_nodes_json() { if ${SKIP_VERIFY}; then system_id="redfish/v1/Systems/1" else - system_id=$(discover_redfish_system_id "${bmc_address}" "${bmc_user}" "${bmc_pass}" 2>/dev/null) || true + system_id=$(discover_redfish_system_id "${bmc_address}" "${bmc_user}" "${bmc_pass}" "${bmc_port}" 2>/dev/null) || true system_id="${system_id:-/redfish/v1/Systems/1}" system_id="${system_id#/}" system_id="${system_id%/}" @@ -350,7 +356,7 @@ generate_ironic_nodes_json() { continue fi info " ${name}: boot_mac not set, attempting Redfish discovery..." 
- boot_mac=$(discover_boot_mac "${bmc_address}" "${bmc_user}" "${bmc_pass}" "${system_id}" 2>/dev/null) || true + boot_mac=$(discover_boot_mac "${bmc_address}" "${bmc_user}" "${bmc_pass}" "${bmc_port}" "${system_id}" 2>/dev/null) || true if [[ -n "${boot_mac}" ]]; then info " ${name}: discovered boot MAC ${boot_mac}" else @@ -362,7 +368,7 @@ generate_ironic_nodes_json() { nodes+=("$(jq -n \ --arg name "${name}" \ - --arg addr "redfish://${bmc_address}/${system_id}" \ + --arg addr "redfish://${bmc_address}:${bmc_port}/${system_id}" \ --arg user "${bmc_user}" \ --arg pass "${bmc_pass}" \ --arg verify_ca "${BMC_VERIFY_CA}" \ diff --git a/deploy/openshift-clusters/scripts/baremetal-wizard.sh b/deploy/openshift-clusters/scripts/baremetal-wizard.sh index 6d636876..fe13efa8 100755 --- a/deploy/openshift-clusters/scripts/baremetal-wizard.sh +++ b/deploy/openshift-clusters/scripts/baremetal-wizard.sh @@ -173,6 +173,20 @@ prompt_bmc_pass() { done } +prompt_bmc_port() { + local port + while true; do + read -rp " BMC port [443]: " port + port="${port:-443}" + if ! 
[[ "${port}" =~ ^[0-9]+$ ]] || (( port < 1 || port > 65535 )); then + echo " Error: invalid port (expected 1-65535)" >&2 + continue + fi + echo "${port}" + return + done +} + prompt_boot_mac() { local mac while true; do @@ -319,19 +333,20 @@ show_summary() { echo "==================================" echo " BAREMETAL NODE SUMMARY" echo "==================================" - printf " %-4s %-14s %-40s %-10s %-10s %-19s %-17s\n" \ + printf " %-4s %-14s %-46s %-10s %-10s %-19s %-17s\n" \ "#" "HOSTNAME" "BMC ADDRESS" "BMC USER" "PASSWORD" "BOOT MAC" "NODE IP" - printf " %-4s %-14s %-40s %-10s %-10s %-19s %-17s\n" \ - "---" "------------" "--------------------------------------" "--------" "--------" "-----------------" "---------------" + printf " %-4s %-14s %-46s %-10s %-10s %-19s %-17s\n" \ + "---" "------------" "--------------------------------------------" "--------" "--------" "-----------------" "---------------" local i for ((i = 0; i < ${#WIZ_NAMES[@]}; i++)); do local display_mac="${WIZ_MACS[$i]:-auto-discover}" local display_ip="${WIZ_NODE_IPS[$i]:---}" - printf " %-4s %-14s %-40s %-10s %-10s %-19s %-17s\n" \ + local display_addr="${WIZ_IPS[$i]}:${WIZ_PORTS[$i]}" + printf " %-4s %-14s %-46s %-10s %-10s %-19s %-17s\n" \ "$((i + 1))" \ "${WIZ_NAMES[$i]}" \ - "${WIZ_IPS[$i]}" \ + "${display_addr}" \ "${WIZ_USERS[$i]}" \ "********" \ "${display_mac}" \ @@ -388,6 +403,7 @@ run_wizard() { WIZ_IPS+=("$(prompt_bmc_address)") WIZ_USERS+=("$(prompt_bmc_user)") WIZ_PASSES+=("$(prompt_bmc_pass)") + WIZ_PORTS+=("$(prompt_bmc_port)") WIZ_MACS+=("$(prompt_boot_mac)") WIZ_NODE_IPS+=("$(prompt_node_ip)") done @@ -453,7 +469,7 @@ write_inventory() { local i for ((i = 0; i < ${#WIZ_NAMES[@]}; i++)); do - local line="${WIZ_NAMES[$i]} bmc_address=${WIZ_IPS[$i]} bmc_user=${WIZ_USERS[$i]} bmc_pass=${WIZ_PASSES[$i]}" + local line="${WIZ_NAMES[$i]} bmc_address=${WIZ_IPS[$i]} bmc_user=${WIZ_USERS[$i]} bmc_pass=${WIZ_PASSES[$i]} bmc_port=${WIZ_PORTS[$i]}" if [[ -n "${WIZ_MACS[$i]}" ]]; 
then line+=" boot_mac=${WIZ_MACS[$i]}" fi @@ -469,6 +485,7 @@ write_inventory() { echo "" echo "[baremetal_nodes:vars]" echo "bmc_driver=redfish" + echo "bmc_port=443" echo "bmc_verify_ca=False" echo "cpu_arch=x86_64" } >> "${tmp_inventory}" @@ -536,6 +553,7 @@ declare -a WIZ_NAMES=() declare -a WIZ_IPS=() declare -a WIZ_USERS=() declare -a WIZ_PASSES=() +declare -a WIZ_PORTS=() declare -a WIZ_MACS=() declare -a WIZ_NODE_IPS=() WIZ_MACHINE_NETWORK="" From 8d9c936acb04d1df13f4f84bdeb27343fef6e7cb Mon Sep 17 00:00:00 2001 From: Pablo Fontanilla Date: Thu, 2 Jul 2026 11:23:46 +0200 Subject: [PATCH 12/20] Move generated adoption artifacts to role files/ directory Place ironic_nodes.json and config_baremetal_fencing.sh in roles/dev-scripts/install-dev/files/ instead of clusters// so existing Ansible config.yml tasks can reference them by basename. Remove --cluster-name flag (only used for the clusters/ subdirectory). Co-Authored-By: Claude Opus 4.6 --- deploy/openshift-clusters/.gitignore | 2 -- .../roles/dev-scripts/install-dev/files/.gitignore | 4 +++- .../openshift-clusters/scripts/baremetal-adopt.sh | 14 +++----------- 3 files changed, 6 insertions(+), 14 deletions(-) diff --git a/deploy/openshift-clusters/.gitignore b/deploy/openshift-clusters/.gitignore index a77bcddb..96b8f64c 100644 --- a/deploy/openshift-clusters/.gitignore +++ b/deploy/openshift-clusters/.gitignore @@ -1,8 +1,6 @@ inventory.ini inventory_baremetal.ini -# Generated adoption artifacts (contain BMC credentials) -clusters/ proxy.env kubeconfig kubeadmin-password diff --git a/deploy/openshift-clusters/roles/dev-scripts/install-dev/files/.gitignore b/deploy/openshift-clusters/roles/dev-scripts/install-dev/files/.gitignore index 0818832e..e6a1d72e 100644 --- a/deploy/openshift-clusters/roles/dev-scripts/install-dev/files/.gitignore +++ b/deploy/openshift-clusters/roles/dev-scripts/install-dev/files/.gitignore @@ -3,4 +3,6 @@ ci_token clusterbot-ci_token config_arbiter.sh config_fencing.sh 
-config_sno.sh \ No newline at end of file +config_sno.sh +config_baremetal_fencing.sh +ironic_nodes.json diff --git a/deploy/openshift-clusters/scripts/baremetal-adopt.sh b/deploy/openshift-clusters/scripts/baremetal-adopt.sh index d5f701c2..0f4bb301 100755 --- a/deploy/openshift-clusters/scripts/baremetal-adopt.sh +++ b/deploy/openshift-clusters/scripts/baremetal-adopt.sh @@ -9,7 +9,6 @@ # baremetal-adopt.sh [options] # # Options: -# --cluster-name NAME Cluster name for output directory (default: ostest) # --skip-verify Skip all BMC access (verify + discovery); requires boot_mac in inventory # --verify-only Only verify BMC credentials, don't generate artifacts # --inventory FILE Path to baremetal inventory (default: inventory_baremetal.ini) @@ -23,7 +22,6 @@ set -o pipefail SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" OC_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)" -CLUSTER_NAME="${CLUSTER_NAME:-ostest}" SKIP_VERIFY=false VERIFY_ONLY=false CONFIG_BASE="" @@ -64,10 +62,6 @@ info() { echo "==> $*"; } parse_args() { while [[ $# -gt 0 ]]; do case $1 in - --cluster-name) - CLUSTER_NAME="$2" - shift 2 - ;; --skip-verify) SKIP_VERIFY=true shift @@ -85,7 +79,7 @@ parse_args() { shift 2 ;; -h|--help) - head -17 "$0" | tail -12 + head -16 "$0" | tail -11 exit 0 ;; *) @@ -482,10 +476,8 @@ main() { exit 0 fi - # Create output directory - local output_dir="${OC_DIR}/clusters/${CLUSTER_NAME}" - umask 077 - mkdir -p "${output_dir}" + # Output alongside existing dev-scripts config files + local output_dir="${OC_DIR}/roles/dev-scripts/install-dev/files" # Generate artifacts local nodes_file="${output_dir}/ironic_nodes.json" From 298336de386a559bcfe67766e643dfcb3a75cc23 Mon Sep 17 00:00:00 2001 From: Pablo Fontanilla Date: Tue, 30 Jun 2026 16:17:51 +0200 Subject: [PATCH 13/20] Add baremetal deploy via dev-scripts ABI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add deploy-baremetal.sh — a standalone shell script that deploys a TNF fencing 
cluster on adopted baremetal nodes using dev-scripts' agent pipeline. Skips libvirt/qemu setup and drives Redfish VirtualMedia for ISO delivery. Supports remote execution: if [provisioning_host] is configured in inventory_baremetal.ini, syncs artifacts via rsync and runs the deploy on the remote host via SSH. Falls back to local execution if absent. Changes: - deploy-baremetal.sh: pre-flight validation, dev-scripts setup, remote execution via SSH with credential retrieval - inventory_baremetal.ini.sample: add [provisioning_host] section - baremetal-adopt.sh: append AGENT_E2E_TEST_SCENARIO to generated config - Makefile: add baremetal-fencing-agent target Co-Authored-By: Claude Opus 4.6 --- deploy/Makefile | 10 +- .../inventory_baremetal.ini.sample | 22 + .../scripts/baremetal-adopt.sh | 1 + .../scripts/deploy-baremetal.sh | 486 ++++++++++++++++++ 4 files changed, 516 insertions(+), 3 deletions(-) create mode 100755 deploy/openshift-clusters/scripts/deploy-baremetal.sh diff --git a/deploy/Makefile b/deploy/Makefile index cd709341..7558c7fd 100644 --- a/deploy/Makefile +++ b/deploy/Makefile @@ -98,6 +98,9 @@ baremetal-adopt: baremetal-verify: @./openshift-clusters/scripts/baremetal-adopt.sh --verify-only +baremetal-fencing-agent: + @./openshift-clusters/scripts/deploy-baremetal.sh + baremetal-wizard: @./openshift-clusters/scripts/baremetal-wizard.sh @@ -148,9 +151,10 @@ help: @echo " patch-nodes - Build resource-agents RPM and patch cluster nodes (default version: 4.11)" @echo "" @echo "Baremetal Adoption:" - @echo " baremetal-adopt - Adopt baremetal nodes: validate BMC + generate dev-scripts artifacts" - @echo " baremetal-verify - Verify BMC credentials for adopted baremetal nodes (no artifacts)" - @echo " baremetal-wizard - Interactive wizard to create baremetal node inventory" + @echo " baremetal-adopt - Adopt baremetal nodes: validate BMC + generate dev-scripts artifacts" + @echo " baremetal-fencing-agent - Deploy TNF cluster on adopted baremetal nodes (local 
dev-scripts ABI)" + @echo " baremetal-verify - Verify BMC credentials for adopted baremetal nodes (no artifacts)" + @echo " baremetal-wizard - Interactive wizard to create baremetal node inventory" @echo "" @echo "Cluster Utilities:" @echo " get-tnf-logs - Collect pacemaker and etcd logs from cluster nodes" diff --git a/deploy/openshift-clusters/inventory_baremetal.ini.sample b/deploy/openshift-clusters/inventory_baremetal.ini.sample index f27a1d90..53d755b9 100644 --- a/deploy/openshift-clusters/inventory_baremetal.ini.sample +++ b/deploy/openshift-clusters/inventory_baremetal.ini.sample @@ -48,3 +48,25 @@ cpu_arch=x86_64 #gateway=192.168.1.1 #api_vip=192.168.1.100 #ingress_vip=192.168.1.101 + +[provisioning_host] +# Provisioning host for baremetal ABI deployment. Must be on the same L2 network +# as the baremetal nodes (serves agent ISO via HTTP, runs dnsmasq, acts as gateway). +# +# If this section is configured, deploy-baremetal.sh syncs artifacts to the host +# and runs the deployment there via SSH. If absent, deployment runs locally. 
+# +# Required: +# ssh_target - user@host for SSH access (key-based auth required) +# +# Optional: +# ssh_key - Path to SSH private key (default: ssh-agent or ~/.ssh/id_rsa) +# dev_scripts_path - DEV_SCRIPTS_PATH on the remote host +# (default: ~/openshift-metal3/dev-scripts) +# working_dir - Remote staging directory for TNT artifacts +# (default: ~/tnt-baremetal) +# +#ssh_target=root@10.1.155.50 +#ssh_key=~/.ssh/lab_key +#dev_scripts_path=~/openshift-metal3/dev-scripts +#working_dir=~/tnt-baremetal diff --git a/deploy/openshift-clusters/scripts/baremetal-adopt.sh b/deploy/openshift-clusters/scripts/baremetal-adopt.sh index 0f4bb301..744a8ed6 100755 --- a/deploy/openshift-clusters/scripts/baremetal-adopt.sh +++ b/deploy/openshift-clusters/scripts/baremetal-adopt.sh @@ -421,6 +421,7 @@ generate_baremetal_config() { echo "export MANAGE_BR_BRIDGE=n" echo "export MANAGE_PRO_BRIDGE=n" echo "export MANAGE_INT_BRIDGE=n" + echo "export AGENT_E2E_TEST_SCENARIO=\"TNF_IPV4\"" if [[ -n "${MACHINE_NETWORK}" || -n "${GATEWAY}" || -n "${API_VIP}" || -n "${INGRESS_VIP}" ]]; then echo "" diff --git a/deploy/openshift-clusters/scripts/deploy-baremetal.sh b/deploy/openshift-clusters/scripts/deploy-baremetal.sh new file mode 100755 index 00000000..742f96f3 --- /dev/null +++ b/deploy/openshift-clusters/scripts/deploy-baremetal.sh @@ -0,0 +1,486 @@ +#!/usr/bin/bash +# +# Deploy a TNF fencing cluster on adopted baremetal nodes via dev-scripts ABI. +# +# If [provisioning_host] is configured in inventory_baremetal.ini, syncs +# artifacts and executes on the remote host via SSH. Otherwise runs locally. +# Expects adoption artifacts from 'make baremetal-adopt'. 
+# +# Usage: +# deploy-baremetal.sh [options] +# +# Options: +# --cluster-name NAME Cluster name matching adoption artifacts (default: ostest) +# --dev-scripts-path PATH Path to dev-scripts checkout (default: ~/openshift-metal3/dev-scripts) +# -h, --help Show this help message + +set -o nounset +set -o errexit +set -o pipefail + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +OC_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)" + +CLUSTER_NAME="${CLUSTER_NAME:-ostest}" +DEV_SCRIPTS_PATH="${DEV_SCRIPTS_PATH:-${HOME}/openshift-metal3/dev-scripts}" +DEV_SCRIPTS_REPO="https://github.com/openshift-metal3/dev-scripts" + +############################################################################## +# Helpers +############################################################################## + +die() { echo "Error: $*" >&2; exit 1; } + +info() { echo "==> $*"; } + +warn() { echo " WARNING: $*" >&2; } + +############################################################################## +# Argument parsing +############################################################################## + +parse_args() { + while [[ $# -gt 0 ]]; do + case $1 in + --cluster-name) + CLUSTER_NAME="$2" + shift 2 + ;; + --dev-scripts-path) + DEV_SCRIPTS_PATH="$2" + shift 2 + ;; + -h|--help) + head -15 "$0" | tail -10 + exit 0 + ;; + *) + die "Unknown option: $1. Run '$0 --help' for usage." + ;; + esac + done +} + +############################################################################## +# Pre-flight validation +############################################################################## + +REQUIRED_TOOLS=(podman oc jq curl dnsmasq firewall-cmd xmllint ansible-playbook go) + +validate_tools() { + local missing=() + for tool in "${REQUIRED_TOOLS[@]}"; do + if ! 
command -v "${tool}" &>/dev/null; then + missing+=("${tool}") + fi + done + + if [[ ${#missing[@]} -gt 0 ]]; then + echo "" + echo "Missing required tools: ${missing[*]}" + echo "" + echo "Install with:" + echo " sudo dnf install -y podman jq curl dnsmasq firewalld libxml2 ansible-core golang" + echo "" + echo "Then install oc:" + echo " Download from https://mirror.openshift.com/pub/openshift-v4/clients/ocp/stable/" + die "Install missing tools and re-run." + fi + info "Required tools present" + + # Ansible collections required by dev-scripts agent manifests + local collections_needed=0 + for col in ansible.utils ansible.netcommon ansible.posix community.general; do + if ! ansible-galaxy collection list 2>/dev/null | grep -q "${col}"; then + collections_needed=1 + break + fi + done + + if [[ ${collections_needed} -eq 1 ]]; then + info "Installing required Ansible collections..." + ansible-galaxy collection install 'ansible.netcommon<8.0.0' ansible.posix 'ansible.utils<6.0.0' community.general + fi +} + +validate_sudo() { + if ! sudo -n true 2>/dev/null; then + die "Dev-scripts requires sudo access (package installs, network config, ironic dirs).\nRun this script from an interactive terminal, or configure passwordless sudo." + fi +} + +validate_artifacts() { + local cluster_dir="${OC_DIR}/clusters/${CLUSTER_NAME}" + + [[ -d "${cluster_dir}" ]] || \ + die "Cluster directory not found: ${cluster_dir}\nRun 'make baremetal-adopt' first." + + CONFIG_FILE="${cluster_dir}/config_baremetal_fencing.sh" + NODES_FILE="${cluster_dir}/ironic_nodes.json" + + [[ -f "${CONFIG_FILE}" ]] || \ + die "Config not found: ${CONFIG_FILE}\nRun 'make baremetal-adopt' first." + [[ -f "${NODES_FILE}" ]] || \ + die "Nodes file not found: ${NODES_FILE}\nRun 'make baremetal-adopt' first." 
+ + info "Adoption artifacts found in ${cluster_dir}" +} + +validate_pull_secret() { + PULL_SECRET="${OC_DIR}/roles/dev-scripts/install-dev/files/pull-secret.json" + + [[ -f "${PULL_SECRET}" ]] || \ + die "Pull secret not found: ${PULL_SECRET}\nCopy your pull secret to this path." + + if ! jq empty "${PULL_SECRET}" 2>/dev/null; then + die "Invalid JSON in ${PULL_SECRET}\nValidate with: python3 -m json.tool ${PULL_SECRET}" + fi +} + +validate_config() { + local config_content + config_content=$(<"${CONFIG_FILE}") + + # AGENT_E2E_TEST_SCENARIO must be set + if ! grep -qE '^export AGENT_E2E_TEST_SCENARIO=' <<<"${config_content}"; then + die "AGENT_E2E_TEST_SCENARIO not set in ${CONFIG_FILE}.\nThis should have been added by 'make baremetal-adopt'. Re-run adoption." + fi + + # CI token validation (if using CI registry) + local ci_token="" + ci_token=$(grep -oP '^export CI_TOKEN="\K[^"]+' <<<"${config_content}" || true) + + if [[ -n "${ci_token}" ]]; then + local release_registry="" + release_registry=$(grep -oP '^export OPENSHIFT_RELEASE_IMAGE="?\K[^/"]+' <<<"${config_content}" || true) + + if [[ "${release_registry}" == "registry.ci.openshift.org" ]]; then + info "Validating CI token against ${release_registry}..." + local http_code + http_code=$(curl -s -o /dev/null -w '%{http_code}' \ + -H "Authorization: Bearer ${ci_token}" \ + "https://${release_registry}/v2/" 2>/dev/null) || http_code="000" + + if [[ "${http_code}" != "200" ]]; then + die "CI token is invalid or expired for ${release_registry} (HTTP ${http_code}).\nUpdate CI_TOKEN in your base config and re-run 'make baremetal-adopt'." + fi + info "CI token valid" + fi + fi + + # CI registry in pull-secret check + if grep -q 'registry.ci.openshift.org' <<<"${config_content}"; then + if ! jq -e '.auths["registry.ci.openshift.org"]' "${PULL_SECRET}" >/dev/null 2>&1; then + die "Config uses CI registry but pull secret lacks registry.ci.openshift.org credentials." + fi + fi + + # BAREMETAL_ISO_SERVER warning + if ! 
grep -qE '^export BAREMETAL_ISO_SERVER=' <<<"${config_content}"; then + warn "BAREMETAL_ISO_SERVER not set in config." + warn "You must set this in dev-scripts config before running 'make agent'." + warn "The ISO server must be reachable from BMC networks (not the provisioning host)." + fi +} + +############################################################################## +# Dev-scripts setup +############################################################################## + +setup_dev_scripts() { + if [[ ! -d "${DEV_SCRIPTS_PATH}" ]]; then + info "Dev-scripts not found at ${DEV_SCRIPTS_PATH}, cloning..." + git clone "${DEV_SCRIPTS_REPO}" "${DEV_SCRIPTS_PATH}" + else + info "Using dev-scripts at ${DEV_SCRIPTS_PATH}" + fi + + [[ -f "${DEV_SCRIPTS_PATH}/Makefile" ]] || \ + die "Invalid dev-scripts checkout: ${DEV_SCRIPTS_PATH} (no Makefile found)" + + local ds_user + ds_user=$(whoami) + + # Set WORKING_DIR in config if not already present — avoids sudo for /opt/dev-scripts + local working_dir="${WORKING_DIR:-${HOME}/dev-scripts-workdir}" + if ! grep -qE '^export WORKING_DIR=' "${CONFIG_FILE}"; then + info "Setting WORKING_DIR=${working_dir}" + fi + mkdir -p "${working_dir}" + + # Resolve PROVISIONING_HOST_EXTERNAL_IP for baremetal. + # In the libvirt flow this is the host running dnsmasq on the virtual bridge. + # For baremetal, the nodes use real network infrastructure — default to the + # gateway as DNS server (common in lab networks). Override with BAREMETAL_DNS + # if the lab has a dedicated DNS server. + local prov_ip="" + prov_ip=$(grep -oP '^export PROVISIONING_HOST_EXTERNAL_IP="\K[^"]+' "${CONFIG_FILE}" || true) + if [[ -z "${prov_ip}" ]]; then + local dns_ip="${BAREMETAL_DNS:-}" + if [[ -z "${dns_ip}" ]]; then + dns_ip=$(grep -oP '^export BAREMETAL_GATEWAY="\K[^"]+' "${CONFIG_FILE}" || true) + fi + if [[ -z "${dns_ip}" ]]; then + die "Cannot determine DNS server for nodes. Set BAREMETAL_DNS or BAREMETAL_GATEWAY in config." 
+ fi + info "Setting PROVISIONING_HOST_EXTERNAL_IP=${dns_ip} (from BAREMETAL_DNS/BAREMETAL_GATEWAY)" + fi + + # Ensure 'python' resolves — dev-scripts' nth_ip() calls bare 'python', + # and Fedora only ships 'python3'. + if ! command -v python &>/dev/null && command -v python3 &>/dev/null; then + local python_wrapper="${DEV_SCRIPTS_PATH}/.local-bin" + mkdir -p "${python_wrapper}" + ln -sf "$(command -v python3)" "${python_wrapper}/python" + export PATH="${python_wrapper}:${PATH}" + fi + + # Ansible collections installed in user home need to be on PYTHONPATH + # for nth_ip() which calls python directly (not via ansible-playbook). + if [[ -d "${HOME}/.ansible/collections" ]]; then + export PYTHONPATH="${HOME}/.ansible/collections:${PYTHONPATH:-}" + fi + + info "Deploying config to dev-scripts" + { + cat "${CONFIG_FILE}" + if ! grep -qE '^export WORKING_DIR=' "${CONFIG_FILE}"; then + echo "" + echo "# Working directory (set by deploy-baremetal.sh)" + echo "export WORKING_DIR=\"${working_dir}\"" + fi + if [[ -z "${prov_ip}" && -n "${dns_ip:-}" ]]; then + echo "" + echo "# DNS/gateway for node NMState config (set by deploy-baremetal.sh)" + echo "export PROVISIONING_HOST_EXTERNAL_IP=\"${dns_ip}\"" + fi + } > "${DEV_SCRIPTS_PATH}/config_${ds_user}.sh" + cp "${PULL_SECRET}" "${DEV_SCRIPTS_PATH}/pull_secret.json" + cp "${NODES_FILE}" "${DEV_SCRIPTS_PATH}/ironic_nodes.json" + + # REGISTRY_CREDS defaults to ~/private-mirror-.json — the local + # mirror registry credentials. Baremetal deploys don't run a local registry, + # so create an empty auth file to prevent jq merge failures in write_pull_secret(). + local registry_creds="${HOME}/private-mirror-${CLUSTER_NAME}.json" + if [[ ! 
-f "${registry_creds}" ]]; then + echo '{"auths":{}}' > "${registry_creds}" + info " mirror → ${registry_creds} (empty — no local registry)" + fi + + info " config → config_${ds_user}.sh" + info " secret → pull_secret.json" + info " nodes → ironic_nodes.json" +} + +############################################################################## +# Remote execution (provisioning host) +############################################################################## + +parse_provisioning_host() { + local inventory="${OC_DIR}/inventory_baremetal.ini" + PROV_SSH_TARGET="" + PROV_SSH_KEY="" + PROV_DEV_SCRIPTS_PATH="" + PROV_WORKING_DIR="tnt-baremetal" + + [[ -f "${inventory}" ]] || return 0 + + local in_section=false + while IFS= read -r line || [[ -n "${line}" ]]; do + line="${line%%#*}" + line="${line#"${line%%[![:space:]]*}"}" + line="${line%"${line##*[![:space:]]}"}" + [[ -z "${line}" ]] && continue + + if [[ "${line}" == "[provisioning_host]" ]]; then + in_section=true + continue + elif [[ "${line}" =~ ^\[.*\] ]]; then + in_section=false + continue + fi + + if ${in_section}; then + local key="${line%%=*}" + local val="${line#*=}" + case "${key}" in + ssh_target) PROV_SSH_TARGET="${val}" ;; + ssh_key) PROV_SSH_KEY="${val}" ;; + dev_scripts_path) PROV_DEV_SCRIPTS_PATH="${val}" ;; + working_dir) PROV_WORKING_DIR="${val}" ;; + esac + fi + done < "${inventory}" +} + +build_ssh_opts() { + SSH_OPTS=(-o "ServerAliveInterval=30" -o "ServerAliveCountMax=120") + [[ -n "${PROV_SSH_KEY:-}" ]] && SSH_OPTS+=(-i "${PROV_SSH_KEY}") +} + +validate_ssh_connectivity() { + info "Validating SSH access to ${PROV_SSH_TARGET}..." + + local opts=(-o "ConnectTimeout=10" -o "BatchMode=yes") + [[ -n "${PROV_SSH_KEY:-}" ]] && opts+=(-i "${PROV_SSH_KEY}") + + if ! ssh "${opts[@]}" "${PROV_SSH_TARGET}" "true" 2>/dev/null; then + die "Cannot SSH to provisioning host: ${PROV_SSH_TARGET} +Ensure: + 1. SSH key-based auth is configured + 2. The host is reachable from this machine + 3. 
ssh_key is set in inventory_baremetal.ini if using a non-default key" + fi + + if ! ssh "${opts[@]}" "${PROV_SSH_TARGET}" "command -v rsync" &>/dev/null; then + die "rsync not found on provisioning host. +Install with: ssh ${PROV_SSH_TARGET} 'sudo dnf install -y rsync'" + fi + + info "SSH connectivity OK" +} + +sync_to_remote() { + local remote_dir="${PROV_WORKING_DIR}" + + info "Syncing artifacts to ${PROV_SSH_TARGET}:~/${remote_dir}" + + # shellcheck disable=SC2029 + ssh "${SSH_OPTS[@]}" "${PROV_SSH_TARGET}" \ + "mkdir -p ~/${remote_dir}/{scripts,clusters/${CLUSTER_NAME},roles/dev-scripts/install-dev/files}" + + rsync -az -e "ssh ${SSH_OPTS[*]}" \ + "${SCRIPT_DIR}/deploy-baremetal.sh" \ + "${PROV_SSH_TARGET}:~/${remote_dir}/scripts/" + + rsync -az -e "ssh ${SSH_OPTS[*]}" \ + "${OC_DIR}/clusters/${CLUSTER_NAME}/config_baremetal_fencing.sh" \ + "${OC_DIR}/clusters/${CLUSTER_NAME}/ironic_nodes.json" \ + "${PROV_SSH_TARGET}:~/${remote_dir}/clusters/${CLUSTER_NAME}/" + + rsync -az -e "ssh ${SSH_OPTS[*]}" \ + "${PULL_SECRET}" \ + "${PROV_SSH_TARGET}:~/${remote_dir}/roles/dev-scripts/install-dev/files/pull-secret.json" + + rsync -az -e "ssh ${SSH_OPTS[*]}" \ + "${OC_DIR}/inventory_baremetal.ini" \ + "${PROV_SSH_TARGET}:~/${remote_dir}/inventory_baremetal.ini" + + info "Sync complete" +} + +exec_on_remote() { + # shellcheck disable=SC2088 # tilde expands on the remote shell via SSH + local remote_script="~/${PROV_WORKING_DIR}/scripts/deploy-baremetal.sh" + local remote_args="--cluster-name ${CLUSTER_NAME}" + [[ -n "${PROV_DEV_SCRIPTS_PATH:-}" ]] && \ + remote_args+=" --dev-scripts-path ${PROV_DEV_SCRIPTS_PATH}" + + info "Executing deploy on ${PROV_SSH_TARGET}..." 
+ info "Remote output follows:" + echo "==========================================" + + # shellcheck disable=SC2029 + ssh -tt "${SSH_OPTS[@]}" "${PROV_SSH_TARGET}" \ + "TNT_REMOTE_EXEC=1 bash ${remote_script} ${remote_args}" +} + +fetch_credentials() { + local remote_home + remote_home=$(ssh "${SSH_OPTS[@]}" "${PROV_SSH_TARGET}" 'echo ${HOME}') + local remote_ds="${PROV_DEV_SCRIPTS_PATH:-${remote_home}/openshift-metal3/dev-scripts}" + local remote_auth="${remote_ds}/ocp/${CLUSTER_NAME}/auth" + local local_auth="${OC_DIR}/clusters/${CLUSTER_NAME}/auth" + + info "Fetching cluster credentials from provisioning host..." + mkdir -p "${local_auth}" + + rsync -az -e "ssh ${SSH_OPTS[*]}" \ + "${PROV_SSH_TARGET}:${remote_auth}/" \ + "${local_auth}/" + + info "Kubeconfig: ${local_auth}/kubeconfig" + info "Password: ${local_auth}/kubeadmin-password" +} + +############################################################################## +# Main +############################################################################## + +main() { + parse_args "$@" + + # Remote execution: if [provisioning_host] is configured and we're not + # already running on the remote side, sync artifacts and SSH in. + if [[ -z "${TNT_REMOTE_EXEC:-}" ]]; then + parse_provisioning_host + if [[ -n "${PROV_SSH_TARGET}" ]]; then + build_ssh_opts + info "Baremetal TNF deployment — cluster: ${CLUSTER_NAME}" + info "Provisioning host: ${PROV_SSH_TARGET}" + echo "" + + # Validate artifacts locally (fast, catches errors before SSH) + validate_artifacts + validate_pull_secret + validate_config + echo "" + + validate_ssh_connectivity + sync_to_remote + echo "" + exec_on_remote + echo "" + echo "==========================================" + fetch_credentials + echo "" + info "Baremetal TNF cluster deployed via provisioning host!" 
+ exit 0 + fi + fi + + # Local execution (on provisioning host or standalone) + info "Baremetal TNF deployment — cluster: ${CLUSTER_NAME}" + echo "" + + validate_tools + validate_sudo + validate_artifacts + validate_pull_secret + validate_config + echo "" + + setup_dev_scripts + echo "" + + # Run dev-scripts ABI pipeline — individual targets, skipping + # 'requirements' (01_install_requirements.sh) and 'configure' + # (02_configure_host.sh) which install libvirt/qemu packages and + # create VM networks. Baremetal deploys to real hardware via Redfish. + info "Starting dev-scripts ABI pipeline (baremetal — no VM setup)..." + info "This will take 30-60 minutes on baremetal nodes." + echo "" + + if make -C "${DEV_SCRIPTS_PATH}" agent_requirements agent_build_installer agent_prepare_release agent_configure agent_create_cluster; then + echo "" + info "Baremetal TNF cluster deployed successfully!" + info "Kubeconfig: ${DEV_SCRIPTS_PATH}/ocp/${CLUSTER_NAME}/auth/kubeconfig" + info "Console: https://console-openshift-console.apps.${CLUSTER_NAME}.$(grep -oP 'BASE_DOMAIN="\K[^"]+' "${CONFIG_FILE}" 2>/dev/null || echo '')/" + else + echo "" + echo "==========================================" + echo " DEPLOYMENT FAILED" + echo "==========================================" + echo "" + echo " To recover:" + echo " 1. Power off baremetal nodes via BMC" + echo " 2. Clean dev-scripts state:" + echo " make -C ${DEV_SCRIPTS_PATH} clean" + echo " 3. 
Fix the issue and re-run:" + echo " make baremetal-fencing-agent" + echo "" + exit 1 + fi +} + +main "$@" From 1053afe4e553726161897de1754365d240ca9d29 Mon Sep 17 00:00:00 2001 From: Pablo Fontanilla Date: Wed, 1 Jul 2026 12:58:03 +0200 Subject: [PATCH 14/20] Add fork/branch support, run requirements, remove laptop workarounds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add --dev-scripts-repo and --dev-scripts-branch CLI args so the deploy script can clone from a fork or switch an existing checkout to a specific branch (needed for dev-scripts PRs not yet merged upstream) - Add dev_scripts_repo and dev_scripts_branch to [provisioning_host] inventory section, forwarded via SSH remote execution - Remove validate_tools() — dev-scripts' make requirements now runs as part of the pipeline, handling all dependency installation - Remove python wrapper and PYTHONPATH workarounds (Fedora laptop hacks, not needed on RHEL provisioning host) - Normalize [[ ]] && to if/then blocks for readability Co-Authored-By: Claude Opus 4.6 --- .../inventory_baremetal.ini.sample | 14 +- .../scripts/deploy-baremetal.sh | 126 ++++++++---------- 2 files changed, 64 insertions(+), 76 deletions(-) diff --git a/deploy/openshift-clusters/inventory_baremetal.ini.sample b/deploy/openshift-clusters/inventory_baremetal.ini.sample index 53d755b9..960a4def 100644 --- a/deploy/openshift-clusters/inventory_baremetal.ini.sample +++ b/deploy/openshift-clusters/inventory_baremetal.ini.sample @@ -60,13 +60,17 @@ cpu_arch=x86_64 # ssh_target - user@host for SSH access (key-based auth required) # # Optional: -# ssh_key - Path to SSH private key (default: ssh-agent or ~/.ssh/id_rsa) -# dev_scripts_path - DEV_SCRIPTS_PATH on the remote host -# (default: ~/openshift-metal3/dev-scripts) -# working_dir - Remote staging directory for TNT artifacts -# (default: ~/tnt-baremetal) +# ssh_key - Path to SSH private key (default: ssh-agent or ~/.ssh/id_rsa) +# dev_scripts_path - 
DEV_SCRIPTS_PATH on the remote host +# (default: ~/openshift-metal3/dev-scripts) +# dev_scripts_repo - Git repo URL for dev-scripts (default: upstream openshift-metal3) +# dev_scripts_branch - Git branch to checkout (default: repo's current branch) +# working_dir - Remote staging directory for TNT artifacts +# (default: ~/tnt-baremetal) # #ssh_target=root@10.1.155.50 #ssh_key=~/.ssh/lab_key #dev_scripts_path=~/openshift-metal3/dev-scripts +#dev_scripts_repo=https://github.com/myuser/dev-scripts +#dev_scripts_branch=my-feature-branch #working_dir=~/tnt-baremetal diff --git a/deploy/openshift-clusters/scripts/deploy-baremetal.sh b/deploy/openshift-clusters/scripts/deploy-baremetal.sh index 742f96f3..caaa7bd3 100755 --- a/deploy/openshift-clusters/scripts/deploy-baremetal.sh +++ b/deploy/openshift-clusters/scripts/deploy-baremetal.sh @@ -10,9 +10,11 @@ # deploy-baremetal.sh [options] # # Options: -# --cluster-name NAME Cluster name matching adoption artifacts (default: ostest) -# --dev-scripts-path PATH Path to dev-scripts checkout (default: ~/openshift-metal3/dev-scripts) -# -h, --help Show this help message +# --cluster-name NAME Cluster name matching adoption artifacts (default: ostest) +# --dev-scripts-path PATH Path to dev-scripts checkout (default: ~/openshift-metal3/dev-scripts) +# --dev-scripts-repo URL Git repo for dev-scripts (default: upstream openshift-metal3) +# --dev-scripts-branch BRANCH Git branch to checkout (default: repo's current branch) +# -h, --help Show this help message set -o nounset set -o errexit @@ -23,7 +25,8 @@ OC_DIR="$(cd "${SCRIPT_DIR}/.." 
&& pwd)" CLUSTER_NAME="${CLUSTER_NAME:-ostest}" DEV_SCRIPTS_PATH="${DEV_SCRIPTS_PATH:-${HOME}/openshift-metal3/dev-scripts}" -DEV_SCRIPTS_REPO="https://github.com/openshift-metal3/dev-scripts" +DEV_SCRIPTS_REPO="${DEV_SCRIPTS_REPO:-https://github.com/openshift-metal3/dev-scripts}" +DEV_SCRIPTS_BRANCH="${DEV_SCRIPTS_BRANCH:-}" ############################################################################## # Helpers @@ -50,6 +53,14 @@ parse_args() { DEV_SCRIPTS_PATH="$2" shift 2 ;; + --dev-scripts-repo) + DEV_SCRIPTS_REPO="$2" + shift 2 + ;; + --dev-scripts-branch) + DEV_SCRIPTS_BRANCH="$2" + shift 2 + ;; -h|--help) head -15 "$0" | tail -10 exit 0 @@ -65,44 +76,6 @@ parse_args() { # Pre-flight validation ############################################################################## -REQUIRED_TOOLS=(podman oc jq curl dnsmasq firewall-cmd xmllint ansible-playbook go) - -validate_tools() { - local missing=() - for tool in "${REQUIRED_TOOLS[@]}"; do - if ! command -v "${tool}" &>/dev/null; then - missing+=("${tool}") - fi - done - - if [[ ${#missing[@]} -gt 0 ]]; then - echo "" - echo "Missing required tools: ${missing[*]}" - echo "" - echo "Install with:" - echo " sudo dnf install -y podman jq curl dnsmasq firewalld libxml2 ansible-core golang" - echo "" - echo "Then install oc:" - echo " Download from https://mirror.openshift.com/pub/openshift-v4/clients/ocp/stable/" - die "Install missing tools and re-run." - fi - info "Required tools present" - - # Ansible collections required by dev-scripts agent manifests - local collections_needed=0 - for col in ansible.utils ansible.netcommon ansible.posix community.general; do - if ! ansible-galaxy collection list 2>/dev/null | grep -q "${col}"; then - collections_needed=1 - break - fi - done - - if [[ ${collections_needed} -eq 1 ]]; then - info "Installing required Ansible collections..." 
- ansible-galaxy collection install 'ansible.netcommon<8.0.0' ansible.posix 'ansible.utils<6.0.0' community.general - fi -} - validate_sudo() { if ! sudo -n true 2>/dev/null; then die "Dev-scripts requires sudo access (package installs, network config, ironic dirs).\nRun this script from an interactive terminal, or configure passwordless sudo." @@ -189,10 +162,23 @@ validate_config() { setup_dev_scripts() { if [[ ! -d "${DEV_SCRIPTS_PATH}" ]]; then - info "Dev-scripts not found at ${DEV_SCRIPTS_PATH}, cloning..." - git clone "${DEV_SCRIPTS_REPO}" "${DEV_SCRIPTS_PATH}" + info "Cloning dev-scripts from ${DEV_SCRIPTS_REPO}${DEV_SCRIPTS_BRANCH:+ (branch: ${DEV_SCRIPTS_BRANCH})}..." + local clone_args=("${DEV_SCRIPTS_REPO}" "${DEV_SCRIPTS_PATH}") + if [[ -n "${DEV_SCRIPTS_BRANCH}" ]]; then + clone_args=(-b "${DEV_SCRIPTS_BRANCH}" "${clone_args[@]}") + fi + git clone "${clone_args[@]}" else info "Using dev-scripts at ${DEV_SCRIPTS_PATH}" + if [[ -n "${DEV_SCRIPTS_BRANCH}" ]]; then + local current_branch + current_branch=$(git -C "${DEV_SCRIPTS_PATH}" rev-parse --abbrev-ref HEAD) + if [[ "${current_branch}" != "${DEV_SCRIPTS_BRANCH}" ]]; then + info "Switching dev-scripts from ${current_branch} to ${DEV_SCRIPTS_BRANCH}..." + git -C "${DEV_SCRIPTS_PATH}" fetch --all --quiet + git -C "${DEV_SCRIPTS_PATH}" checkout "${DEV_SCRIPTS_BRANCH}" + fi + fi fi [[ -f "${DEV_SCRIPTS_PATH}/Makefile" ]] || \ @@ -226,21 +212,6 @@ setup_dev_scripts() { info "Setting PROVISIONING_HOST_EXTERNAL_IP=${dns_ip} (from BAREMETAL_DNS/BAREMETAL_GATEWAY)" fi - # Ensure 'python' resolves — dev-scripts' nth_ip() calls bare 'python', - # and Fedora only ships 'python3'. - if ! 
command -v python &>/dev/null && command -v python3 &>/dev/null; then - local python_wrapper="${DEV_SCRIPTS_PATH}/.local-bin" - mkdir -p "${python_wrapper}" - ln -sf "$(command -v python3)" "${python_wrapper}/python" - export PATH="${python_wrapper}:${PATH}" - fi - - # Ansible collections installed in user home need to be on PYTHONPATH - # for nth_ip() which calls python directly (not via ansible-playbook). - if [[ -d "${HOME}/.ansible/collections" ]]; then - export PYTHONPATH="${HOME}/.ansible/collections:${PYTHONPATH:-}" - fi - info "Deploying config to dev-scripts" { cat "${CONFIG_FILE}" @@ -281,6 +252,8 @@ parse_provisioning_host() { PROV_SSH_TARGET="" PROV_SSH_KEY="" PROV_DEV_SCRIPTS_PATH="" + PROV_DEV_SCRIPTS_REPO="" + PROV_DEV_SCRIPTS_BRANCH="" PROV_WORKING_DIR="tnt-baremetal" [[ -f "${inventory}" ]] || return 0 @@ -306,8 +279,10 @@ parse_provisioning_host() { case "${key}" in ssh_target) PROV_SSH_TARGET="${val}" ;; ssh_key) PROV_SSH_KEY="${val}" ;; - dev_scripts_path) PROV_DEV_SCRIPTS_PATH="${val}" ;; - working_dir) PROV_WORKING_DIR="${val}" ;; + dev_scripts_path) PROV_DEV_SCRIPTS_PATH="${val}" ;; + dev_scripts_repo) PROV_DEV_SCRIPTS_REPO="${val}" ;; + dev_scripts_branch) PROV_DEV_SCRIPTS_BRANCH="${val}" ;; + working_dir) PROV_WORKING_DIR="${val}" ;; esac fi done < "${inventory}" @@ -315,14 +290,18 @@ parse_provisioning_host() { build_ssh_opts() { SSH_OPTS=(-o "ServerAliveInterval=30" -o "ServerAliveCountMax=120") - [[ -n "${PROV_SSH_KEY:-}" ]] && SSH_OPTS+=(-i "${PROV_SSH_KEY}") + if [[ -n "${PROV_SSH_KEY:-}" ]]; then + SSH_OPTS+=(-i "${PROV_SSH_KEY}") + fi } validate_ssh_connectivity() { info "Validating SSH access to ${PROV_SSH_TARGET}..." local opts=(-o "ConnectTimeout=10" -o "BatchMode=yes") - [[ -n "${PROV_SSH_KEY:-}" ]] && opts+=(-i "${PROV_SSH_KEY}") + if [[ -n "${PROV_SSH_KEY:-}" ]]; then + opts+=(-i "${PROV_SSH_KEY}") + fi if ! 
ssh "${opts[@]}" "${PROV_SSH_TARGET}" "true" 2>/dev/null; then die "Cannot SSH to provisioning host: ${PROV_SSH_TARGET} @@ -373,8 +352,15 @@ exec_on_remote() { # shellcheck disable=SC2088 # tilde expands on the remote shell via SSH local remote_script="~/${PROV_WORKING_DIR}/scripts/deploy-baremetal.sh" local remote_args="--cluster-name ${CLUSTER_NAME}" - [[ -n "${PROV_DEV_SCRIPTS_PATH:-}" ]] && \ + if [[ -n "${PROV_DEV_SCRIPTS_PATH:-}" ]]; then remote_args+=" --dev-scripts-path ${PROV_DEV_SCRIPTS_PATH}" + fi + if [[ -n "${PROV_DEV_SCRIPTS_REPO:-}" ]]; then + remote_args+=" --dev-scripts-repo ${PROV_DEV_SCRIPTS_REPO}" + fi + if [[ -n "${PROV_DEV_SCRIPTS_BRANCH:-}" ]]; then + remote_args+=" --dev-scripts-branch ${PROV_DEV_SCRIPTS_BRANCH}" + fi info "Executing deploy on ${PROV_SSH_TARGET}..." info "Remote output follows:" @@ -443,7 +429,6 @@ main() { info "Baremetal TNF deployment — cluster: ${CLUSTER_NAME}" echo "" - validate_tools validate_sudo validate_artifacts validate_pull_secret @@ -453,15 +438,14 @@ main() { setup_dev_scripts echo "" - # Run dev-scripts ABI pipeline — individual targets, skipping - # 'requirements' (01_install_requirements.sh) and 'configure' - # (02_configure_host.sh) which install libvirt/qemu packages and - # create VM networks. Baremetal deploys to real hardware via Redfish. - info "Starting dev-scripts ABI pipeline (baremetal — no VM setup)..." + # Run dev-scripts ABI pipeline. Includes 'requirements' to install system + # dependencies (nmstate, netaddr, ansible, etc.). Skips 'configure' + # (02_configure_host.sh) which creates libvirt networks and VMs. + info "Starting dev-scripts ABI pipeline..." info "This will take 30-60 minutes on baremetal nodes." 
echo "" - if make -C "${DEV_SCRIPTS_PATH}" agent_requirements agent_build_installer agent_prepare_release agent_configure agent_create_cluster; then + if make -C "${DEV_SCRIPTS_PATH}" requirements agent_requirements agent_build_installer agent_prepare_release agent_configure agent_create_cluster; then echo "" info "Baremetal TNF cluster deployed successfully!" info "Kubeconfig: ${DEV_SCRIPTS_PATH}/ocp/${CLUSTER_NAME}/auth/kubeconfig" From 368571b930ef58d455110a21bac3ccaa6cd30cfa Mon Sep 17 00:00:00 2001 From: Pablo Fontanilla Date: Wed, 1 Jul 2026 18:19:22 +0200 Subject: [PATCH 15/20] Switch baremetal deploy to DHCP networking mode Use TNF_IPV4_DHCP instead of TNF_IPV4 to skip static nmstate generation in agent-config. Nodes get IP/DNS/gateway from the lab DHCP server, avoiding the gateway-as-DNS bug in the static networking template. Co-Authored-By: Claude Opus 4.6 --- deploy/openshift-clusters/scripts/baremetal-adopt.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/openshift-clusters/scripts/baremetal-adopt.sh b/deploy/openshift-clusters/scripts/baremetal-adopt.sh index 744a8ed6..b6a27e30 100755 --- a/deploy/openshift-clusters/scripts/baremetal-adopt.sh +++ b/deploy/openshift-clusters/scripts/baremetal-adopt.sh @@ -421,7 +421,7 @@ generate_baremetal_config() { echo "export MANAGE_BR_BRIDGE=n" echo "export MANAGE_PRO_BRIDGE=n" echo "export MANAGE_INT_BRIDGE=n" - echo "export AGENT_E2E_TEST_SCENARIO=\"TNF_IPV4\"" + echo "export AGENT_E2E_TEST_SCENARIO=\"TNF_IPV4_DHCP\"" if [[ -n "${MACHINE_NETWORK}" || -n "${GATEWAY}" || -n "${API_VIP}" || -n "${INGRESS_VIP}" ]]; then echo "" From dc00a3bba760a47360e47c33d35ba88ebcfd3295 Mon Sep 17 00:00:00 2001 From: Pablo Fontanilla Date: Wed, 1 Jul 2026 18:20:05 +0200 Subject: [PATCH 16/20] Remove PROVISIONING_HOST_EXTERNAL_IP resolution from deploy-baremetal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With DHCP networking mode, nodes get DNS from the lab 
DHCP server instead of static nmstate config. The PROVISIONING_HOST_EXTERNAL_IP variable is no longer needed for node configuration — dev-scripts provides a harmless default via network.sh. Co-Authored-By: Claude Opus 4.6 --- .../scripts/deploy-baremetal.sh | 23 ------------------- 1 file changed, 23 deletions(-) diff --git a/deploy/openshift-clusters/scripts/deploy-baremetal.sh b/deploy/openshift-clusters/scripts/deploy-baremetal.sh index caaa7bd3..d7c4179a 100755 --- a/deploy/openshift-clusters/scripts/deploy-baremetal.sh +++ b/deploy/openshift-clusters/scripts/deploy-baremetal.sh @@ -194,24 +194,6 @@ setup_dev_scripts() { fi mkdir -p "${working_dir}" - # Resolve PROVISIONING_HOST_EXTERNAL_IP for baremetal. - # In the libvirt flow this is the host running dnsmasq on the virtual bridge. - # For baremetal, the nodes use real network infrastructure — default to the - # gateway as DNS server (common in lab networks). Override with BAREMETAL_DNS - # if the lab has a dedicated DNS server. - local prov_ip="" - prov_ip=$(grep -oP '^export PROVISIONING_HOST_EXTERNAL_IP="\K[^"]+' "${CONFIG_FILE}" || true) - if [[ -z "${prov_ip}" ]]; then - local dns_ip="${BAREMETAL_DNS:-}" - if [[ -z "${dns_ip}" ]]; then - dns_ip=$(grep -oP '^export BAREMETAL_GATEWAY="\K[^"]+' "${CONFIG_FILE}" || true) - fi - if [[ -z "${dns_ip}" ]]; then - die "Cannot determine DNS server for nodes. Set BAREMETAL_DNS or BAREMETAL_GATEWAY in config." 
- fi - info "Setting PROVISIONING_HOST_EXTERNAL_IP=${dns_ip} (from BAREMETAL_DNS/BAREMETAL_GATEWAY)" - fi - info "Deploying config to dev-scripts" { cat "${CONFIG_FILE}" @@ -220,11 +202,6 @@ setup_dev_scripts() { echo "# Working directory (set by deploy-baremetal.sh)" echo "export WORKING_DIR=\"${working_dir}\"" fi - if [[ -z "${prov_ip}" && -n "${dns_ip:-}" ]]; then - echo "" - echo "# DNS/gateway for node NMState config (set by deploy-baremetal.sh)" - echo "export PROVISIONING_HOST_EXTERNAL_IP=\"${dns_ip}\"" - fi } > "${DEV_SCRIPTS_PATH}/config_${ds_user}.sh" cp "${PULL_SECRET}" "${DEV_SCRIPTS_PATH}/pull_secret.json" cp "${NODES_FILE}" "${DEV_SCRIPTS_PATH}/ironic_nodes.json" From 9f45936f0bda0cf9e59cc93f0d99cf0d4808e192 Mon Sep 17 00:00:00 2001 From: Pablo Fontanilla Date: Thu, 2 Jul 2026 11:51:42 +0200 Subject: [PATCH 17/20] Convert deploy-baremetal.sh to Ansible playbook Replace the 448-line shell script with a 90-line Ansible playbook (deploy-baremetal.yml) that reuses the install-dev role's config validation via include_role tasks_from: config. The shell script is gutted to a thin wrapper (~45 lines) that validates inventory_baremetal.ini has a configured [provisioning_host] and calls ansible-playbook, matching the deploy-cluster.sh pattern. - Playbook targets [provisioning_host] group from inventory_baremetal.ini - SSH/rsync remote execution replaced by Ansible's native connection layer - Config/pull-secret validation reused from install-dev role (zero duplication) - Dev-scripts ABI pipeline via make module loop (6 targets) - Credentials fetched back to controller via fetch module - Error recovery block with cleanup instructions Also updates inventory_baremetal.ini.sample to use standard Ansible inventory format for [provisioning_host] section. 
Co-Authored-By: Claude Opus 4.6 --- deploy/Makefile | 2 +- .../openshift-clusters/deploy-baremetal.yml | 127 +++++ .../inventory_baremetal.ini.sample | 32 +- .../scripts/deploy-baremetal.sh | 465 ++---------------- 4 files changed, 175 insertions(+), 451 deletions(-) create mode 100644 deploy/openshift-clusters/deploy-baremetal.yml diff --git a/deploy/Makefile b/deploy/Makefile index 7558c7fd..98872591 100644 --- a/deploy/Makefile +++ b/deploy/Makefile @@ -152,7 +152,7 @@ help: @echo "" @echo "Baremetal Adoption:" @echo " baremetal-adopt - Adopt baremetal nodes: validate BMC + generate dev-scripts artifacts" - @echo " baremetal-fencing-agent - Deploy TNF cluster on adopted baremetal nodes (local dev-scripts ABI)" + @echo " baremetal-fencing-agent - Deploy TNF cluster on adopted baremetal nodes via provisioning host" @echo " baremetal-verify - Verify BMC credentials for adopted baremetal nodes (no artifacts)" @echo " baremetal-wizard - Interactive wizard to create baremetal node inventory" @echo "" diff --git a/deploy/openshift-clusters/deploy-baremetal.yml b/deploy/openshift-clusters/deploy-baremetal.yml new file mode 100644 index 00000000..d75eb8ae --- /dev/null +++ b/deploy/openshift-clusters/deploy-baremetal.yml @@ -0,0 +1,127 @@ +--- +# Deploy a TNF fencing cluster on adopted baremetal nodes via dev-scripts ABI. +# +# Targets the [provisioning_host] group from inventory_baremetal.ini. +# Expects adoption artifacts from 'make baremetal-adopt' in +# roles/dev-scripts/install-dev/files/. 
+# +# Usage: +# ansible-playbook deploy-baremetal.yml -i inventory_baremetal.ini +# ansible-playbook deploy-baremetal.yml -i inventory_baremetal.ini -e dev_scripts_branch=my-branch +# +- hosts: provisioning_host + gather_facts: no + force_handlers: yes + + vars: + method: agent + topology: fencing + + pre_tasks: + - name: Check adoption artifacts exist on controller + ansible.builtin.stat: + path: "{{ playbook_dir }}/roles/dev-scripts/install-dev/files/{{ item }}" + delegate_to: localhost + become: false + register: artifact_check + loop: + - config_baremetal_fencing.sh + - ironic_nodes.json + + - name: Fail if adoption artifacts are missing + ansible.builtin.fail: + msg: >- + Adoption artifact not found: {{ item.item }}. + Run 'make baremetal-adopt' first. + when: not item.stat.exists + loop: "{{ artifact_check.results }}" + loop_control: + label: "{{ item.item }}" + + tasks: + # --- Validation, config deploy, pull-secret (reused from install-dev role) --- + - name: Validate and deploy config + pull-secret + ansible.builtin.include_role: + name: dev-scripts/install-dev + tasks_from: config + vars: + config_file: + agent: config_baremetal_fencing.sh + + # --- Git checkout --- + - name: Checkout dev-scripts + ansible.builtin.git: + dest: "{{ dev_scripts_path }}" + repo: "{{ dev_scripts_src_repo }}" + version: "{{ dev_scripts_branch }}" + + # --- Baremetal-specific setup --- + - name: Copy ironic_nodes.json to dev-scripts + ansible.builtin.copy: + src: "{{ playbook_dir }}/roles/dev-scripts/install-dev/files/ironic_nodes.json" + dest: "{{ dev_scripts_path }}/ironic_nodes.json" + mode: "0600" + + - name: Create working directory + ansible.builtin.shell: mkdir -p "${HOME}/dev-scripts-workdir" + changed_when: false + + - name: Append WORKING_DIR to deployed config + ansible.builtin.lineinfile: + path: "{{ dev_scripts_path }}/config_{{ whoami.stdout }}.sh" + regexp: '^export WORKING_DIR=' + line: 'export WORKING_DIR="${HOME}/dev-scripts-workdir"' + + - name: Create empty 
mirror registry credentials + ansible.builtin.shell: | + f="${HOME}/private-mirror-{{ test_cluster_name }}.json" + [ -f "$f" ] || echo '{"auths":{}}' > "$f" + changed_when: false + + # --- Deploy --- + - name: Run dev-scripts ABI pipeline + block: + - name: "dev-scripts: {{ item }}" + make: + chdir: "{{ dev_scripts_path }}" + target: "{{ item }}" + loop: + - requirements + - agent_requirements + - agent_build_installer + - agent_prepare_release + - agent_configure + - agent_create_cluster + rescue: + - name: Display recovery instructions + ansible.builtin.debug: + msg: | + DEPLOYMENT FAILED. To recover: + 1. Power off baremetal nodes via BMC + 2. Clean dev-scripts state: + make -C {{ dev_scripts_path }} clean + 3. Fix the issue and re-run: + make baremetal-fencing-agent + - name: Fail after displaying recovery steps + ansible.builtin.fail: + msg: "dev-scripts ABI pipeline failed" + + # --- Post-deploy: fetch credentials to controller --- + - name: Fetch kubeconfig to controller + ansible.builtin.fetch: + src: "{{ dev_scripts_path }}/ocp/{{ test_cluster_name }}/auth/kubeconfig" + dest: "clusters/{{ test_cluster_name }}/auth/kubeconfig" + flat: true + + - name: Fetch kubeadmin-password to controller + ansible.builtin.fetch: + src: "{{ dev_scripts_path }}/ocp/{{ test_cluster_name }}/auth/kubeadmin-password" + dest: "clusters/{{ test_cluster_name }}/auth/kubeadmin-password" + flat: true + + - name: Display access information + ansible.builtin.debug: + msg: | + Baremetal TNF cluster deployed successfully! 
+ Kubeconfig: clusters/{{ test_cluster_name }}/auth/kubeconfig + Usage: export KUBECONFIG=$(pwd)/clusters/{{ test_cluster_name }}/auth/kubeconfig diff --git a/deploy/openshift-clusters/inventory_baremetal.ini.sample b/deploy/openshift-clusters/inventory_baremetal.ini.sample index 960a4def..a145e459 100644 --- a/deploy/openshift-clusters/inventory_baremetal.ini.sample +++ b/deploy/openshift-clusters/inventory_baremetal.ini.sample @@ -44,33 +44,31 @@ cpu_arch=x86_64 # gateway - Default gateway IP # api_vip - API virtual IP # ingress_vip - Ingress virtual IP +# dns_servers - Comma-separated DNS server IPs reachable from the nodes. +# Used as the node DNS resolver during install (resolves quay.io, etc.). +# If unset, falls back to gateway — which may not run DNS. #machine_network=192.168.1.0/24 #gateway=192.168.1.1 #api_vip=192.168.1.100 #ingress_vip=192.168.1.101 +#dns_servers=10.11.5.160,10.2.32.85 [provisioning_host] # Provisioning host for baremetal ABI deployment. Must be on the same L2 network # as the baremetal nodes (serves agent ISO via HTTP, runs dnsmasq, acts as gateway). # -# If this section is configured, deploy-baremetal.sh syncs artifacts to the host -# and runs the deployment there via SSH. If absent, deployment runs locally. +# Standard Ansible inventory format — one host entry with connection variables. 
+# For local deployment, use: localhost ansible_connection=local # -# Required: -# ssh_target - user@host for SSH access (key-based auth required) -# -# Optional: -# ssh_key - Path to SSH private key (default: ssh-agent or ~/.ssh/id_rsa) -# dev_scripts_path - DEV_SCRIPTS_PATH on the remote host -# (default: ~/openshift-metal3/dev-scripts) -# dev_scripts_repo - Git repo URL for dev-scripts (default: upstream openshift-metal3) -# dev_scripts_branch - Git branch to checkout (default: repo's current branch) -# working_dir - Remote staging directory for TNT artifacts -# (default: ~/tnt-baremetal) +#10.1.155.50 ansible_user=root ansible_ssh_private_key_file=~/.ssh/lab_key + +[provisioning_host:vars] +# Override dev-scripts checkout on the provisioning host (optional). +# Defaults come from roles/dev-scripts/install-dev/defaults/main.yml: +# dev_scripts_path=openshift-metal3/dev-scripts +# dev_scripts_src_repo=https://github.com/openshift-metal3/dev-scripts +# dev_scripts_branch=master # -#ssh_target=root@10.1.155.50 -#ssh_key=~/.ssh/lab_key #dev_scripts_path=~/openshift-metal3/dev-scripts -#dev_scripts_repo=https://github.com/myuser/dev-scripts +#dev_scripts_src_repo=https://github.com/myuser/dev-scripts #dev_scripts_branch=my-feature-branch -#working_dir=~/tnt-baremetal diff --git a/deploy/openshift-clusters/scripts/deploy-baremetal.sh b/deploy/openshift-clusters/scripts/deploy-baremetal.sh index d7c4179a..a5e8c161 100755 --- a/deploy/openshift-clusters/scripts/deploy-baremetal.sh +++ b/deploy/openshift-clusters/scripts/deploy-baremetal.sh @@ -1,20 +1,14 @@ #!/usr/bin/bash # -# Deploy a TNF fencing cluster on adopted baremetal nodes via dev-scripts ABI. +# Deploy a TNF fencing cluster on adopted baremetal nodes. +# +# Thin wrapper that calls the deploy-baremetal.yml Ansible playbook +# targeting the [provisioning_host] group from inventory_baremetal.ini. 
# -# If [provisioning_host] is configured in inventory_baremetal.ini, syncs -# artifacts and executes on the remote host via SSH. Otherwise runs locally. # Expects adoption artifacts from 'make baremetal-adopt'. # # Usage: -# deploy-baremetal.sh [options] -# -# Options: -# --cluster-name NAME Cluster name matching adoption artifacts (default: ostest) -# --dev-scripts-path PATH Path to dev-scripts checkout (default: ~/openshift-metal3/dev-scripts) -# --dev-scripts-repo URL Git repo for dev-scripts (default: upstream openshift-metal3) -# --dev-scripts-branch BRANCH Git branch to checkout (default: repo's current branch) -# -h, --help Show this help message +# deploy-baremetal.sh [-- ] set -o nounset set -o errexit @@ -23,425 +17,30 @@ set -o pipefail SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" OC_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)" -CLUSTER_NAME="${CLUSTER_NAME:-ostest}" -DEV_SCRIPTS_PATH="${DEV_SCRIPTS_PATH:-${HOME}/openshift-metal3/dev-scripts}" -DEV_SCRIPTS_REPO="${DEV_SCRIPTS_REPO:-https://github.com/openshift-metal3/dev-scripts}" -DEV_SCRIPTS_BRANCH="${DEV_SCRIPTS_BRANCH:-}" - -############################################################################## -# Helpers -############################################################################## - -die() { echo "Error: $*" >&2; exit 1; } - -info() { echo "==> $*"; } - -warn() { echo " WARNING: $*" >&2; } - -############################################################################## -# Argument parsing -############################################################################## - -parse_args() { - while [[ $# -gt 0 ]]; do - case $1 in - --cluster-name) - CLUSTER_NAME="$2" - shift 2 - ;; - --dev-scripts-path) - DEV_SCRIPTS_PATH="$2" - shift 2 - ;; - --dev-scripts-repo) - DEV_SCRIPTS_REPO="$2" - shift 2 - ;; - --dev-scripts-branch) - DEV_SCRIPTS_BRANCH="$2" - shift 2 - ;; - -h|--help) - head -15 "$0" | tail -10 - exit 0 - ;; - *) - die "Unknown option: $1. Run '$0 --help' for usage." 
- ;; - esac - done -} - -############################################################################## -# Pre-flight validation -############################################################################## - -validate_sudo() { - if ! sudo -n true 2>/dev/null; then - die "Dev-scripts requires sudo access (package installs, network config, ironic dirs).\nRun this script from an interactive terminal, or configure passwordless sudo." - fi -} - -validate_artifacts() { - local cluster_dir="${OC_DIR}/clusters/${CLUSTER_NAME}" - - [[ -d "${cluster_dir}" ]] || \ - die "Cluster directory not found: ${cluster_dir}\nRun 'make baremetal-adopt' first." - - CONFIG_FILE="${cluster_dir}/config_baremetal_fencing.sh" - NODES_FILE="${cluster_dir}/ironic_nodes.json" - - [[ -f "${CONFIG_FILE}" ]] || \ - die "Config not found: ${CONFIG_FILE}\nRun 'make baremetal-adopt' first." - [[ -f "${NODES_FILE}" ]] || \ - die "Nodes file not found: ${NODES_FILE}\nRun 'make baremetal-adopt' first." - - info "Adoption artifacts found in ${cluster_dir}" -} - -validate_pull_secret() { - PULL_SECRET="${OC_DIR}/roles/dev-scripts/install-dev/files/pull-secret.json" - - [[ -f "${PULL_SECRET}" ]] || \ - die "Pull secret not found: ${PULL_SECRET}\nCopy your pull secret to this path." - - if ! jq empty "${PULL_SECRET}" 2>/dev/null; then - die "Invalid JSON in ${PULL_SECRET}\nValidate with: python3 -m json.tool ${PULL_SECRET}" - fi -} - -validate_config() { - local config_content - config_content=$(<"${CONFIG_FILE}") - - # AGENT_E2E_TEST_SCENARIO must be set - if ! grep -qE '^export AGENT_E2E_TEST_SCENARIO=' <<<"${config_content}"; then - die "AGENT_E2E_TEST_SCENARIO not set in ${CONFIG_FILE}.\nThis should have been added by 'make baremetal-adopt'. Re-run adoption." 
- fi - - # CI token validation (if using CI registry) - local ci_token="" - ci_token=$(grep -oP '^export CI_TOKEN="\K[^"]+' <<<"${config_content}" || true) - - if [[ -n "${ci_token}" ]]; then - local release_registry="" - release_registry=$(grep -oP '^export OPENSHIFT_RELEASE_IMAGE="?\K[^/"]+' <<<"${config_content}" || true) - - if [[ "${release_registry}" == "registry.ci.openshift.org" ]]; then - info "Validating CI token against ${release_registry}..." - local http_code - http_code=$(curl -s -o /dev/null -w '%{http_code}' \ - -H "Authorization: Bearer ${ci_token}" \ - "https://${release_registry}/v2/" 2>/dev/null) || http_code="000" - - if [[ "${http_code}" != "200" ]]; then - die "CI token is invalid or expired for ${release_registry} (HTTP ${http_code}).\nUpdate CI_TOKEN in your base config and re-run 'make baremetal-adopt'." - fi - info "CI token valid" - fi - fi - - # CI registry in pull-secret check - if grep -q 'registry.ci.openshift.org' <<<"${config_content}"; then - if ! jq -e '.auths["registry.ci.openshift.org"]' "${PULL_SECRET}" >/dev/null 2>&1; then - die "Config uses CI registry but pull secret lacks registry.ci.openshift.org credentials." - fi - fi - - # BAREMETAL_ISO_SERVER warning - if ! grep -qE '^export BAREMETAL_ISO_SERVER=' <<<"${config_content}"; then - warn "BAREMETAL_ISO_SERVER not set in config." - warn "You must set this in dev-scripts config before running 'make agent'." - warn "The ISO server must be reachable from BMC networks (not the provisioning host)." - fi -} - -############################################################################## -# Dev-scripts setup -############################################################################## - -setup_dev_scripts() { - if [[ ! -d "${DEV_SCRIPTS_PATH}" ]]; then - info "Cloning dev-scripts from ${DEV_SCRIPTS_REPO}${DEV_SCRIPTS_BRANCH:+ (branch: ${DEV_SCRIPTS_BRANCH})}..." 
- local clone_args=("${DEV_SCRIPTS_REPO}" "${DEV_SCRIPTS_PATH}") - if [[ -n "${DEV_SCRIPTS_BRANCH}" ]]; then - clone_args=(-b "${DEV_SCRIPTS_BRANCH}" "${clone_args[@]}") - fi - git clone "${clone_args[@]}" - else - info "Using dev-scripts at ${DEV_SCRIPTS_PATH}" - if [[ -n "${DEV_SCRIPTS_BRANCH}" ]]; then - local current_branch - current_branch=$(git -C "${DEV_SCRIPTS_PATH}" rev-parse --abbrev-ref HEAD) - if [[ "${current_branch}" != "${DEV_SCRIPTS_BRANCH}" ]]; then - info "Switching dev-scripts from ${current_branch} to ${DEV_SCRIPTS_BRANCH}..." - git -C "${DEV_SCRIPTS_PATH}" fetch --all --quiet - git -C "${DEV_SCRIPTS_PATH}" checkout "${DEV_SCRIPTS_BRANCH}" - fi - fi - fi - - [[ -f "${DEV_SCRIPTS_PATH}/Makefile" ]] || \ - die "Invalid dev-scripts checkout: ${DEV_SCRIPTS_PATH} (no Makefile found)" - - local ds_user - ds_user=$(whoami) - - # Set WORKING_DIR in config if not already present — avoids sudo for /opt/dev-scripts - local working_dir="${WORKING_DIR:-${HOME}/dev-scripts-workdir}" - if ! grep -qE '^export WORKING_DIR=' "${CONFIG_FILE}"; then - info "Setting WORKING_DIR=${working_dir}" - fi - mkdir -p "${working_dir}" - - info "Deploying config to dev-scripts" - { - cat "${CONFIG_FILE}" - if ! grep -qE '^export WORKING_DIR=' "${CONFIG_FILE}"; then - echo "" - echo "# Working directory (set by deploy-baremetal.sh)" - echo "export WORKING_DIR=\"${working_dir}\"" - fi - } > "${DEV_SCRIPTS_PATH}/config_${ds_user}.sh" - cp "${PULL_SECRET}" "${DEV_SCRIPTS_PATH}/pull_secret.json" - cp "${NODES_FILE}" "${DEV_SCRIPTS_PATH}/ironic_nodes.json" - - # REGISTRY_CREDS defaults to ~/private-mirror-.json — the local - # mirror registry credentials. Baremetal deploys don't run a local registry, - # so create an empty auth file to prevent jq merge failures in write_pull_secret(). - local registry_creds="${HOME}/private-mirror-${CLUSTER_NAME}.json" - if [[ ! 
-f "${registry_creds}" ]]; then - echo '{"auths":{}}' > "${registry_creds}" - info " mirror → ${registry_creds} (empty — no local registry)" - fi - - info " config → config_${ds_user}.sh" - info " secret → pull_secret.json" - info " nodes → ironic_nodes.json" -} - -############################################################################## -# Remote execution (provisioning host) -############################################################################## - -parse_provisioning_host() { - local inventory="${OC_DIR}/inventory_baremetal.ini" - PROV_SSH_TARGET="" - PROV_SSH_KEY="" - PROV_DEV_SCRIPTS_PATH="" - PROV_DEV_SCRIPTS_REPO="" - PROV_DEV_SCRIPTS_BRANCH="" - PROV_WORKING_DIR="tnt-baremetal" - - [[ -f "${inventory}" ]] || return 0 - - local in_section=false - while IFS= read -r line || [[ -n "${line}" ]]; do - line="${line%%#*}" - line="${line#"${line%%[![:space:]]*}"}" - line="${line%"${line##*[![:space:]]}"}" - [[ -z "${line}" ]] && continue - - if [[ "${line}" == "[provisioning_host]" ]]; then - in_section=true - continue - elif [[ "${line}" =~ ^\[.*\] ]]; then - in_section=false - continue - fi - - if ${in_section}; then - local key="${line%%=*}" - local val="${line#*=}" - case "${key}" in - ssh_target) PROV_SSH_TARGET="${val}" ;; - ssh_key) PROV_SSH_KEY="${val}" ;; - dev_scripts_path) PROV_DEV_SCRIPTS_PATH="${val}" ;; - dev_scripts_repo) PROV_DEV_SCRIPTS_REPO="${val}" ;; - dev_scripts_branch) PROV_DEV_SCRIPTS_BRANCH="${val}" ;; - working_dir) PROV_WORKING_DIR="${val}" ;; - esac - fi - done < "${inventory}" -} - -build_ssh_opts() { - SSH_OPTS=(-o "ServerAliveInterval=30" -o "ServerAliveCountMax=120") - if [[ -n "${PROV_SSH_KEY:-}" ]]; then - SSH_OPTS+=(-i "${PROV_SSH_KEY}") - fi -} - -validate_ssh_connectivity() { - info "Validating SSH access to ${PROV_SSH_TARGET}..." - - local opts=(-o "ConnectTimeout=10" -o "BatchMode=yes") - if [[ -n "${PROV_SSH_KEY:-}" ]]; then - opts+=(-i "${PROV_SSH_KEY}") - fi - - if ! 
ssh "${opts[@]}" "${PROV_SSH_TARGET}" "true" 2>/dev/null; then - die "Cannot SSH to provisioning host: ${PROV_SSH_TARGET} -Ensure: - 1. SSH key-based auth is configured - 2. The host is reachable from this machine - 3. ssh_key is set in inventory_baremetal.ini if using a non-default key" - fi - - if ! ssh "${opts[@]}" "${PROV_SSH_TARGET}" "command -v rsync" &>/dev/null; then - die "rsync not found on provisioning host. -Install with: ssh ${PROV_SSH_TARGET} 'sudo dnf install -y rsync'" - fi - - info "SSH connectivity OK" -} - -sync_to_remote() { - local remote_dir="${PROV_WORKING_DIR}" - - info "Syncing artifacts to ${PROV_SSH_TARGET}:~/${remote_dir}" - - # shellcheck disable=SC2029 - ssh "${SSH_OPTS[@]}" "${PROV_SSH_TARGET}" \ - "mkdir -p ~/${remote_dir}/{scripts,clusters/${CLUSTER_NAME},roles/dev-scripts/install-dev/files}" - - rsync -az -e "ssh ${SSH_OPTS[*]}" \ - "${SCRIPT_DIR}/deploy-baremetal.sh" \ - "${PROV_SSH_TARGET}:~/${remote_dir}/scripts/" - - rsync -az -e "ssh ${SSH_OPTS[*]}" \ - "${OC_DIR}/clusters/${CLUSTER_NAME}/config_baremetal_fencing.sh" \ - "${OC_DIR}/clusters/${CLUSTER_NAME}/ironic_nodes.json" \ - "${PROV_SSH_TARGET}:~/${remote_dir}/clusters/${CLUSTER_NAME}/" - - rsync -az -e "ssh ${SSH_OPTS[*]}" \ - "${PULL_SECRET}" \ - "${PROV_SSH_TARGET}:~/${remote_dir}/roles/dev-scripts/install-dev/files/pull-secret.json" - - rsync -az -e "ssh ${SSH_OPTS[*]}" \ - "${OC_DIR}/inventory_baremetal.ini" \ - "${PROV_SSH_TARGET}:~/${remote_dir}/inventory_baremetal.ini" - - info "Sync complete" -} - -exec_on_remote() { - # shellcheck disable=SC2088 # tilde expands on the remote shell via SSH - local remote_script="~/${PROV_WORKING_DIR}/scripts/deploy-baremetal.sh" - local remote_args="--cluster-name ${CLUSTER_NAME}" - if [[ -n "${PROV_DEV_SCRIPTS_PATH:-}" ]]; then - remote_args+=" --dev-scripts-path ${PROV_DEV_SCRIPTS_PATH}" - fi - if [[ -n "${PROV_DEV_SCRIPTS_REPO:-}" ]]; then - remote_args+=" --dev-scripts-repo ${PROV_DEV_SCRIPTS_REPO}" - fi - if [[ -n 
"${PROV_DEV_SCRIPTS_BRANCH:-}" ]]; then - remote_args+=" --dev-scripts-branch ${PROV_DEV_SCRIPTS_BRANCH}" - fi - - info "Executing deploy on ${PROV_SSH_TARGET}..." - info "Remote output follows:" - echo "==========================================" - - # shellcheck disable=SC2029 - ssh -tt "${SSH_OPTS[@]}" "${PROV_SSH_TARGET}" \ - "TNT_REMOTE_EXEC=1 bash ${remote_script} ${remote_args}" -} - -fetch_credentials() { - local remote_home - remote_home=$(ssh "${SSH_OPTS[@]}" "${PROV_SSH_TARGET}" 'echo ${HOME}') - local remote_ds="${PROV_DEV_SCRIPTS_PATH:-${remote_home}/openshift-metal3/dev-scripts}" - local remote_auth="${remote_ds}/ocp/${CLUSTER_NAME}/auth" - local local_auth="${OC_DIR}/clusters/${CLUSTER_NAME}/auth" - - info "Fetching cluster credentials from provisioning host..." - mkdir -p "${local_auth}" - - rsync -az -e "ssh ${SSH_OPTS[*]}" \ - "${PROV_SSH_TARGET}:${remote_auth}/" \ - "${local_auth}/" - - info "Kubeconfig: ${local_auth}/kubeconfig" - info "Password: ${local_auth}/kubeadmin-password" -} - -############################################################################## -# Main -############################################################################## - -main() { - parse_args "$@" - - # Remote execution: if [provisioning_host] is configured and we're not - # already running on the remote side, sync artifacts and SSH in. - if [[ -z "${TNT_REMOTE_EXEC:-}" ]]; then - parse_provisioning_host - if [[ -n "${PROV_SSH_TARGET}" ]]; then - build_ssh_opts - info "Baremetal TNF deployment — cluster: ${CLUSTER_NAME}" - info "Provisioning host: ${PROV_SSH_TARGET}" - echo "" - - # Validate artifacts locally (fast, catches errors before SSH) - validate_artifacts - validate_pull_secret - validate_config - echo "" - - validate_ssh_connectivity - sync_to_remote - echo "" - exec_on_remote - echo "" - echo "==========================================" - fetch_credentials - echo "" - info "Baremetal TNF cluster deployed via provisioning host!" 
- exit 0 - fi - fi - - # Local execution (on provisioning host or standalone) - info "Baremetal TNF deployment — cluster: ${CLUSTER_NAME}" - echo "" - - validate_sudo - validate_artifacts - validate_pull_secret - validate_config - echo "" - - setup_dev_scripts - echo "" - - # Run dev-scripts ABI pipeline. Includes 'requirements' to install system - # dependencies (nmstate, netaddr, ansible, etc.). Skips 'configure' - # (02_configure_host.sh) which creates libvirt networks and VMs. - info "Starting dev-scripts ABI pipeline..." - info "This will take 30-60 minutes on baremetal nodes." - echo "" - - if make -C "${DEV_SCRIPTS_PATH}" requirements agent_requirements agent_build_installer agent_prepare_release agent_configure agent_create_cluster; then - echo "" - info "Baremetal TNF cluster deployed successfully!" - info "Kubeconfig: ${DEV_SCRIPTS_PATH}/ocp/${CLUSTER_NAME}/auth/kubeconfig" - info "Console: https://console-openshift-console.apps.${CLUSTER_NAME}.$(grep -oP 'BASE_DOMAIN="\K[^"]+' "${CONFIG_FILE}" 2>/dev/null || echo '')/" - else - echo "" - echo "==========================================" - echo " DEPLOYMENT FAILED" - echo "==========================================" - echo "" - echo " To recover:" - echo " 1. Power off baremetal nodes via BMC" - echo " 2. Clean dev-scripts state:" - echo " make -C ${DEV_SCRIPTS_PATH} clean" - echo " 3. Fix the issue and re-run:" - echo " make baremetal-fencing-agent" - echo "" - exit 1 - fi -} - -main "$@" +INVENTORY="${OC_DIR}/inventory_baremetal.ini" +PLAYBOOK="${OC_DIR}/deploy-baremetal.yml" + +if [[ ! -f "${INVENTORY}" ]]; then + echo "Error: inventory_baremetal.ini not found in ${OC_DIR}/" + echo "Copy inventory_baremetal.ini.sample, fill in your node details," + echo "and configure the [provisioning_host] section." + exit 1 +fi + +if ! 
grep -qE '^\[provisioning_host\]' "${INVENTORY}"; then + echo "Error: [provisioning_host] section not found in ${INVENTORY}" + echo "Add a [provisioning_host] section with the target host." + exit 1 +fi + +if ! grep -qvE '^\s*(#|$|\[)' <(sed -n '/\[provisioning_host\]/,/^\[/p' "${INVENTORY}" | tail -n +2); then + echo "Error: [provisioning_host] section has no hosts configured." + echo "Uncomment and configure a host entry in ${INVENTORY}." + echo "For local deployment, add: localhost ansible_connection=local" + exit 1 +fi + +echo "Deploying baremetal TNF cluster via provisioning host..." + +cd "${OC_DIR}" +ansible-playbook "${PLAYBOOK}" -i "${INVENTORY}" "$@" From 0f098dcfef85bdf277d70da9a4b76da936644544 Mon Sep 17 00:00:00 2001 From: Pablo Fontanilla Date: Thu, 2 Jul 2026 16:01:09 +0200 Subject: [PATCH 18/20] Fix baremetal deploy playbook: SSH keys, stale state, host deps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Generate SSH keypair on provisioning host if missing, inject SSH_PUB_KEY into dev-scripts config so cluster nodes accept SSH - Clean stale installer state (state.json, ISO, manifests) before running ABI pipeline to prevent release image SHA mismatches - Skip host dependency install (runc, containernetworking-plugins) for baremetal path — dev-scripts make requirements handles its own - Add test_cluster_name to play-level vars (include_role defaults don't persist to play scope with dynamic includes) - Fetch SSH private key alongside kubeconfig for node access - Pass method as task-level var on include_role (role vars/main.yml overrides play-level vars but not task-level vars) Co-Authored-By: Claude Opus 4.6 --- .../openshift-clusters/deploy-baremetal.yml | 45 +++++++++++++++++++ .../dev-scripts/install-dev/tasks/config.yml | 1 + 2 files changed, 46 insertions(+) diff --git a/deploy/openshift-clusters/deploy-baremetal.yml b/deploy/openshift-clusters/deploy-baremetal.yml index d75eb8ae..d764cad0 100644 --- 
a/deploy/openshift-clusters/deploy-baremetal.yml +++ b/deploy/openshift-clusters/deploy-baremetal.yml @@ -16,6 +16,7 @@ vars: method: agent topology: fencing + test_cluster_name: ostest pre_tasks: - name: Check adoption artifacts exist on controller @@ -45,8 +46,10 @@ name: dev-scripts/install-dev tasks_from: config vars: + method: agent config_file: agent: config_baremetal_fencing.sh + install_host_deps: false # --- Git checkout --- - name: Checkout dev-scripts @@ -72,12 +75,46 @@ regexp: '^export WORKING_DIR=' line: 'export WORKING_DIR="${HOME}/dev-scripts-workdir"' + # --- Ensure SSH key exists for node access --- + - name: Check for existing SSH keypair + ansible.builtin.stat: + path: ~/.ssh/id_ed25519 + register: ssh_key_check + + - name: Generate SSH keypair if missing + ansible.builtin.command: + cmd: ssh-keygen -t ed25519 -f ~/.ssh/id_ed25519 -N '' + when: not ssh_key_check.stat.exists + + - name: Read SSH public key + ansible.builtin.slurp: + src: ~/.ssh/id_ed25519.pub + register: ssh_pub_key + + - name: Append SSH_PUB_KEY to deployed config + ansible.builtin.lineinfile: + path: "{{ dev_scripts_path }}/config_{{ whoami.stdout }}.sh" + regexp: '^export SSH_PUB_KEY=' + line: 'export SSH_PUB_KEY="{{ ssh_pub_key.content | b64decode | trim }}"' + - name: Create empty mirror registry credentials ansible.builtin.shell: | f="${HOME}/private-mirror-{{ test_cluster_name }}.json" [ -f "$f" ] || echo '{"auths":{}}' > "$f" changed_when: false + # --- Clean stale installer state from prior runs --- + - name: Remove stale installer state + ansible.builtin.file: + path: "{{ dev_scripts_path }}/ocp/{{ test_cluster_name }}/{{ item }}" + state: absent + loop: + - .openshift_install_state.json + - .openshift_install.log + - agent.x86_64.iso + - cluster-manifests + - openshift + # --- Deploy --- - name: Run dev-scripts ABI pipeline block: @@ -119,9 +156,17 @@ dest: "clusters/{{ test_cluster_name }}/auth/kubeadmin-password" flat: true + - name: Fetch SSH private key to 
controller + ansible.builtin.fetch: + src: ~/.ssh/id_ed25519 + dest: "clusters/{{ test_cluster_name }}/auth/id_ed25519" + flat: true + mode: "0600" + - name: Display access information ansible.builtin.debug: msg: | Baremetal TNF cluster deployed successfully! Kubeconfig: clusters/{{ test_cluster_name }}/auth/kubeconfig Usage: export KUBECONFIG=$(pwd)/clusters/{{ test_cluster_name }}/auth/kubeconfig + SSH: ssh -i clusters/{{ test_cluster_name }}/auth/id_ed25519 core@<node-ip> diff --git a/deploy/openshift-clusters/roles/dev-scripts/install-dev/tasks/config.yml b/deploy/openshift-clusters/roles/dev-scripts/install-dev/tasks/config.yml index c973118b..d320495a 100644 --- a/deploy/openshift-clusters/roles/dev-scripts/install-dev/tasks/config.yml +++ b/deploy/openshift-clusters/roles/dev-scripts/install-dev/tasks/config.yml @@ -125,3 +125,4 @@ - containernetworking-plugins state: present become: true + when: install_host_deps | default(true) From 62206c726de58f0810a121986190a65d4deb0add Mon Sep 17 00:00:00 2001 From: Pablo Fontanilla Date: Fri, 3 Jul 2026 14:21:21 +0200 Subject: [PATCH 19/20] Add proxy-based cluster access for baremetal deployments Reuse the existing proxy-setup role (Squid forward proxy) to provide cluster access from the developer laptop through the provisioning host. Fix inventory_hostname extraction to support both user@host and plain host formats. Replace manual credential fetch in deploy-baremetal.yml with proxy-setup role inclusion.
Co-Authored-By: Claude Opus 4.6 --- deploy/openshift-clusters/.gitignore | 1 + .../openshift-clusters/deploy-baremetal.yml | 40 +++++++------------ .../roles/proxy-setup/tasks/environment.yml | 2 +- 3 files changed, 17 insertions(+), 26 deletions(-) diff --git a/deploy/openshift-clusters/.gitignore b/deploy/openshift-clusters/.gitignore index 96b8f64c..c5ba337d 100644 --- a/deploy/openshift-clusters/.gitignore +++ b/deploy/openshift-clusters/.gitignore @@ -4,6 +4,7 @@ inventory_baremetal.ini proxy.env kubeconfig kubeadmin-password +clusters/ *.pyc *.pyo diff --git a/deploy/openshift-clusters/deploy-baremetal.yml b/deploy/openshift-clusters/deploy-baremetal.yml index d764cad0..592c5779 100644 --- a/deploy/openshift-clusters/deploy-baremetal.yml +++ b/deploy/openshift-clusters/deploy-baremetal.yml @@ -103,22 +103,16 @@ [ -f "$f" ] || echo '{"auths":{}}' > "$f" changed_when: false - # --- Clean stale installer state from prior runs --- - - name: Remove stale installer state + # --- Clean prior deployment state --- + - name: Remove prior cluster state (required by dev-scripts verifyClean) ansible.builtin.file: - path: "{{ dev_scripts_path }}/ocp/{{ test_cluster_name }}/{{ item }}" + path: "{{ dev_scripts_path }}/ocp/{{ test_cluster_name }}" state: absent - loop: - - .openshift_install_state.json - - .openshift_install.log - - agent.x86_64.iso - - cluster-manifests - - openshift # --- Deploy --- - name: Run dev-scripts ABI pipeline block: - - name: "dev-scripts: {{ item }}" + - name: Run dev-scripts make target make: chdir: "{{ dev_scripts_path }}" target: "{{ item }}" @@ -129,6 +123,8 @@ - agent_prepare_release - agent_configure - agent_create_cluster + loop_control: + label: "{{ item }}" rescue: - name: Display recovery instructions ansible.builtin.debug: @@ -143,18 +139,13 @@ ansible.builtin.fail: msg: "dev-scripts ABI pipeline failed" - # --- Post-deploy: fetch credentials to controller --- - - name: Fetch kubeconfig to controller - ansible.builtin.fetch: - src: "{{ 
dev_scripts_path }}/ocp/{{ test_cluster_name }}/auth/kubeconfig" - dest: "clusters/{{ test_cluster_name }}/auth/kubeconfig" - flat: true - - - name: Fetch kubeadmin-password to controller - ansible.builtin.fetch: - src: "{{ dev_scripts_path }}/ocp/{{ test_cluster_name }}/auth/kubeadmin-password" - dest: "clusters/{{ test_cluster_name }}/auth/kubeadmin-password" - flat: true + # --- Post-deploy: proxy + credentials --- + - name: Setup proxy and fetch credentials + ansible.builtin.include_role: + name: proxy-setup + vars: + kubeconfig_path: "{{ dev_scripts_path }}/ocp/{{ test_cluster_name }}/auth/kubeconfig" + kubeadmin_password_path: "{{ dev_scripts_path }}/ocp/{{ test_cluster_name }}/auth/kubeadmin-password" - name: Fetch SSH private key to controller ansible.builtin.fetch: @@ -167,6 +158,5 @@ ansible.builtin.debug: msg: | Baremetal TNF cluster deployed successfully! - Kubeconfig: clusters/{{ test_cluster_name }}/auth/kubeconfig - Usage: export KUBECONFIG=$(pwd)/clusters/{{ test_cluster_name }}/auth/kubeconfig - SSH: ssh -i clusters/{{ test_cluster_name }}/auth/id_ed25519 core@<node-ip> + Access: source proxy.env && oc get nodes + SSH: ssh -i clusters/{{ test_cluster_name }}/auth/id_ed25519 core@<node-ip> diff --git a/deploy/openshift-clusters/roles/proxy-setup/tasks/environment.yml b/deploy/openshift-clusters/roles/proxy-setup/tasks/environment.yml index 693b2032..1f1f9a18 100644 --- a/deploy/openshift-clusters/roles/proxy-setup/tasks/environment.yml +++ b/deploy/openshift-clusters/roles/proxy-setup/tasks/environment.yml @@ -7,7 +7,7 @@ # Determine the directory where this proxy.env file is located PROXY_ENV_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" - export EC2_PUBLIC_IP={{ hostvars[inventory_hostname]['inventory_hostname'].split('@')[1] }} + export EC2_PUBLIC_IP={{ hostvars[inventory_hostname]['ansible_host'] | default(inventory_hostname.split('@')[1] if '@' in inventory_hostname else inventory_hostname) }} export PROXYPORT={{ proxy_port }} export
HTTP_PROXY=http://${EC2_PUBLIC_IP}:${PROXYPORT}/ export HTTPS_PROXY=http://${EC2_PUBLIC_IP}:${PROXYPORT}/ From 01da195ed1aab0388d7052fb203247abe71e027c Mon Sep 17 00:00:00 2001 From: Pablo Fontanilla Date: Fri, 3 Jul 2026 18:05:29 +0200 Subject: [PATCH 20/20] Improve post-deploy access instructions for baremetal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Match the messaging style used by kcli-install.yml and the hypervisor deploy scripts — numbered steps, full playbook_dir path for sourcing proxy.env from anywhere. Co-Authored-By: Claude Opus 4.6 --- deploy/openshift-clusters/deploy-baremetal.yml | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/deploy/openshift-clusters/deploy-baremetal.yml b/deploy/openshift-clusters/deploy-baremetal.yml index 592c5779..e6dc5a3d 100644 --- a/deploy/openshift-clusters/deploy-baremetal.yml +++ b/deploy/openshift-clusters/deploy-baremetal.yml @@ -156,7 +156,13 @@ - name: Display access information ansible.builtin.debug: - msg: | + msg: |- Baremetal TNF cluster deployed successfully! - Access: source proxy.env && oc get nodes - SSH: ssh -i clusters/{{ test_cluster_name }}/auth/id_ed25519 core@<node-ip> + + Next steps: + 1. Source the proxy environment from anywhere: + source {{ playbook_dir }}/proxy.env + (or from openshift-clusters directory: source proxy.env) + 2. Verify cluster access: oc get nodes + 3. SSH to nodes: + ssh -i clusters/{{ test_cluster_name }}/auth/id_ed25519 core@<node-ip>