diff --git a/etc/kayobe/environments/ci-aio/controllers.yml b/etc/kayobe/environments/ci-aio/controllers.yml
index 12fe3afcb8..557d498606 100644
--- a/etc/kayobe/environments/ci-aio/controllers.yml
+++ b/etc/kayobe/environments/ci-aio/controllers.yml
@@ -4,7 +4,8 @@
# User with which to access the controllers via SSH during bootstrap, in order
# to setup the Kayobe user account. Default is {{ os_distribution }}.
-controller_bootstrap_user: "{{ os_distribution if os_distribution == 'ubuntu' else 'cloud-user' }}"
+#controller_bootstrap_user: "{{ os_distribution if os_distribution == 'ubuntu' else 'cloud-user' }}"
+controller_bootstrap_user: 'rocky'
controller_extra_network_interfaces:
- ethernet
diff --git a/etc/kayobe/environments/ci-aio/stackhpc.yml b/etc/kayobe/environments/ci-aio/stackhpc.yml
index a2c7858bb6..25ec90d95d 100644
--- a/etc/kayobe/environments/ci-aio/stackhpc.yml
+++ b/etc/kayobe/environments/ci-aio/stackhpc.yml
@@ -1,3 +1,3 @@
---
-stackhpc_enable_cis_benchmark_hardening_hook: true
+stackhpc_enable_cis_benchmark_hardening_hook: false
diff --git a/etc/kayobe/environments/stackhpc-baremetal/ansible/baremetal-0-enroll-overcloud.yml b/etc/kayobe/environments/stackhpc-baremetal/ansible/baremetal-0-enroll-overcloud.yml
new file mode 100644
index 0000000000..d2cbf0c05c
--- /dev/null
+++ b/etc/kayobe/environments/stackhpc-baremetal/ansible/baremetal-0-enroll-overcloud.yml
@@ -0,0 +1,83 @@
+---
+
+- name: Register baremetal compute nodes
+ hosts: localhost
+ vars:
+ venv: "{{ virtualenv_path }}/openstack-cli"
+ tasks:
+ - name: Set up openstack cli virtualenv
+ pip:
+ virtualenv: "{{ venv }}"
+ name:
+ - python-openstackclient
+ - python-ironicclient
+ state: latest
+ virtualenv_command: "python3.{{ ansible_facts.python.version.minor }} -m venv"
+ extra_args: "{% if pip_upper_constraints_file %}-c {{ pip_upper_constraints_file }}{% endif %}"
+
+- name: Ensure overcloud baremetal nodes are registered in ironic
+ hosts: baremetal-overcloud
+ gather_facts: false
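+  # The first defined value wins: a register-specific limit, then the generic
+  # baremetal compute limit, then the global Kayobe limit, then 100 (no limit).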
+ max_fail_percentage: >-
+ {{ baremetal_compute_register_max_fail_percentage |
+ default(baremetal_compute_max_fail_percentage) |
+ default(kayobe_max_fail_percentage) |
+ default(100) }}
+ tags:
+ - baremetal
+ vars:
+ venv: "{{ virtualenv_path }}/openstack-cli"
+ controller_host: localhost
+ tasks:
+ - name: Check Ironic variables are defined
+ ansible.builtin.assert:
+ that:
+ - ironic_driver is defined
+ - ironic_driver_info is defined
+ - ironic_properties is defined
+ - ironic_resource_class is defined
+ fail_msg: One or more Ironic variables are undefined.
+
+ - block:
+ - name: Show baremetal node
+ ansible.builtin.command:
+ cmd: "{{ venv }}/bin/openstack baremetal node show {{ inventory_hostname }}"
+ register: node_show
+ failed_when:
+ - '"HTTP 404" not in node_show.stderr'
+ - node_show.rc != 0
+ changed_when: false
+
+      # NOTE: The openstack.cloud.baremetal_node module cannot be used in this
+      # playbook because it requires a pre-defined MAC address; instead, MAC
+      # addresses are discovered by inspection after this playbook runs.
+ #
+ # NOTE: IPMI address must be passed with Redfish address to ensure existing
+ # Ironic nodes match with new nodes during inspection.
+ - name: Create baremetal nodes
+ ansible.builtin.shell:
+ cmd: |
+ {{ venv }}/bin/openstack baremetal node create \
+ --name {{ inventory_hostname }} \
+ --driver {{ ironic_driver }} \
+ {% for key, value in ironic_driver_info.items() %}
+ --driver-info {{ key }}={{ value }} \
+ {% endfor %}
+ {% for key, value in ironic_properties.items() %}
+ --property {{ key }}={{ value }} \
+ {% endfor %}
+ --resource-class {{ ironic_resource_class }}
+ when:
+ - node_show.rc != 0
+
+ - name: Manage baremetal nodes
+ ansible.builtin.command:
+ cmd: "{{ venv }}/bin/openstack baremetal node manage {{ inventory_hostname }} --wait"
+ when:
+ - node_show.rc != 0
+ delegate_to: "{{ controller_host }}"
+ vars:
+ # NOTE: Without this, the controller's ansible_host variable will not
+ # be respected when using delegate_to.
+ ansible_host: "{{ hostvars[controller_host].ansible_host | default(controller_host) }}"
+ environment: "{{ openstack_auth_env }}"
diff --git a/etc/kayobe/environments/stackhpc-baremetal/ansible/baremetal-1-check-bmc-up.yml b/etc/kayobe/environments/stackhpc-baremetal/ansible/baremetal-1-check-bmc-up.yml
new file mode 100644
index 0000000000..aca295a7df
--- /dev/null
+++ b/etc/kayobe/environments/stackhpc-baremetal/ansible/baremetal-1-check-bmc-up.yml
@@ -0,0 +1,124 @@
+---
+- name: Check baremetal compute node BMC is up
+ hosts: baremetal
+ gather_facts: false
+ max_fail_percentage: >-
+ {{ baremetal_compute_register_max_fail_percentage |
+ default(baremetal_compute_max_fail_percentage) |
+ default(kayobe_max_fail_percentage) |
+ default(100) }}
+ tags:
+ - baremetal
+ vars:
+ venv: "{{ virtualenv_path }}/openstack-cli"
+ controller_host: localhost
+
+ tasks:
+ - name: Check Ironic variables are defined
+ ansible.builtin.assert:
+ that:
+ - ironic_driver is defined
+ - ironic_driver_info is defined
+ - ironic_properties is defined
+ - ironic_resource_class is defined
+ fail_msg: One or more Ironic variables are undefined.
+
+ - name: Show and check baremetal node
+ delegate_to: "{{ controller_host }}"
+ vars:
+ # NOTE: Without this, the controller's ansible_host variable will not
+ # be respected when using delegate_to.
+ ansible_host: "{{ hostvars[controller_host].ansible_host | default(controller_host) }}"
+ environment: "{{ openstack_auth_env }}"
+ block:
+
+ - name: Show baremetal node
+ ansible.builtin.command:
+ cmd: "{{ venv }}/bin/openstack baremetal node show {{ inventory_hostname }} -f json"
+ register: node_show
+ failed_when:
+ - node_show.rc != 0
+ changed_when: false
+
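+        # The node's "extra" field is used as a persistent marker: each stage
+        # stamps a timestamp (here kayobe_bmc_up) so that re-runs skip work
+        # that has already completed.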
+ - name: Check if bmc is up
+ ansible.builtin.set_fact:
+ kayobe_bmc_up: "{{ (node_show.stdout | from_json)['extra'].get('kayobe_bmc_up') }}"
+ provision_state: "{{ (node_show.stdout | from_json)['provision_state'] }}"
+
+        - name: Output when the BMC was last seen up
+ ansible.builtin.debug:
+ msg: "BMC for node {{ inventory_hostname }} was up at {{ kayobe_bmc_up }}."
+ when: kayobe_bmc_up != ""
+
+ - name: Check BMC is up
+ ansible.builtin.uri:
+ url: "{{ ironic_driver_info['redfish_address'] + '/redfish/v1' }}"
+ method: GET
+ status_code: 200
+ validate_certs: false
+ timeout: 10
+
+ - name: Get firmware inventory (to check redfish auth)
+ community.general.redfish_info:
+ category: Update
+ command: GetFirmwareInventory
+ baseuri: "{{ ironic_redfish_address }}"
+ username: "{{ ironic_redfish_username }}"
+ password: "{{ ironic_redfish_password }}"
+ register: firmware_inventory
+ failed_when: not firmware_inventory.redfish_facts.firmware.ret
+
+ # - name: Print fetched information
+ # ansible.builtin.debug:
+ # msg: "{{ firmware_inventory.redfish_facts.firmware | to_nice_json }}"
+
+ - name: Reboot BMC
+ community.general.redfish_command:
+ category: Manager
+ command: PowerReboot
+ resource_id: 1
+ baseuri: "{{ ironic_redfish_address }}"
+ username: "{{ ironic_redfish_username }}"
+ password: "{{ ironic_redfish_password }}"
+ when: kayobe_bmc_up == ""
+
+ - name: Wait 300 seconds for port 443 to become open
+ ansible.builtin.wait_for:
+ port: 443
+ host: "{{ ironic_redfish_address }}"
+ delay: 20
+ timeout: 300
+ when: kayobe_bmc_up == ""
+
+ - name: Check BMC back up again
+ ansible.builtin.uri:
+            url: "{{ ironic_driver_info['redfish_address'] + '/redfish/v1' }}"
+ method: GET
+ status_code: 200
+ validate_certs: false
+ timeout: 10
+ register: uri_output
+ until: uri_output.status == 200
+ delay: 5
+ retries: 24 # Retries for 24 * 5 seconds = 120 seconds = 2 minutes
+
+        - name: Note the first time we are able to reach the BMC
+ ansible.builtin.command:
+ cmd: |
+ {{ venv }}/bin/openstack baremetal node set {{ inventory_hostname }} --extra kayobe_bmc_up={{ now(utc=true, fmt='%Y-%m-%dT%H:%M:%SZ') }}
+ register: node_set
+ failed_when:
+ - node_set.rc != 0
+ changed_when: true
+ when: kayobe_bmc_up == ""
+
+ - name: Try move from enroll to manageable
+ ansible.builtin.command:
+ cmd: |
+ {{ venv }}/bin/openstack baremetal node manage {{ inventory_hostname }} --wait 300
+ register: node_set
+ failed_when:
+ - node_set.rc != 0
+ changed_when: true
+ when:
+ - provision_state == "enroll"
diff --git a/etc/kayobe/environments/stackhpc-baremetal/ansible/baremetal-2-ensure-redfish-inspect.yml b/etc/kayobe/environments/stackhpc-baremetal/ansible/baremetal-2-ensure-redfish-inspect.yml
new file mode 100644
index 0000000000..f06324df7b
--- /dev/null
+++ b/etc/kayobe/environments/stackhpc-baremetal/ansible/baremetal-2-ensure-redfish-inspect.yml
@@ -0,0 +1,86 @@
+---
+- name: Ensure redfish inspection has been done
+ hosts: baremetal
+ gather_facts: false
+ max_fail_percentage: >-
+ {{ baremetal_compute_register_max_fail_percentage |
+ default(baremetal_compute_max_fail_percentage) |
+ default(kayobe_max_fail_percentage) |
+ default(100) }}
+ tags:
+ - baremetal
+ vars:
+ venv: "{{ virtualenv_path }}/openstack-cli"
+ controller_host: localhost
+
+ tasks:
+ - name: Show and check baremetal node
+ delegate_to: "{{ controller_host }}"
+ vars:
+ # NOTE: Without this, the controller's ansible_host variable will not
+ # be respected when using delegate_to.
+ ansible_host: "{{ hostvars[controller_host].ansible_host | default(controller_host) }}"
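+        # Timeout in seconds for "openstack baremetal node inspect --wait".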
+ redfish_inspect_timeout: 120
+ environment: "{{ openstack_auth_env }}"
+ block:
+
+ - name: Show baremetal node
+ ansible.builtin.command:
+ cmd: "{{ venv }}/bin/openstack baremetal node show {{ inventory_hostname }} -f json"
+ register: node_show
+ failed_when:
+ - node_show.rc != 0
+ changed_when: false
+
+ - name: Check BMC is up
+ ansible.builtin.uri:
+ url: "{{ ironic_driver_info['redfish_address'] + '/redfish/v1' }}"
+ method: GET
+ status_code: 200
+ validate_certs: false
+ timeout: 10
+
+ - name: Check for redfish inspection details
+ ansible.builtin.set_fact:
+ kayobe_redfish_inspect_done: "{{ (node_show.stdout | from_json)['extra'].get('kayobe_redfish_inspect_done') }}"
+ inspect_interface: "{{ (node_show.stdout | from_json)['inspect_interface'] }}"
+ provision_state: "{{ (node_show.stdout | from_json)['provision_state'] }}"
+
+ - name: Output when redfish inspection was done
+ ansible.builtin.debug:
+ msg: "{{ inventory_hostname }} inspected at {{ kayobe_redfish_inspect_done }}."
+ when: kayobe_redfish_inspect_done != ""
+
+        - name: Fail if inspect_interface is not redfish
+ ansible.builtin.fail:
+ msg: "{{ inventory_hostname }} has the wrong inspect_interface: {{ inspect_interface }}"
+ when:
+ - inspect_interface != "redfish"
+ - kayobe_redfish_inspect_done == ""
+
+ - name: Fail if not in manageable state
+ ansible.builtin.fail:
+ msg: "{{ inventory_hostname }} has the wrong provision_state: {{ provision_state }}"
+ when:
+ - provision_state != "manageable"
+ - kayobe_redfish_inspect_done == ""
+
+ - name: Wait for inspection
+ ansible.builtin.command:
+ cmd: |
+ {{ venv }}/bin/openstack baremetal node inspect {{ inventory_hostname }} --wait {{ redfish_inspect_timeout }}
+ register: node_inspect
+ failed_when:
+ - node_inspect.rc != 0
+ changed_when: true
+ when: kayobe_redfish_inspect_done == ""
+
+ - name: Note when redfish inspection is done
+ ansible.builtin.command:
+ cmd: |
+ {{ venv }}/bin/openstack baremetal node set {{ inventory_hostname }} --extra kayobe_redfish_inspect_done={{ now(utc=true, fmt='%Y-%m-%dT%H:%M:%SZ') }}
+ register: node_set
+ failed_when:
+ - node_set.rc != 0
+ changed_when: true
+ when: kayobe_redfish_inspect_done == ""
diff --git a/etc/kayobe/environments/stackhpc-baremetal/ansible/baremetal-3-ensure-agent-inspect.yml b/etc/kayobe/environments/stackhpc-baremetal/ansible/baremetal-3-ensure-agent-inspect.yml
new file mode 100644
index 0000000000..19f7162ca3
--- /dev/null
+++ b/etc/kayobe/environments/stackhpc-baremetal/ansible/baremetal-3-ensure-agent-inspect.yml
@@ -0,0 +1,193 @@
+---
+- name: Ensure agent inspection has been done
+ hosts: baremetal
+ gather_facts: false
+ max_fail_percentage: >-
+ {{ baremetal_compute_register_max_fail_percentage |
+ default(baremetal_compute_max_fail_percentage) |
+ default(kayobe_max_fail_percentage) |
+ default(100) }}
+ tags:
+ - baremetal
+ vars:
+ venv: "{{ virtualenv_path }}/openstack-cli"
+ controller_host: controller0
+
+ tasks:
+ - name: Show and check baremetal node
+ delegate_to: "{{ controller_host }}"
+ vars:
+ # NOTE: Without this, the controller's ansible_host variable will not
+ # be respected when using delegate_to.
+ ansible_host: "{{ hostvars[controller_host].ansible_host | default(controller_host) }}"
+ agent_inspect_timeout: "{{ 60 * 20 }}" # 20 minutes
+ environment: "{{ openstack_auth_env }}"
+ block:
+
+ - name: Show baremetal node
+ ansible.builtin.command:
+ cmd: "{{ venv }}/bin/openstack baremetal node show {{ inventory_hostname }} -f json"
+ register: node_show
+ failed_when:
+ - node_show.rc != 0
+ changed_when: false
+
+ - name: Check BMC is up
+ ansible.builtin.uri:
+ url: "{{ ironic_driver_info['redfish_address'] + '/redfish/v1' }}"
+ method: GET
+ status_code: 200
+ validate_certs: false
+ timeout: 10
+
+ - name: Check for agent inspection details
+ ansible.builtin.set_fact:
+ kayobe_agent_inspect_done: "{{ (node_show.stdout | from_json)['extra'].get('kayobe_agent_inspect_done') }}"
+ inspect_interface: "{{ (node_show.stdout | from_json)['inspect_interface'] }}"
+ network_interface: "{{ (node_show.stdout | from_json)['network_interface'] }}"
+ provision_state: "{{ (node_show.stdout | from_json)['provision_state'] }}"
+
+ - name: Output when agent inspection was done
+ ansible.builtin.debug:
+ msg: "{{ inventory_hostname }} inspected at {{ kayobe_agent_inspect_done }}."
+ when: kayobe_agent_inspect_done != ""
+
+ - name: Fail if not in manageable state
+ ansible.builtin.fail:
+ msg: "{{ inventory_hostname }} has the wrong provision_state: {{ provision_state }}"
+ when:
+ - provision_state not in ["manageable", "inspect failed"]
+ - kayobe_agent_inspect_done == ""
+
+        - name: If inspection failed, move back to manageable
+ ansible.builtin.command:
+ cmd: |
+ {{ venv }}/bin/openstack baremetal node manage {{ inventory_hostname }} --wait 300
+ register: node_set
+ failed_when:
+ - node_set.rc != 0
+ changed_when: true
+ # do it slowly due to ironic api issues
+ throttle: 3
+ when:
+ - provision_state == "inspect failed"
+ - kayobe_agent_inspect_done == ""
+
+ - name: Move to agent inspect interface
+ ansible.builtin.command:
+ cmd: |
+ {{ venv }}/bin/openstack baremetal node set {{ inventory_hostname }} --inspect-interface agent
+ register: node_set
+ failed_when:
+ - node_set.rc != 0
+ changed_when: true
+ # do it slowly due to ironic api issues
+ throttle: 3
+ when:
+ - kayobe_agent_inspect_done == ""
+ - inspect_interface == "redfish"
+
+        - name: Ensure we are using the flat network interface and the correct networks
+ ansible.builtin.command:
+ cmd: |
+ {{ venv }}/bin/openstack baremetal node set {{ inventory_hostname }} --network-interface flat --driver-info inspection_network={{ ironic_flat_provisioning_network }} --driver-info cleaning_network={{ ironic_flat_provisioning_network }} --driver-info provision_network={{ ironic_flat_provisioning_network }}
+ register: node_set
+ failed_when:
+ - node_set.rc != 0
+ changed_when: true
+ # do it slowly due to ironic api issues
+ throttle: 3
+ when:
+ - kayobe_agent_inspect_done == ""
+
+ - name: Gather information about baremetal ports
+ openstack.cloud.baremetal_port_info:
+ node: "{{ inventory_hostname }}"
+ auth_type: "{{ openstack_auth_type }}"
+ auth: "{{ openstack_auth }}"
+ cacert: "{{ openstack_cacert | default(omit, true) }}"
+ interface: "{{ openstack_interface | default(omit, true) }}"
+ register: bmport
+ when: kayobe_agent_inspect_done == ""
+
+ - name: Disable PXE on all baremetal ports
+ openstack.cloud.baremetal_port:
+ address: "{{ item.address }}"
+ auth_type: "{{ openstack_auth_type }}"
+ auth: "{{ openstack_auth }}"
+ cacert: "{{ openstack_cacert | default(omit, true) }}"
+ interface: "{{ openstack_interface | default(omit, true) }}"
+ node: "{{ inventory_hostname }}"
+ is_pxe_enabled: false
+ loop: "{{ bmport.baremetal_ports }}"
+ when: kayobe_agent_inspect_done == ""
+
+ - name: Re-enable PXE on the first Mellanox ethernet NIC
+ openstack.cloud.baremetal_port:
+ address: "{{ bmport.baremetal_ports | selectattr('address', 'search', item) | map(attribute='address') | list | first }}"
+ auth_type: "{{ openstack_auth_type }}"
+ auth: "{{ openstack_auth }}"
+ cacert: "{{ openstack_cacert | default(omit, true) }}"
+ interface: "{{ openstack_interface | default(omit, true) }}"
+ node: "{{ inventory_hostname }}"
+ is_pxe_enabled: true
+ when:
+ - kayobe_agent_inspect_done == ""
+ - bmport.baremetal_ports | selectattr('address', 'search', item) | list | length > 0
+ # known mellanox ethernet NICs
+ loop:
+ - "^58:a2:e1"
+ - "^a0:88:c2"
+ - "^7c:8c:09"
+ - "^94:6d:ae"
+ - "^50:00:e6"
+ - "^b8:3f:d2"
+ - "^c4:70:bd"
+
+ - name: Wait for inspection
+ ansible.builtin.command:
+ cmd: |
+ {{ venv }}/bin/openstack baremetal node inspect {{ inventory_hostname }} --wait {{ agent_inspect_timeout }}
+ register: node_inspect
+ failed_when:
+ - node_inspect.rc != 0
+ changed_when: true
+ when: kayobe_agent_inspect_done == ""
+
+        - name: Move to the neutron network interface, assuming it's a multi-tenant node
+ ansible.builtin.command:
+ cmd: |
+ {{ venv }}/bin/openstack baremetal node set {{ inventory_hostname }} --network-interface neutron
+ register: node_set
+ failed_when:
+ - node_set.rc != 0
+ changed_when: true
+ # do it slowly due to ironic api issues
+ throttle: 3
+ when:
+ - kayobe_agent_inspect_done == ""
+ - ironic_network_interface == "neutron"
+
+ - name: Remove network overrides for multi-tenant nodes
+ ansible.builtin.command:
+ cmd: |
+ {{ venv }}/bin/openstack baremetal node unset {{ inventory_hostname }} --driver-info inspection_network --driver-info cleaning_network --driver-info provision_network
+ register: node_set
+ failed_when:
+ - node_set.rc != 0
+ changed_when: true
+ # do it slowly due to ironic api issues
+ throttle: 3
+ when:
+ - kayobe_agent_inspect_done == ""
+ - ironic_network_interface == "neutron"
+
+ - name: Note when agent inspection is done
+ ansible.builtin.command:
+ cmd: |
+ {{ venv }}/bin/openstack baremetal node set {{ inventory_hostname }} --extra kayobe_agent_inspect_done={{ now(utc=true, fmt='%Y-%m-%dT%H:%M:%SZ') }}
+ register: node_set
+ failed_when:
+ - node_set.rc != 0
+ changed_when: true
+ when: kayobe_agent_inspect_done == ""
diff --git a/etc/kayobe/environments/stackhpc-baremetal/ansible/baremetal-4-clean.yml b/etc/kayobe/environments/stackhpc-baremetal/ansible/baremetal-4-clean.yml
new file mode 100644
index 0000000000..99d0609738
--- /dev/null
+++ b/etc/kayobe/environments/stackhpc-baremetal/ansible/baremetal-4-clean.yml
@@ -0,0 +1,112 @@
+---
+- name: Clean baremetal nodes and make them available
+ hosts: baremetal
+ gather_facts: false
+ max_fail_percentage: >-
+ {{ baremetal_compute_register_max_fail_percentage |
+ default(baremetal_compute_max_fail_percentage) |
+ default(kayobe_max_fail_percentage) |
+ default(100) }}
+ tags:
+ - baremetal
+ vars:
+ venv: "{{ virtualenv_path }}/openstack-cli"
+ controller_host: localhost
+ cleaning_timeout: "{{ 60 * 20 }}" # 20 minutes
+
+ tasks:
+ - name: Show and check baremetal node
+ delegate_to: "{{ controller_host }}"
+ vars:
+ # NOTE: Without this, the controller's ansible_host variable will not
+ # be respected when using delegate_to.
+ ansible_host: "{{ hostvars[controller_host].ansible_host | default(controller_host) }}"
+ agent_inspect_timeout: "{{ 60 * 20 }}" # 20 minutes
+ environment: "{{ openstack_auth_env }}"
+ block:
+
+ - name: Show baremetal node
+ ansible.builtin.command:
+ cmd: "{{ venv }}/bin/openstack baremetal node show {{ inventory_hostname }} -f json"
+ register: node_show
+ failed_when:
+ - node_show.rc != 0
+ changed_when: false
+
+ - name: Check BMC is up
+ ansible.builtin.uri:
+            url: "{{ ironic_driver_info['redfish_address'] + '/redfish/v1' }}"
+ method: GET
+ status_code: 200
+ validate_certs: false
+ timeout: 10
+
+ - name: Check for agent inspection details
+ ansible.builtin.set_fact:
+ kayobe_agent_inspect_done: "{{ (node_show.stdout | from_json)['extra'].get('kayobe_agent_inspect_done') }}"
+ kayobe_clean_done: "{{ (node_show.stdout | from_json)['extra'].get('kayobe_clean_done') }}"
+ network_interface: "{{ (node_show.stdout | from_json)['network_interface'] }}"
+ provision_state: "{{ (node_show.stdout | from_json)['provision_state'] }}"
+ node_maintenance: "{{ (node_show.stdout | from_json)['maintenance'] }}"
+
+        - name: Fail if agent inspection has not been done
+ ansible.builtin.fail:
+ msg: "{{ inventory_hostname }} has not been inspected"
+ when:
+ - kayobe_agent_inspect_done == ""
+
+ - name: Fail if not in manageable or clean failed state
+ ansible.builtin.fail:
+ msg: "{{ inventory_hostname }} has the wrong provision_state: {{ provision_state }}"
+ when:
+ - provision_state not in ["manageable", "clean failed"]
+ - kayobe_clean_done == ""
+
+        - name: If in 'clean failed', move back to manageable
+ ansible.builtin.command:
+ cmd: |
+ {{ venv }}/bin/openstack baremetal node manage {{ inventory_hostname }} --wait 300
+ register: node_manage
+ failed_when:
+ - node_manage.rc != 0
+ when:
+ - provision_state == "clean failed"
+
+ - name: Remove node from maintenance
+ ansible.builtin.command:
+ cmd: |
+ {{ venv }}/bin/openstack baremetal node maintenance unset {{ inventory_hostname }}
+ register: node_set
+ failed_when:
+ - node_set.rc != 0
+ when:
+ - node_maintenance
+ - provision_state in ["manageable", "clean failed"]
+
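+        # ironic_target_raid_config is assumed to hold a JSON document
+        # accepted by "openstack baremetal node set --target-raid-config".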
+        - name: Ensure hosts are set to use software RAID
+          ansible.builtin.shell: |
+            {{ venv }}/bin/openstack baremetal node set \
+              --target-raid-config '{{ ironic_target_raid_config }}' \
+              --raid-interface agent {{ inventory_hostname }}
+ when:
+ - kayobe_clean_done == ""
+
+ - name: Clean node and make it available
+ ansible.builtin.command:
+ cmd: |
+ {{ venv }}/bin/openstack baremetal node provide {{ inventory_hostname }} --wait {{ cleaning_timeout }}
+ register: node_set
+ failed_when:
+ - node_set.rc != 0
+ when:
+ - kayobe_clean_done == "" or provision_state in ["clean failed"]
+
+ - name: Note when cleaning has completed
+ ansible.builtin.command:
+ cmd: |
+ {{ venv }}/bin/openstack baremetal node set {{ inventory_hostname }} --extra kayobe_clean_done={{ now(utc=true, fmt='%Y-%m-%dT%H:%M:%SZ') }}
+ register: node_set
+ failed_when:
+ - node_set.rc != 0
+ changed_when: true
+ when: kayobe_clean_done == ""
diff --git a/etc/kayobe/environments/stackhpc-baremetal/ansible/baremetal-all.yml b/etc/kayobe/environments/stackhpc-baremetal/ansible/baremetal-all.yml
new file mode 100644
index 0000000000..0d1e17e4dd
--- /dev/null
+++ b/etc/kayobe/environments/stackhpc-baremetal/ansible/baremetal-all.yml
@@ -0,0 +1,11 @@
+---
+#- name: Ensure overcloud baremetal is enrolled
+# ansible.builtin.import_playbook: ./baremetal-0-enroll-overcloud.yml
+- name: Check BMC is up
+ ansible.builtin.import_playbook: ./baremetal-1-check-bmc-up.yml
+- name: Do redfish inspection
+ ansible.builtin.import_playbook: ./baremetal-2-ensure-redfish-inspect.yml
+- name: Do agent inspection
+ ansible.builtin.import_playbook: ./baremetal-3-ensure-agent-inspect.yml
+- name: Make baremetal node available
+ ansible.builtin.import_playbook: ./baremetal-4-clean.yml
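+#
+# These playbooks are typically run with something like:
+#   kayobe playbook run $KAYOBE_CONFIG_PATH/environments/stackhpc-baremetal/ansible/baremetal-all.yml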
diff --git a/etc/kayobe/environments/stackhpc-baremetal/ansible/diagnose-baremetal.yml b/etc/kayobe/environments/stackhpc-baremetal/ansible/diagnose-baremetal.yml
new file mode 100644
index 0000000000..ccc113fa21
--- /dev/null
+++ b/etc/kayobe/environments/stackhpc-baremetal/ansible/diagnose-baremetal.yml
@@ -0,0 +1,94 @@
+---
+- name: Node enrolment health check
+ hosts: baremetal-compute
+ gather_facts: false
+ connection: local
+ vars:
+ ping_cmd: "ping -c1 -W1"
+ venv: "{{ virtualenv_path }}/openstack-cli"
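+    # Set to true (e.g. -e purge_faulty_baremetal=true) to delete nodes that
+    # have never been provisioned and are stuck outside available/active.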
+ purge_faulty_baremetal: false
+ tasks:
+ - name: Ping BMC
+ ansible.builtin.command: "{{ ping_cmd }} {{ redfish_address }}"
+ register: bmc_ping
+ failed_when: redfish_address is not defined
+ changed_when: false
+
+ - name: Fetch bmnode info
+ openstack.cloud.baremetal_node_info:
+ name: "{{ inventory_hostname }}"
+ register: bmnode
+ failed_when: false
+ changed_when: false
+
+ - name: Evaluate node details
+ ansible.builtin.set_fact:
+ bmnode_details:
+          bmc_reachable: "{{ (bmc_ping.rc | default(1)) == 0 }}"
+ kayobe_agent_inspect_done_ts: "{{ bmnode.nodes[0].extra.get('kayobe_agent_inspect_done') }}"
+ kayobe_first_provision_ts: "{{ bmnode.nodes[0].extra.get('kayobe_first_provision') }}"
+ ironic_state: "{{ bmnode.nodes[0].provision_state | default('unknown') }}"
+ ironic_power: "{{ bmnode.nodes[0].power_state | default('unknown') }}"
+ ironic_maintenance: "{{ bmnode.nodes[0].maintenance | default('unknown') }}"
+ ironic_last_error: "{{ bmnode.nodes[0].last_error | default('') }}"
+
+ - name: Evaluate enrolment status and hints
+ ansible.builtin.set_fact:
+ enrolment_status: "{{ bmnode_status }}"
+ enrolment_report_entry:
+ node: "{{ inventory_hostname }}"
+ status: "{{ bmnode_status }}"
+ details: "{{ bmnode_details }}"
+ vars:
+ bmnode_status: >-
+ {% if not bmnode_details.bmc_reachable %}
+ Can't ping node BMC
+ {% elif bmnode_details.kayobe_first_provision_ts %}
+ Node has completed Kayobe node prep for prod pipeline — check Ironic for details
+ {% elif bmnode_details.ironic_state == 'enroll' %}
+ Node stuck in Ironic node enroll state
+ {% elif not bmnode_details.kayobe_agent_inspect_done_ts %}
+ Node has not yet passed agent inspection
+ {% else %}
+          Node has not yet completed node prep for prod pipeline
+ {% endif %}
+ changed_when: false
+
+ - name: Show diagnosis statement
+ debug:
+ msg: "Status: {{ enrolment_report_entry.status }}"
+
+ - name: Print diagnosis
+ ansible.builtin.debug:
+ msg:
+ - "Node: {{ enrolment_report_entry.node }}"
+ - "Status: {{ enrolment_report_entry.status }}"
+ - "Details: {{ enrolment_report_entry.details }}"
+
+ - name: Test faulty baremetal
+ block:
+ - name: Undeploy baremetal node (test BMC connection)
+ ansible.builtin.command: "{{ venv }}/bin/openstack baremetal node undeploy {{ inventory_hostname }} --wait"
+ failed_when: false
+
+ - name: Manage baremetal node (test BMC connection)
+ ansible.builtin.command: "{{ venv }}/bin/openstack baremetal node manage {{ inventory_hostname }} --wait"
+ failed_when: false
+
+        - name: Delete faulty baremetal node
+ ansible.builtin.command: "{{ venv }}/bin/openstack baremetal node delete {{ inventory_hostname }}"
+      when:
+        - purge_faulty_baremetal | bool
+        - bmnode_details.ironic_state not in ['available', 'active']
+        - not bmnode_details.kayobe_first_provision_ts
+
+ #- name: Build summary list
+ # ansible.builtin.set_fact:
+ # enrolment_report_all: "{{ (enrolment_report_all | default([])) + [ hostvars[item].enrolment_report_entry ] }}"
+ # run_once: true
+ # loop: "{{ groups['all'] | sort }}"
+
+ #- name: Pretty-print all nodes
+ # ansible.builtin.debug:
+ # msg: "{{ enrolment_report_all | to_nice_json }}"
+ # run_once: true
diff --git a/etc/kayobe/environments/stackhpc-baremetal/ansible/download-host-image.yml b/etc/kayobe/environments/stackhpc-baremetal/ansible/download-host-image.yml
new file mode 100644
index 0000000000..2dde717b35
--- /dev/null
+++ b/etc/kayobe/environments/stackhpc-baremetal/ansible/download-host-image.yml
@@ -0,0 +1,86 @@
+---
+- name: Download an overcloud host image from Ark
+ hosts: controllers
+ vars:
+ # This var is an edited version of stackhpc_overcloud_host_image_url
+ # without the auth credentials in it. Auth is handled by username and
+ # password in the get_url task of this playbook
+ stackhpc_overcloud_host_image_url_no_auth: "{{ stackhpc_release_pulp_content_url }}/kayobe-images/\
+ {{ openstack_release }}/{{ os_distribution }}/{{ os_release }}/\
+ {{ stackhpc_overcloud_host_image_version }}/\
+ overcloud-{{ os_distribution }}-{{ os_release }}.qcow2"
+    overcloud_host_image_name: "overcloud-{{ os_distribution }}-{{ os_release }}-{{ stackhpc_overcloud_host_image_version }}"
+ overcloud_host_image_ironic: false
+ overcloud_host_image_glance: true
+ tasks:
+ - name: Print image information
+ ansible.builtin.debug:
+ msg: |
+ OS Distribution: {{ os_distribution }}
+ OS Release: {{ os_release }}
+ Image tag: {{ stackhpc_overcloud_host_image_version }}
+
+ - name: Install dependencies
+ ansible.builtin.pip:
+ name: openstacksdk
+ state: latest
+
+ - name: Download image artifact
+ ansible.builtin.get_url:
+ url: "{{ stackhpc_overcloud_host_image_url_no_auth }}"
+ username: "{{ stackhpc_release_pulp_username }}"
+ password: "{{ stackhpc_release_pulp_password }}"
+ force_basic_auth: true
+ unredirected_headers:
+ - Authorization
+ dest: /tmp/{{ overcloud_host_image_name }}.qcow2
+ mode: "0644"
+ register: image_download_result
+ until: image_download_result.status_code in [200, 304]
+ retries: 3
+ delay: 60
+
+    # NOTE(jake): It would be nice to get the *real* checksum from Ark eventually.
+ - name: Get checksum of file
+ ansible.builtin.stat:
+ path: /tmp/{{ overcloud_host_image_name }}.qcow2
+ checksum_algorithm: sha256
+ register: host_image
+
+ - name: Write checksum to vars file
+ ansible.builtin.copy:
+ content: |
+ ---
+ # This file is autogenerated by Ansible; DO NOT EDIT!
+
+ stackhpc_overcloud_host_image_name: "{{ overcloud_host_image_name }}"
+ stackhpc_overcloud_host_image_checksum: "{{ host_image.stat.checksum }}"
+ dest: "{{ kayobe_env_config_path }}/stackhpc-overcloud-host-image.yml"
+ delegate_to: localhost
+ run_once: true
+
+ - block:
+ - name: Copy the image to the Ironic volume
+ ansible.builtin.copy:
+ src: /tmp/{{ overcloud_host_image_name }}.qcow2
+ dest: /var/lib/docker/volumes/ironic/_data/{{ overcloud_host_image_name }}.qcow2
+ remote_src: true
+ become: true
+
+ - name: Make the image available to the Ironic HTTP container
+ community.docker.docker_container_exec:
+ container: ironic_http
+ command: "mv /var/lib/ironic/{{ overcloud_host_image_name }}.qcow2 /var/lib/ironic/httpboot"
+ become: true
+ when: overcloud_host_image_ironic | bool
+
+ - name: Upload an image to Glance
+ openstack.cloud.image:
+ name: "{{ overcloud_host_image_name }}"
+ container_format: bare
+ disk_format: qcow2
+ state: present
+ filename: /tmp/{{ overcloud_host_image_name }}.qcow2
+ run_once: true
+ environment: "{{ openstack_auth_env }}"
+ when: overcloud_host_image_glance | bool
diff --git a/etc/kayobe/environments/stackhpc-baremetal/ansible/provision-overcloud-nova.yml b/etc/kayobe/environments/stackhpc-baremetal/ansible/provision-overcloud-nova.yml
new file mode 100644
index 0000000000..7a0407a503
--- /dev/null
+++ b/etc/kayobe/environments/stackhpc-baremetal/ansible/provision-overcloud-nova.yml
@@ -0,0 +1,84 @@
+---
+- name: Provision baremetal instances
+ hosts: baremetal
+ gather_facts: false
+ vars:
+ network: "{{ ironic_provision_network }}"
+ flavor: "{{ ironic_provision_flavor }}"
+ image: "{{ ironic_provision_image }}"
+ key_name: "{{ ironic_provision_key_name }}"
+ force_server_delete: false
+ controller_host: localhost
+ venv: "{{ virtualenv_path }}/openstack-cli"
+ tasks:
+ - name: Show and check baremetal node
+ delegate_to: "{{ controller_host }}"
+ vars:
+ # NOTE: Without this, the controller's ansible_host variable will not
+ # be respected when using delegate_to.
+ ansible_host: "{{ hostvars[controller_host].ansible_host | default(controller_host) }}"
+ environment: "{{ openstack_auth_env }}"
+ block:
+ - name: Gather information about baremetal nodes
+ openstack.cloud.baremetal_node_info:
+ name: "{{ inventory_hostname }}"
+ register: bmnode_raw
+
+ - name: Set fact for baremetal node information
+ ansible.builtin.set_fact:
+ bmnode: "{{ bmnode_raw.nodes | first }}"
+
+        - name: Check whether first provision has been done
+ ansible.builtin.set_fact:
+ kayobe_clean_done: "{{ bmnode.extra.get('kayobe_clean_done') }}"
+ kayobe_agent_inspect_done: "{{ bmnode.extra.get('kayobe_agent_inspect_done') }}"
+ kayobe_first_provision_done: "{{ bmnode.extra.get('kayobe_first_provision') }}"
+
+      # TODO: we should really check that cleaning completed
+ - name: Fail if kayobe bootstrap not done
+ ansible.builtin.fail:
+ msg: "{{ inventory_hostname }} has not been bootstrapped yet."
+ when:
+ - kayobe_agent_inspect_done == ""
+
+ - name: Fail if not in available or active
+ ansible.builtin.fail:
+ msg: "{{ inventory_hostname }} has the wrong provision_state: {{ bmnode.provision_state }}"
+ when:
+ - bmnode.provision_state not in ["active", "available"]
+
+ - name: Create port
+ openstack.cloud.port:
+ state: "{{ 'absent' if force_server_delete else 'present' }}"
+ name: "{{ inventory_hostname }}"
+ network: "{{ network }}"
+ fixed_ips: "{{ ironic_provision_fixed_ips | default(omit) }}"
+ vnic_type: baremetal
+ delegate_to: localhost
+ register: bmport
+
+      # TODO: we should wait until we can reach SSH, via a jump host
+ - name: Deploy Server
+ openstack.cloud.server:
+ state: "{{ 'absent' if force_server_delete else 'present' }}"
+ name: "{{ inventory_hostname }}"
+ nics:
+ - port-id: "{{ bmport.port.id }}"
+ image: "{{ image }}"
+ flavor: "{{ flavor }}"
+ key_name: "{{ key_name }}"
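+            # Nova's "zone:host:node" availability zone syntax with zone and
+            # host left empty pins the instance to this exact Ironic node.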
+ availability_zone: "::{{ bmnode.id }}"
+ timeout: 1800 # wait 30 mins for build
+ config_drive: yes
+ delegate_to: localhost
+ register: server
+
+ - name: Note when provision has first worked
+ ansible.builtin.command:
+ cmd: |
+ {{ venv }}/bin/openstack baremetal node set {{ inventory_hostname }} --extra kayobe_first_provision={{ now(utc=true, fmt='%Y-%m-%dT%H:%M:%SZ') }}
+ register: node_set
+ failed_when:
+ - node_set.rc != 0
+ changed_when: true
+ when: kayobe_first_provision_done == ""
diff --git a/etc/kayobe/environments/stackhpc-baremetal/ansible/recover-baremetal.yml b/etc/kayobe/environments/stackhpc-baremetal/ansible/recover-baremetal.yml
new file mode 100644
index 0000000000..609a413c82
--- /dev/null
+++ b/etc/kayobe/environments/stackhpc-baremetal/ansible/recover-baremetal.yml
@@ -0,0 +1,61 @@
+---
+- name: Recover baremetal machines
+ hosts: baremetal-compute
+ gather_facts: false
+ connection: local
+ vars:
+ venv: "{{ virtualenv_path }}/openstack-cli"
+ controller_host: localhost
+ tasks:
+ - name: Gather information about baremetal nodes
+ openstack.cloud.baremetal_node_info:
+ name: "{{ inventory_hostname }}"
+ register: bmnode
+
+ - name: Set facts for provision state
+ ansible.builtin.set_fact:
+ bmnode_prov: "{{ bmnode.nodes[0].provision_state }}"
+
+ - name: Recover BMC
+ block:
+ - name: Reboot BMC
+ community.general.redfish_command:
+ category: Manager
+ command: PowerReboot
+ resource_id: 1
+ baseuri: "{{ ironic_redfish_address }}"
+ username: "{{ ironic_redfish_username }}"
+ password: "{{ ironic_redfish_password }}"
+
+ - name: Wait 300 seconds for port 443 to become open
+ ansible.builtin.wait_for:
+ port: 443
+ host: "{{ ironic_redfish_address }}"
+ delay: 20
+ timeout: 300
+
+ - name: Check BMC back up again
+ ansible.builtin.uri:
+            url: "{{ ironic_driver_info['redfish_address'] + '/redfish/v1' }}"
+ method: GET
+ status_code: 200
+ validate_certs: false
+ timeout: 10
+ register: uri_output
+ until: uri_output.status == 200
+ delay: 5
+ retries: 24 # Retries for 24 * 5 seconds = 120 seconds = 2 minutes
+ when: bmnode_prov in ['deploy failed', 'error', 'clean failed']
+
+ - name: Manage baremetals in 'clean failed' state
+ ansible.builtin.command:
+ cmd: "{{ venv }}/bin/openstack baremetal node manage {{ inventory_hostname }}"
+      when: bmnode_prov == 'clean failed'
+
+ - name: Undeploy baremetals in 'deploy failed' or 'error' state
+ ansible.builtin.command:
+ cmd: "{{ venv }}/bin/openstack baremetal node undeploy {{ inventory_hostname }}"
+ when: bmnode_prov in ['deploy failed', 'error']
+
+- name: Make baremetal nodes available
+ ansible.builtin.import_playbook: ./baremetal-4-clean.yml
diff --git a/etc/kayobe/environments/stackhpc-baremetal/inventory/group_vars/baremetal-redfish/ironic b/etc/kayobe/environments/stackhpc-baremetal/inventory/group_vars/baremetal-redfish/ironic
new file mode 100644
index 0000000000..b3ffa9613c
--- /dev/null
+++ b/etc/kayobe/environments/stackhpc-baremetal/inventory/group_vars/baremetal-redfish/ironic
@@ -0,0 +1,19 @@
+---
+
+ironic_driver: redfish
+
+ironic_driver_info:
+ redfish_system_id: "{{ ironic_redfish_system_id }}"
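+  # Assumed to be the address of the virtual Redfish BMC; the sushy emulator
+  # in the stackhpc-sushy-baremetal environment listens here.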
+ redfish_address: "http://192.168.33.3:34343"
+ redfish_username: "{{ ironic_redfish_username }}"
+ redfish_password: "{{ ironic_redfish_password }}"
+ redfish_verify_ca: "{{ ironic_redfish_verify_ca }}"
+
+ironic_redfish_verify_ca: false
+ironic_redfish_address: "{{ redfish_address }}"
+ironic_redfish_system_id:
+ironic_redfish_username: "{{ secrets_redfish_baremetal_username | default('') }}"
+ironic_redfish_password: "{{ secrets_redfish_baremetal_password | default('') }}"
+ironic_resource_class: "example_resource_class"
+ironic_capabilities: "boot_option:local,boot_mode:uefi"
+ironic_properties: {}
\ No newline at end of file
diff --git a/etc/kayobe/environments/stackhpc-baremetal/inventory/groups b/etc/kayobe/environments/stackhpc-baremetal/inventory/groups
new file mode 100644
index 0000000000..bf0c4061d4
--- /dev/null
+++ b/etc/kayobe/environments/stackhpc-baremetal/inventory/groups
@@ -0,0 +1,8 @@
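+# Overcloud hosts and compute nodes are both managed via Redfish; the
+# "baremetal" group is the union targeted by the enrolment playbooks.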
+[baremetal-overcloud]
+
+[baremetal-redfish:children]
+baremetal-compute
+baremetal-overcloud
+
+[baremetal:children]
+baremetal-redfish
diff --git a/etc/kayobe/environments/stackhpc-baremetal/inventory/hosts b/etc/kayobe/environments/stackhpc-baremetal/inventory/hosts
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/etc/kayobe/environments/stackhpc-baremetal/ironic.yml b/etc/kayobe/environments/stackhpc-baremetal/ironic.yml
new file mode 100644
index 0000000000..60d830bb1f
--- /dev/null
+++ b/etc/kayobe/environments/stackhpc-baremetal/ironic.yml
@@ -0,0 +1,135 @@
+---
+###############################################################################
+# Ironic configuration.
+
+# Specify the list of hardware types to load during service initialization.
+kolla_ironic_enabled_hardware_types:
+ - redfish
+
+# Specify the list of bios interfaces to load during service initialization.
+#kolla_ironic_enabled_bios_interfaces:
+
+# Default bios interface to be used for nodes that do not have bios_interface
+# field set.
+#kolla_ironic_default_bios_interface:
+
+# Specify the list of boot interfaces to load during service initialization.
+kolla_ironic_enabled_boot_interfaces:
+ - redfish-virtual-media
+ - redfish-https
+
+# Default boot interface to be used for nodes that do not have boot_interface
+# field set.
+kolla_ironic_default_boot_interface: redfish-virtual-media
+
+# Specify the list of console interfaces to load during service initialization.
+#kolla_ironic_enabled_console_interfaces:
+
+# Default console interface to be used for nodes that do not have
+# console_interface field set.
+#kolla_ironic_default_console_interface:
+
+# Specify the list of deploy interfaces to load during service initialization.
+#kolla_ironic_enabled_deploy_interfaces:
+
+# Default deploy interface to be used for nodes that do not have
+# deploy_interface field set.
+#kolla_ironic_default_deploy_interface:
+
+# Specify the list of inspect interfaces to load during service initialization.
+kolla_ironic_enabled_inspect_interfaces:
+ - redfish
+ - agent
+
+# Default inspect interface to be used for nodes that do not have
+# inspect_interface field set.
+kolla_ironic_default_inspect_interface: redfish
+
+# Specify the list of management interfaces to load during service
+# initialization.
+kolla_ironic_enabled_management_interfaces:
+ - redfish
+
+# Default management interface to be used for nodes that do not have
+# management_interface field set.
+#kolla_ironic_default_management_interface:
+
+# Specify the list of network interfaces to load during service initialization.
+kolla_ironic_enabled_network_interfaces:
+ - neutron
+ - flat
+ - noop
+
+# Default network interface to be used for nodes that do not have
+# network_interface field set.
+kolla_ironic_default_network_interface: neutron
+
+# Specify the list of power interfaces to load during service initialization.
+kolla_ironic_enabled_power_interfaces:
+ - redfish
+
+# Default power interface to be used for nodes that do not have power_interface
+# field set.
+#kolla_ironic_default_power_interface:
+
+# Specify the list of raid interfaces to load during service initialization.
+#kolla_ironic_enabled_raid_interfaces:
+
+# Default raid interface to be used for nodes that do not have
+# raid_interface field set.
+kolla_ironic_default_raid_interface: agent
+
+# Specify the list of rescue interfaces to load during service initialization.
+#kolla_ironic_enabled_rescue_interfaces:
+
+# Default rescue interface to be used for nodes that do not have
+# rescue_interface field set.
+#kolla_ironic_default_rescue_interface:
+
+# Specify the list of storage interfaces to load during
+# service initialization.
+#kolla_ironic_enabled_storage_interfaces:
+
+# Default storage interface to be used for nodes that do not
+# have storage_interface field set.
+#kolla_ironic_default_storage_interface:
+
+# Specify the list of vendor interfaces to load during service initialization.
+#kolla_ironic_enabled_vendor_interfaces:
+
+# Default vendor interface to be used for nodes that do not have
+# vendor_interface field set.
+#kolla_ironic_default_vendor_interface:
+
+# Name of the Neutron network to use for cleaning.
+#kolla_ironic_cleaning_network:
+
+# Name of the Neutron network to use for provisioning.
+#kolla_ironic_provisioning_network:
+
+# List of default kernel parameters to append for baremetal PXE boot.
+#kolla_ironic_pxe_append_params_default:
+
+# List of additional kernel parameters to append for baremetal PXE boot.
+#kolla_ironic_pxe_append_params_extra:
+
+# List of kernel parameters to append for baremetal PXE boot.
+#kolla_ironic_pxe_append_params:
+
+###############################################################################
+# Ironic Node Configuration
+
+# Whether or not to enable the serial consoles on post configure
+#ironic_serial_console_autoenable:
+
+# This defines the start of the range of TCP ports to be used for the IPMI
+# socat serial consoles.
+#ironic_serial_console_tcp_pool_start:
+
+# This defines the end of the range of TCP ports to be used for the IPMI
+# socat serial consoles.
+#ironic_serial_console_tcp_pool_end:
+
+###############################################################################
+# Dummy variable to allow Ansible to accept this file.
+workaround_ansible_issue_8743: yes
diff --git a/etc/kayobe/environments/stackhpc-baremetal/kolla.yml b/etc/kayobe/environments/stackhpc-baremetal/kolla.yml
new file mode 100644
index 0000000000..cfb6870185
--- /dev/null
+++ b/etc/kayobe/environments/stackhpc-baremetal/kolla.yml
@@ -0,0 +1,3 @@
+---
+
+kolla_enable_ironic: true
\ No newline at end of file
diff --git a/etc/kayobe/environments/stackhpc-baremetal/kolla/config/ironic/ironic.conf b/etc/kayobe/environments/stackhpc-baremetal/kolla/config/ironic/ironic.conf
new file mode 100644
index 0000000000..538c103518
--- /dev/null
+++ b/etc/kayobe/environments/stackhpc-baremetal/kolla/config/ironic/ironic.conf
@@ -0,0 +1,74 @@
+{% raw %}
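+{# This template is wrapped in a raw block so that Kayobe passes it through
+   untouched and kolla-ansible performs the templating (internal_net_ips and
+   inventory_hostname are kolla-ansible-side variables). #}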
+{% set internal_net_ip = (internal_net_ips).get(inventory_hostname) %}
+
+[DEFAULT]
+# Avoid checksum issues; convert the image on the node being deployed.
+force_raw_images = false
+# Avoid some timeouts of heartbeats and vif deletes
+rpc_response_timeout = 360
+
+[conductor]
+automated_clean = true
+bootloader = file:///var/lib/ironic/httpboot/esp.img
+deploy_kernel = file:///var/lib/ironic/httpboot/ironic-agent.kernel
+deploy_ramdisk = file:///var/lib/ironic/httpboot/ironic-agent.initramfs
+
+# We have busy conductors failing to heartbeat
+# Default is 10 secs
+heartbeat_interval = 30
+# Default is 60 seconds
+heartbeat_timeout = 360
+sync_local_state_interval = 360
+
+# Normally this is 100. We see eventlet threads not making much progress, so
+# for safety reduce this by half; excess work should remain on the RabbitMQ
+# queue.
+workers_pool_size = 50
+# Normally this is 8, keep it same
+period_max_workers = 8
+
+# Increase power sync interval to reduce load
+sync_power_state_interval = 120
+power_failure_recovery_interval = 120
+# Stop checking for orphan allocations for now
+check_allocations_interval = 120
+
+# Wait much longer before provision timeout check, to reduce background load
+# The default is 60 seconds
+check_provision_state_interval = 120
+check_rescue_state_interval = 120
+
+[database]
+# Usually this is 50, reduce to stop DB connection timeouts
+# and instead just make eventlet threads wait a bit longer
+max_overflow = 5
+# By default this is 30 seconds, but as we reduce
+# the pool overflow, some people will need to wait longer
+pool_timeout = 60
+
+[neutron]
+# Increase the neutron client timeout to allow for the slow management
+# switches.
+timeout = 300
+request_timeout = 300
+inspection_network = "{{ inspection_net_name | default('inspect-net') }}"
+
+[glance]
+# Retry image downloads once on failure.
+num_retries = 1
+
+[redfish]
+kernel_append_params = nofb nomodeset vga=normal console=tty0 console=ttyS0,115200n8 ipa-insecure=1 {% if internal_net_ip %}ipa-ntp-server={{ internal_net_ip }}{% endif %}
+
+[inspector]
+extra_kernel_params = ipa-collect-lldp=1 ipa-inspection-collectors=default,logs,pci-devices ipa-insecure=1
+hooks = ramdisk-error,validate-interfaces,ports,local-link-connection,parse-lldp,root-device,cpu-capabilities,architecture
+add_ports = all
+
+[pxe]
+# 100GB size 4 weeks ttl
+image_cache_size = 95367
+image_cache_ttl = 40320
+{% endraw %}
diff --git a/etc/kayobe/environments/stackhpc-baremetal/kolla/ironic/ironic.conf b/etc/kayobe/environments/stackhpc-baremetal/kolla/ironic/ironic.conf
new file mode 100644
index 0000000000..538c103518
--- /dev/null
+++ b/etc/kayobe/environments/stackhpc-baremetal/kolla/ironic/ironic.conf
@@ -0,0 +1,74 @@
+{% raw %}
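+{# This template is wrapped in a raw block so that Kayobe passes it through
+   untouched and kolla-ansible performs the templating (internal_net_ips and
+   inventory_hostname are kolla-ansible-side variables). #}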
+{% set internal_net_ip = (internal_net_ips).get(inventory_hostname) %}
+
+[DEFAULT]
+# Avoid checksum issues; convert the image on the node being deployed.
+force_raw_images = false
+# Avoid some timeouts of heartbeats and vif deletes
+rpc_response_timeout = 360
+
+[conductor]
+automated_clean = true
+bootloader = file:///var/lib/ironic/httpboot/esp.img
+deploy_kernel = file:///var/lib/ironic/httpboot/ironic-agent.kernel
+deploy_ramdisk = file:///var/lib/ironic/httpboot/ironic-agent.initramfs
+
+# We have busy conductors failing to heartbeat
+# Default is 10 secs
+heartbeat_interval = 30
+# Default is 60 seconds
+heartbeat_timeout = 360
+sync_local_state_interval = 360
+
+# Normally this is 100. We see eventlet threads not making much progress, so
+# for safety reduce this by half; excess work should remain on the RabbitMQ
+# queue.
+workers_pool_size = 50
+# Normally this is 8, keep it same
+period_max_workers = 8
+
+# Increase power sync interval to reduce load
+sync_power_state_interval = 120
+power_failure_recovery_interval = 120
+# Stop checking for orphan allocations for now
+check_allocations_interval = 120
+
+# Wait much longer before provision timeout check, to reduce background load
+# The default is 60 seconds
+check_provision_state_interval = 120
+check_rescue_state_interval = 120
+
+[database]
+# Usually this is 50, reduce to stop DB connection timeouts
+# and instead just make eventlet threads wait a bit longer
+max_overflow = 5
+# By default this is 30 seconds, but as we reduce
+# the pool overflow, some people will need to wait longer
+pool_timeout = 60
+
+[neutron]
+# Increase the neutron client timeout to allow for the slow management
+# switches.
+timeout = 300
+request_timeout = 300
+inspection_network = "{{ inspection_net_name | default('inspect-net') }}"
+
+[glance]
+# Retry image downloads once on failure.
+num_retries = 1
+
+[redfish]
+kernel_append_params = nofb nomodeset vga=normal console=tty0 console=ttyS0,115200n8 ipa-insecure=1 {% if internal_net_ip %}ipa-ntp-server={{ internal_net_ip }}{% endif %}
+
+[inspector]
+extra_kernel_params = ipa-collect-lldp=1 ipa-inspection-collectors=default,logs,pci-devices ipa-insecure=1
+hooks = ramdisk-error,validate-interfaces,ports,local-link-connection,parse-lldp,root-device,cpu-capabilities,architecture
+add_ports = all
+
+[pxe]
+# 100GB size 4 weeks ttl
+image_cache_size = 95367
+image_cache_ttl = 40320
+{% endraw %}
diff --git a/etc/kayobe/environments/stackhpc-sushy-baremetal/.kayobe-environment b/etc/kayobe/environments/stackhpc-sushy-baremetal/.kayobe-environment
new file mode 100644
index 0000000000..0a2a5f6995
--- /dev/null
+++ b/etc/kayobe/environments/stackhpc-sushy-baremetal/.kayobe-environment
@@ -0,0 +1,5 @@
+---
+
+dependencies:
+ - ci-aio
+ - stackhpc-baremetal
diff --git a/etc/kayobe/environments/stackhpc-sushy-baremetal/ansible/create-virtual-baremetal.yml b/etc/kayobe/environments/stackhpc-sushy-baremetal/ansible/create-virtual-baremetal.yml
new file mode 100644
index 0000000000..60c5d11679
--- /dev/null
+++ b/etc/kayobe/environments/stackhpc-sushy-baremetal/ansible/create-virtual-baremetal.yml
@@ -0,0 +1,52 @@
+---
+- name: Create Libvirt vBMC nodes
+ gather_facts: false
+ become: true
+ hosts: sushy-libvirt
+ tasks:
+ - name: Gather facts
+ setup:
+ delegate_to: localhost
+
+ - name: Install Python requirements
+ ansible.builtin.pip:
+ name:
+ - libvirt-python==11.3.0
+ - lxml==5.4.0
+ delegate_to: localhost
+
+ - name: Define vBMC storage pool
+ community.libvirt.virt_pool:
+ command: define
+ name: default
+ xml: "{{ lookup('template', sushy_directory + '/vbmc-pool.xml.j2') }}"
+ delegate_to: localhost
+ run_once: true
+
+ - name: Start vBMC storage pool
+ community.libvirt.virt_pool:
+ state: active
+ name: default
+ delegate_to: localhost
+ run_once: true
+
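+    # The stackhpc.libvirt-vm role defines each VM on the control host, with
+    # its disk volume placed in the "default" pool created above.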
+ - import_role:
+ name: stackhpc.libvirt-vm
+ vars:
+ libvirt_vm_arch: x86_64
+ libvirt_vms:
+ - state: present
+ name: "{{ inventory_hostname }}"
+ xml_file: "{{ sushy_directory }}/vbmc-node.xml.j2"
+ volumes:
+ - name: '{{ inventory_hostname }}.qcow2'
+ device: 'disk'
+ format: 'qcow2'
+ capacity: '20GB'
+ pool: 'default'
+ interfaces:
+ - network: 'breth1'
+ start: false
+ autostart: false
+ boot_firmware: uefi
+ delegate_to: localhost
diff --git a/etc/kayobe/environments/stackhpc-sushy-baremetal/ansible/generate-mac-addresses.yml b/etc/kayobe/environments/stackhpc-sushy-baremetal/ansible/generate-mac-addresses.yml
new file mode 100644
index 0000000000..24d62cdde7
--- /dev/null
+++ b/etc/kayobe/environments/stackhpc-sushy-baremetal/ansible/generate-mac-addresses.yml
@@ -0,0 +1,21 @@
+---
+- name: Generate virtual baremetal MAC addresses
+ hosts: sushy-libvirt
+ gather_facts: false
+ tasks:
+ - name: Write hosts
+ block:
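+        # 52:54:00 is the locally administered OUI conventionally used for
+        # QEMU/KVM guests; random_mac fills in the remaining three octets.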
+ - name: Create address dictionary
+ set_fact:
+ bikolla_mac_addresses: "{{ bikolla_mac_addresses | combine({item: '52:54:00' | community.general.random_mac}) }}"
+ vars:
+ bikolla_mac_addresses: {}
+ delegate_to: "{{ sushy_host | default('localhost') }}"
+          with_items: "{{ ansible_play_hosts }}"
+
+ - name: Write mac addresses file
+ copy:
+ content: '{{ {"bikolla_mac_addresses": bikolla_mac_addresses} | to_nice_yaml }}'
+ dest: "{{ kayobe_env_config_path }}/bikolla-mac-addresses.yml"
+ delegate_to: "{{ sushy_host | default('localhost') }}"
+ run_once: true
diff --git a/etc/kayobe/environments/stackhpc-sushy-baremetal/ansible/setup-local-link-information.yml b/etc/kayobe/environments/stackhpc-sushy-baremetal/ansible/setup-local-link-information.yml
new file mode 100644
index 0000000000..b6d512db1f
--- /dev/null
+++ b/etc/kayobe/environments/stackhpc-sushy-baremetal/ansible/setup-local-link-information.yml
@@ -0,0 +1,27 @@
+---
+- name: Setup fake local_link_information
+ hosts: sushy-libvirt-ipxe
+ gather_facts: false
+ tasks:
+ - name: Get baremetal port metadata
+ openstack.cloud.baremetal_port_info:
+ node: "{{ inventory_hostname }}"
+ delegate_to: localhost
+ register: baremetal
+
+    - name: Show baremetal port metadata
+      ansible.builtin.debug:
+        msg: "{{ baremetal.baremetal_ports[0] }}"
+
+ - name: Update baremetal port
+ openstack.cloud.baremetal_port:
+ state: present
+        id: "{{ baremetal.baremetal_ports[0].id }}"
+ node: "{{ inventory_hostname }}"
+ address: "{{ bikolla_mac_addresses[inventory_hostname] }}"
+ is_pxe_enabled: true
+ local_link_connection:
+ switch_id: "{{ bikolla_mac_addresses[inventory_hostname] }}"
+          port_id: "{{ baremetal.baremetal_ports[0].id }}"
+ switch_info: "{{ inventory_hostname }}"
+ delegate_to: "{{ sushy_host | default('localhost') }}"
diff --git a/etc/kayobe/environments/stackhpc-sushy-baremetal/ansible/sushy-emulator.yml b/etc/kayobe/environments/stackhpc-sushy-baremetal/ansible/sushy-emulator.yml
new file mode 100644
index 0000000000..faabeb4d3e
--- /dev/null
+++ b/etc/kayobe/environments/stackhpc-sushy-baremetal/ansible/sushy-emulator.yml
@@ -0,0 +1,66 @@
+---
+- name: Ensure Sushy Emulator is deployed
+ hosts: "{{ sushy_host | default('localhost') }}"
+ become: true
+ gather_facts: false
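+  # Installs libvirt and sushy-tools on the emulator host and runs the
+  # virtual Redfish BMC as a systemd service (sushyemud).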
+ tasks:
+ - name: Create Sushy configuration directory
+ ansible.builtin.file:
+ path: /etc/sushy
+ state: directory
+
+ - name: Template Sushy configuration
+ ansible.builtin.template:
+ src: "{{ sushy_directory }}/sushy.conf.j2"
+ dest: "/etc/sushy/sushy.conf"
+
+ - name: Enable Rocky devel repository
+ community.general.dnf_config_manager:
+ name: devel
+ state: enabled
+
+ - name: Install package dependencies
+ ansible.builtin.dnf:
+ name:
+ - qemu-kvm
+ - libvirt
+ - libvirt-devel
+ - python3-devel
+ state: present
+
+ - name: Start and enable the QEMU service
+ ansible.builtin.systemd_service:
+ name: virtqemud
+ state: started
+ enabled: true
+
+    - name: Start and enable the virtual storage service
+ ansible.builtin.systemd_service:
+ name: virtstoraged
+ state: started
+ enabled: true
+
+ - name: Start and enable the virtual network service
+ ansible.builtin.systemd_service:
+ name: virtnetworkd
+ state: started
+ enabled: true
+
+ - name: Create Sushy virtualenv
+ ansible.builtin.pip:
+ name:
+ - libvirt-python
+ - sushy-tools
+ virtualenv: /opt/kayobe/venvs/sushy
+ virtualenv_command: python3 -m venv
+
+ - name: Template Sushy service unit file
+ ansible.builtin.template:
+ src: "{{ sushy_directory }}/sushyemud.service.j2"
+ dest: "/etc/systemd/system/sushyemud.service"
+
+ - name: Start and enable the Sushy Emulator service
+ ansible.builtin.systemd_service:
+ name: sushyemud
+ state: started
+ enabled: true
diff --git a/etc/kayobe/environments/stackhpc-sushy-baremetal/ansible/sushy.conf.j2 b/etc/kayobe/environments/stackhpc-sushy-baremetal/ansible/sushy.conf.j2
new file mode 100644
index 0000000000..445b9871a5
--- /dev/null
+++ b/etc/kayobe/environments/stackhpc-sushy-baremetal/ansible/sushy.conf.j2
@@ -0,0 +1,59 @@
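+{#
+  sushy-tools emulator configuration: each libvirt domain in the
+  "sushy-libvirt" group is given an emulated Redfish storage controller,
+  drive and volume, keyed by UUIDs derived from the hostname.
+#}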
+{% macro storage(uuid, enum, drive_id, end) -%}
+ "{{ uuid }}": [
+ {
+ "Id": "{{ enum }}",
+ "Name": "Local Storage Controller",
+ "StorageControllers": [
+ {
+ "MemberId": "0",
+ "Name": "Contoso Integrated RAID",
+ "SpeedGbps": 12
+ }
+ ],
+ "Drives": [
+ "{{ drive_id }}"
+ ]
+ }
+ ]{% if not end %},{% endif %}
+{%- endmacro %}
+
+{% macro drive(uuid, enum, drive_id, end) -%}
+ ("{{ uuid }}", "{{ enum }}"): [
+ {
+ "Id": "{{ drive_id }}",
+ "Name": "Drive Sample",
+ "CapacityBytes": 899527000000,
+ "Protocol": "SAS"
+ }
+ ]{% if not end %},{% endif %}
+{%- endmacro %}
+
+{% macro volume(uuid, enum, hostname, end) -%}
+ ("{{ uuid }}", "{{ enum }}"): [ {
+ "libvirtPoolName": "default",
+ "libvirtVolName": "{{ hostname }}.qcow2",
+ "Id": "{{ enum }}",
+ "Name": "{{ hostname }}-volume",
+ "VolumeType": "File",
+ "CapacityBytes": 1073741824
+ }
+ ]{% if not end %},{% endif %}
+{%- endmacro %}
+
+SUSHY_EMULATOR_STORAGE = {
+{% for host in groups["sushy-libvirt"] %}
+    {{ storage(host | to_uuid, 1, ('drive-' + host) | to_uuid, loop.last) }}
+{% endfor %}
+}
+
+SUSHY_EMULATOR_DRIVES = {
+{% for host in groups["sushy-libvirt"] %}
+    {{ drive(host | to_uuid, 1, ('drive-' + host) | to_uuid, loop.last) }}
+{% endfor %}
+}
+
+SUSHY_EMULATOR_VOLUMES = {
+{% for host in groups["sushy-libvirt"] %}
+    {{ volume(host | to_uuid, 1, host, loop.last) }}
+{% endfor %}
+}
diff --git a/etc/kayobe/environments/stackhpc-sushy-baremetal/ansible/sushyemud.service.j2 b/etc/kayobe/environments/stackhpc-sushy-baremetal/ansible/sushyemud.service.j2
new file mode 100644
index 0000000000..248de07e09
--- /dev/null
+++ b/etc/kayobe/environments/stackhpc-sushy-baremetal/ansible/sushyemud.service.j2
@@ -0,0 +1,8 @@
+[Unit]
+Description=Virtual Redfish BMC service
+
+[Service]
+ExecStart=/opt/kayobe/venvs/sushy/bin/sushy-emulator -i 192.168.33.3 -p 34343 --config /etc/sushy/sushy.conf
+
+[Install]
+WantedBy=multi-user.target default.target
diff --git a/etc/kayobe/environments/stackhpc-sushy-baremetal/ansible/vbmc-net.xml.j2 b/etc/kayobe/environments/stackhpc-sushy-baremetal/ansible/vbmc-net.xml.j2
new file mode 100644
index 0000000000..ff3bcae5c7
--- /dev/null
+++ b/etc/kayobe/environments/stackhpc-sushy-baremetal/ansible/vbmc-net.xml.j2
@@ -0,0 +1,6 @@
+<network>
+  <name>vbmc-net</name>
+  <uuid>{{ 'vbmc-net' | to_uuid }}</uuid>
+  <!-- forward and bridge elements lost in extraction -->
+</network>
diff --git a/etc/kayobe/environments/stackhpc-sushy-baremetal/ansible/vbmc-node.xml.j2 b/etc/kayobe/environments/stackhpc-sushy-baremetal/ansible/vbmc-node.xml.j2
new file mode 100644
index 0000000000..b8f9aa4b30
--- /dev/null
+++ b/etc/kayobe/environments/stackhpc-sushy-baremetal/ansible/vbmc-node.xml.j2
@@ -0,0 +1,236 @@
+<domain type='kvm'>
+  <name>{{ inventory_hostname }}</name>
+  <uuid>{{ inventory_hostname | to_uuid }}</uuid>
+  <memory unit='KiB'>4882812</memory>
+  <currentMemory unit='KiB'>4882812</currentMemory>
+  <vcpu placement='static'>2</vcpu>
+  <resource>
+    <partition>/machine</partition>
+  </resource>
+  <os>
+    <type arch='x86_64'>hvm</type>
+{% if bikolla_enable_secureboot %}
+    <!-- secure boot loader and nvram elements lost in extraction -->
+{% else %}
+    <!-- UEFI loader element lost in extraction -->
+{% endif %}
+  </os>
+  <on_poweroff>destroy</on_poweroff>
+  <on_reboot>restart</on_reboot>
+  <on_crash>destroy</on_crash>
+  <devices>
+    <emulator>/usr/libexec/qemu-kvm</emulator>
+    <!-- disk, controller, interface, serial, console and video device
+         elements lost in extraction -->
+    <rng model='virtio'>
+      <backend model='random'>/dev/urandom</backend>
+    </rng>
+  </devices>
+  <seclabel type='dynamic' model='selinux' relabel='yes'>
+    <label>system_u:object_r:svirt_image_t:s0:c77,c792</label>
+  </seclabel>
+  <seclabel type='dynamic' model='dac' relabel='yes'>
+    <label>+107:+107</label>
+  </seclabel>
+</domain>
diff --git a/etc/kayobe/environments/stackhpc-sushy-baremetal/ansible/vbmc-pool.xml.j2 b/etc/kayobe/environments/stackhpc-sushy-baremetal/ansible/vbmc-pool.xml.j2
new file mode 100644
index 0000000000..f238c8693b
--- /dev/null
+++ b/etc/kayobe/environments/stackhpc-sushy-baremetal/ansible/vbmc-pool.xml.j2
@@ -0,0 +1,18 @@
+<pool type='dir'>
+  <name>default</name>
+  <uuid>{{ 'default' | to_uuid }}</uuid>
+  <capacity unit='bytes'>68509761536</capacity>
+  <allocation unit='bytes'>18906595328</allocation>
+  <available unit='bytes'>49603166208</available>
+  <source>
+  </source>
+  <target>
+    <path>/var/lib/libvirt/images</path>
+    <permissions>
+      <mode>0711</mode>
+      <owner>0</owner>
+      <group>0</group>
+    </permissions>
+  </target>
+</pool>
diff --git a/etc/kayobe/environments/stackhpc-sushy-baremetal/controllers.yml b/etc/kayobe/environments/stackhpc-sushy-baremetal/controllers.yml
new file mode 100644
index 0000000000..def5d7c3d6
--- /dev/null
+++ b/etc/kayobe/environments/stackhpc-sushy-baremetal/controllers.yml
@@ -0,0 +1,14 @@
+---
+###############################################################################
+# Controller node configuration.
+
+# User with which to access the controllers via SSH during bootstrap, in order
+# to setup the Kayobe user account. Default is {{ os_distribution }}.
+controller_bootstrap_user: rocky
+
+###############################################################################
+# Controller node LVM configuration.
+
+# List of controller volume groups. See mrlesmithjr.manage_lvm role for
+# format.
+controller_lvm_groups: []
\ No newline at end of file
diff --git a/etc/kayobe/environments/stackhpc-sushy-baremetal/globals.yml b/etc/kayobe/environments/stackhpc-sushy-baremetal/globals.yml
new file mode 100644
index 0000000000..7f72eba21e
--- /dev/null
+++ b/etc/kayobe/environments/stackhpc-sushy-baremetal/globals.yml
@@ -0,0 +1,3 @@
+---
+
+sushy_directory: "{{ kayobe_env_config_path }}/ansible"
diff --git a/etc/kayobe/environments/stackhpc-sushy-baremetal/inventory/group_vars/sushy-libvirt/ironic b/etc/kayobe/environments/stackhpc-sushy-baremetal/inventory/group_vars/sushy-libvirt/ironic
new file mode 100644
index 0000000000..d5f835142c
--- /dev/null
+++ b/etc/kayobe/environments/stackhpc-sushy-baremetal/inventory/group_vars/sushy-libvirt/ironic
@@ -0,0 +1,9 @@
+---
+
+bikolla_enable_secureboot: false
+
+ironic_redfish_system_id: "{{ '/redfish/v1/Systems/' + inventory_hostname | to_uuid }}"
+
+redfish_address: "http://192.168.33.3:34343"
+ironic_redfish_address: "192.168.33.3:34343"
+ironic_flat_provisioning_network: provision-net
\ No newline at end of file
diff --git a/etc/kayobe/environments/stackhpc-sushy-baremetal/inventory/hosts b/etc/kayobe/environments/stackhpc-sushy-baremetal/inventory/hosts
new file mode 100644
index 0000000000..a4cdc7e42c
--- /dev/null
+++ b/etc/kayobe/environments/stackhpc-sushy-baremetal/inventory/hosts
@@ -0,0 +1,5 @@
+[sushy-libvirt]
+testvm0 baremetal_compute_network_address=192.168.33.159
+
+[baremetal-compute:children]
+sushy-libvirt
diff --git a/etc/kayobe/environments/stackhpc-sushy-baremetal/stackhpc.yml b/etc/kayobe/environments/stackhpc-sushy-baremetal/stackhpc.yml
new file mode 100644
index 0000000000..89aac612b4
--- /dev/null
+++ b/etc/kayobe/environments/stackhpc-sushy-baremetal/stackhpc.yml
@@ -0,0 +1,3 @@
+---
+
+stackhpc_enable_cis_benchmark_hardening_hook: false
\ No newline at end of file