The error still persists after making the change below; is this the wrong place? I couldn't
do it under the HCI setup:
/usr/share/ansible/roles/ovirt.hosted_engine_setup/tasks/bootstrap_local_vm/05_add_host.yml
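(If it helps to confirm whether that is even the copy the deployment runs: the hosted_engine_setup role may also be shipped inside the ovirt.ovirt collection, so there can be more than one 05_add_host.yml on the host. Something like the command below should list every copy Ansible could pick up; the search paths are only the usual packaging locations, adjust them if your install differs.)

    # list every copy of the task file that could be in use;
    # the search paths are just the common role/collection locations
    find /usr/share/ansible /etc/ansible /root/.ansible -name 05_add_host.yml 2>/dev/null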
Change:
---
- name: Add host
  block:
    - name: Wait for ovirt-engine service to start
      uri:
        url: http://{{ he_fqdn }}/ovirt-engine/services/health
        return_content: true
      register: engine_status
      until: "'DB Up!Welcome to Health Status!' in engine_status.content"
      retries: 30
      delay: 20
    - debug: var=engine_status
    - name: Open a port on firewalld
      firewalld:
        port: "{{ he_webui_forward_port }}/tcp"
        permanent: false
        immediate: true
        state: enabled
    - name: Expose engine VM webui over a local port via ssh port forwarding
      command: >-
        sshpass -e ssh -tt -o ServerAliveInterval=5 -o StrictHostKeyChecking=no -o
        UserKnownHostsFile=/dev/null -g -L
        {{ he_webui_forward_port }}:{{ he_fqdn }}:443 {{ he_fqdn }}
      environment:
        - "{{ he_cmd_lang }}"
        - SSHPASS: "{{ he_appliance_password }}"
      changed_when: true
      async: 86400
      poll: 0
      register: sshpf
    - debug: var=sshpf
    - name: Evaluate temporary bootstrap engine URL
      set_fact: bootstrap_engine_url="https://{{ he_host_address }}:{{ he_webui_forward_port }}/ovirt-engine/"
    - debug:
        msg: >-
          The bootstrap engine is temporary accessible over {{ bootstrap_engine_url }}
    - name: Detect VLAN ID
      shell: ip -d link show {{ he_bridge_if }} | grep 'vlan ' | grep -Po 'id \K[\d]+' | cat
      environment: "{{ he_cmd_lang }}"
      register: vlan_id_out
      changed_when: true
    - debug: var=vlan_id_out
    - name: Set Engine public key as authorized key without validating the TLS/SSL certificates
      authorized_key:
        user: root
        state: present
        key: https://{{ he_fqdn }}/ovirt-engine/services/pki-resource?resource=engine-certificate&format=OPENSSH-PUBKEY
        validate_certs: false
      register: output
      failed_when: never
    - name: DEBUG
      debug:
        var: 'output'
    - include_tasks: auth_sso.yml
    - name: Ensure that the target datacenter is present
      ovirt_datacenter:
        state: present
        name: "{{ he_data_center }}"
        wait: true
        local: false
        auth: "{{ ovirt_auth }}"
      register: dc_result_presence
    - name: Ensure that the target cluster is present in the target datacenter
      ovirt_cluster:
        state: present
        name: "{{ he_cluster }}"
        data_center: "{{ he_data_center }}"
        cpu_type: "{{ he_cluster_cpu_type | default(omit) }}"
        wait: true
        auth: "{{ ovirt_auth }}"
      register: cluster_result_presence
    - name: Check actual cluster location
      fail:
        msg: >-
          A cluster named '{{ he_cluster }}' has been created earlier in a different
          datacenter and cluster moving is still not supported.
          You can avoid this specifying a different cluster name;
          please fix accordingly and try again.
      when: cluster_result_presence.cluster.data_center.id != dc_result_presence.datacenter.id
    - name: Enable GlusterFS at cluster level
      ovirt_cluster:
        data_center: "{{ he_data_center }}"
        name: "{{ he_cluster }}"
        auth: "{{ ovirt_auth }}"
        virt: true
        gluster: true
        fence_skip_if_gluster_bricks_up: true
        fence_skip_if_gluster_quorum_not_met: true
      when: he_enable_hc_gluster_service is defined and he_enable_hc_gluster_service
    - name: Set VLAN ID at datacenter level
      ovirt_network:
        data_center: "{{ he_data_center }}"
        name: "{{ he_mgmt_network }}"
        vlan_tag: "{{ vlan_id_out.stdout }}"
        auth: "{{ ovirt_auth }}"
      when: vlan_id_out.stdout|length > 0
    - name: Get active list of active firewalld zones
      shell: set -euo pipefail && firewall-cmd --get-active-zones | grep -v "^\s*interfaces"
      environment: "{{ he_cmd_lang }}"
      register: active_f_zone
      changed_when: true
    - name: Configure libvirt firewalld zone
      firewalld:
        zone: libvirt
        service: "{{ service_item }}"
        permanent: true
        immediate: true
        state: enabled
      with_items:
        - vdsm
        - libvirt-tls
        - ovirt-imageio
        - ovirt-vmconsole
        - ssh
        - vdsm
      loop_control:
        loop_var: service_item
      when: "'libvirt' in active_f_zone.stdout_lines"
    - name: Add host
      ovirt_host:
        cluster: "{{ he_cluster }}"
        name: "{{ he_host_name }}"
        state: present
        public_key: true
        address: "{{ he_host_address }}"
        auth: "{{ ovirt_auth }}"
      async: 1
      poll: 0
    - name: Pause the execution to let the user interactively reconfigure the host
      block:
        - name: Let the user connect to the bootstrap engine to manually fix host configuration
          debug:
            msg: >-
              You can now connect to {{ bootstrap_engine_url }} and check the status of this host and
              eventually remediate it, please continue only when the host is listed as 'up'
        - include_tasks: pause_execution.yml
      when: he_pause_host|bool
    # refresh the auth token after a long operation to avoid having it expired
    - include_tasks: auth_revoke.yml
    - include_tasks: auth_sso.yml
    - name: Wait for the host to be up
      ovirt_host_info:
        pattern: name={{ he_host_name }}
        auth: "{{ ovirt_auth }}"
      register: host_result_up_check
      until: >-
        host_result_up_check is succeeded and
        host_result_up_check.ovirt_hosts|length >= 1 and
        (
          host_result_up_check.ovirt_hosts[0].status == 'up' or
          host_result_up_check.ovirt_hosts[0].status == 'non_operational'
        )
      retries: 120
      delay: 10
      ignore_errors: true
    - debug: var=host_result_up_check
    - name: Notify the user about a failure
      fail:
        msg: >-
          Host is not up, please check logs, perhaps also on the engine machine
      when: host_result_up_check is failed
    - name: Handle deployment failure
      block:
        - set_fact: host_id={{ host_result_up_check.ovirt_hosts[0].id }}
        - name: Collect error events from the Engine
          ovirt_event_facts:
            auth: "{{ ovirt_auth }}"
            search: "severity>=error"
          register: error_events
        - name: Generate the error message from the engine events
          set_fact:
            error_description: >-
              {% for event in error_events.ansible_facts.ovirt_events | groupby('code') %}
              {% if event[1][0].host.id == host_id %}
              code {{ event[0] }}: {{ event[1][0].description }},
              {% endif %}
              {% endfor %}
          ignore_errors: true
        - name: Fail with error description
          fail:
            msg: >-
              The host has been set in non_operational status,
              deployment errors: {{ error_description }}
              fix accordingly and re-deploy.
          when: error_description is defined
        - name: Fail with generic error
          fail:
            msg: >-
              The host has been set in non_operational status,
              please check engine logs,
              more info can be found in the engine logs,
              fix accordingly and re-deploy.
          when: error_description is not defined
      when: >-
        host_result_up_check is succeeded and
        host_result_up_check.ovirt_hosts|length >= 1 and
        host_result_up_check.ovirt_hosts[0].status == 'non_operational'
  rescue:
    - name: Sync on engine machine
      command: sync
      changed_when: true
    - name: Fetch logs from the engine VM
      include_tasks: fetch_engine_logs.yml
      ignore_errors: true
    - include_tasks: clean_localvm_dir.yml
    - include_tasks: clean_local_storage_pools.yml
    - name: Notify the user about a failure
      fail:
        msg: >
          The system may not be provisioned according to the playbook
          results: please check the logs for the issue,
          fix accordingly or re-deploy from scratch.
Log:
[ 00:04 ] Remove ovirt-engine-appliance rpm
[ < 1 sec ] Include custom tasks for after setup customization
[ < 1 sec ] Include Host vars
[ FAILED ] Set Engine public key as authorized key without validating the TLS/SSL certificates
2021-05-17 14:50:49,553+0200 DEBUG ansible on_any args <ansible.executor.stats.AggregateStats object at 0x7fb4ec4ee9b0> kwargs
[root@hej1 bootstrap_local_vm]# cat /var/log/ovirt-hosted-engine-setup/ovirt-hosted-engine-setup-ansible-create_target_vm-20210517144031-ebmk45.log
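For the failing step itself ("Set Engine public key as authorized key without validating the TLS/SSL certificates"), one manual check that is not part of the playbook may help narrow it down: fetch the same URL the authorized_key task reads, from the host, while the bootstrap engine VM is still reachable. <he_fqdn> below is just a placeholder for the engine FQDN used for the deployment, and -k skips certificate validation similar to validate_certs: false in the task.

    # fetch the engine SSH public key from the same URL the failing task uses;
    # an HTTP error or empty body points at the engine side, a connection or
    # DNS failure points at name resolution or the bootstrap VM not being reachable
    curl -kv "https://<he_fqdn>/ovirt-engine/services/pki-resource?resource=engine-certificate&format=OPENSSH-PUBKEY"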