After a failed attempt at migrating our HostedEngine to a new iSCSI storage domain,
we're unable to restart the original HostedEngine.
Please see below for some details, and let me know what additional information I can provide.
"lnxvirt07" was the host used to attempt the migration; the output below was
collected on lnxvirt01. Any help would be greatly appreciated.
Many thanks,
Devin
------
[root@lnxvirt01 ~]# tail -n 5 /var/log/ovirt-hosted-engine-ha/agent.log
MainThread::INFO::2023-11-01
12:29:53,514::state_decorators::51::ovirt_hosted_engine_ha.agent.hosted_engine.HostedEngine::(check)
Global maintenance detected
MainThread::INFO::2023-11-01
12:29:54,151::ovf_store::117::ovirt_hosted_engine_ha.lib.ovf.ovf_store.OVFStore::(scan)
Found OVF_STORE: imgUUID:05ef954f-d06d-401c-85ec-5992e2afbe7d,
volUUID:d2860f1d-19cf-4084-8a7e-d97880c32431
MainThread::INFO::2023-11-01
12:29:54,530::ovf_store::117::ovirt_hosted_engine_ha.lib.ovf.ovf_store.OVFStore::(scan)
Found OVF_STORE: imgUUID:a375a35b-7a87-4df4-8d29-a5ba371fee85,
volUUID:ef8b3dae-bcae-4d58-bea8-cf1a34872267
MainThread::ERROR::2023-11-01
12:29:54,813::config_ovf::65::ovirt_hosted_engine_ha.agent.hosted_engine.HostedEngine.config.vm::(_get_vm_conf_content_from_ovf_store)
Failed extracting VM OVF from the OVF_STORE volume, falling back to initial vm.conf
MainThread::INFO::2023-11-01
12:29:54,843::hosted_engine::531::ovirt_hosted_engine_ha.agent.hosted_engine.HostedEngine::(_monitoring_loop)
Current state GlobalMaintenance (score: 3400)
[root@lnxvirt01 ~]# hosted-engine --vm-start
Command VM.getStats with args {'vmID':
'e6370d8f-c083-4f28-83d0-a232d693e07a'} failed:
(code=1, message=Virtual machine does not exist: {'vmId':
'e6370d8f-c083-4f28-83d0-a232d693e07a'})
Command VM.create with args {'vmID':
'e6370d8f-c083-4f28-83d0-a232d693e07a', 'vmParams': {'vmId':
'e6370d8f-c083-4f28-83d0-a232d693e07a', 'memSize': '16384',
'display': 'vnc', 'vmName': 'HostedEngine', 'smp':
'4', 'maxVCpus': '40', 'cpuType': 'Haswell-noTSX',
'emulatedMachine': 'pc', 'devices': [{'index':
'2', 'iface': 'ide', 'address': {'controller':
'0', 'target': '0', 'unit': '0', 'bus':
'1', 'type': 'drive'}, 'specParams': {},
'readonly': 'true', 'deviceId':
'b3e2f40a-e28d-493c-af50-c1193fb9dc97', 'path': '',
'device': 'cdrom', 'shared': 'false', 'type':
'disk'}, {'index': '0', 'iface': 'virtio',
'format': 'raw', 'poolID':
'00000000-0000-0000-0000-000000000000', 'volumeID':
'6afa3b19-7a1a-4e5c-a681-eed756d316e9', 'imageID':
'94628710-cf73-4589-bd84-e58f741a4d5f', 'specParams': {},
'readonly': 'false', 'domainID':
'555ad71c-1a4e-42b3-af8c-db39d9b9df67', 'optional': 'false',
'deviceId': '6afa3b19-7a1a-4e5c-a681-eed756d316e9', 'address':
{'bus': '0x00', 'slot': '0x06', 'domain':
'0x0000', 'type': 'pci', 'function': '0x0'},
'device': 'disk', 'shared': 'exclusive',
'propagateErrors': 'off', 'type': 'disk',
'bootOrder': '1'}, {'device': 'scsi', 'model':
'virtio-scsi', 'type': 'controller'}, {'nicModel':
'pv', 'macAddr': '00:16:3e:3b:3f:14', 'linkActive':
'true', 'network': 'ovirtmgmt', 'specParams': {},
'deviceId': '002afd06-9649-4ac5-a5e8-1a4945c3c136', 'address':
{'bus': '0x00', 'slot': '0x03', 'domain':
'0x0000', 'type': 'pci', 'function': '0x0'},
'device': 'bridge', 'type': 'interface'},
{'device': 'console', 'type': 'console'},
{'device': 'vga', 'alias': 'video0', 'type':
'video'}, {'device': 'vnc', 'type': 'graphics'},
{'device': 'virtio', 'specParams': {'source':
'urandom'}, 'model': 'virtio', 'type': 'rng'}]}}
failed:
(code=100, message=General Exception: ("'xml'",))
VM failed to launch
[root@lnxvirt01 ~]# cat /etc/ovirt-hosted-engine/hosted-engine.conf
fqdn=lnxvirt-engine.classe.cornell.edu
vm_disk_id=94628710-cf73-4589-bd84-e58f741a4d5f
vm_disk_vol_id=6afa3b19-7a1a-4e5c-a681-eed756d316e9
vmid=e6370d8f-c083-4f28-83d0-a232d693e07a
storage=192.168.56.50,192.168.56.51,192.168.56.52,192.168.56.53
nfs_version=
mnt_options=
conf=/var/run/ovirt-hosted-engine-ha/vm.conf
host_id=8
console=vnc
domainType=iscsi
spUUID=00000000-0000-0000-0000-000000000000
sdUUID=555ad71c-1a4e-42b3-af8c-db39d9b9df67
connectionUUID=e29cf818-5ee5-46e1-85c1-8aeefa33e95d
vdsm_use_ssl=true
gateway=192.168.55.1
bridge=ovirtmgmt
network_test=dns
tcp_t_address=
tcp_t_port=
metadata_volume_UUID=2bf987a2-ab81-454c-9fc7-dc7ec8945fd9
metadata_image_UUID=35429b63-16ca-417a-b87a-d232463bf6a3
lockspace_volume_UUID=b0d09780-2047-433c-812d-10ba0beff788
lockspace_image_UUID=8ccb878d-9938-43c8-908b-e1b416fe991c
conf_volume_UUID=0b40ac60-499e-4ff1-83d0-fc578f1af3dc
conf_image_UUID=551d4fe5-a9f7-4ba1-9951-87418362b434
# The following are used only for iSCSI storage
iqn=iqn.2002-10.com.infortrend:raid.uid58207.001
portal=1
user=
password=
port=3260,3260,3260,3260
[root@lnxvirt01 ~]# hosted-engine --vm-status
!! Cluster is in GLOBAL MAINTENANCE mode !!
--== Host
lnxvirt06.classe.cornell.edu (id: 1) status ==--
Host ID : 1
Host timestamp : 3718817
Score : 3400
Engine status : {"vm": "down",
"health": "bad", "detail": "unknown",
"reason": "vm not running on this host"}
Hostname :
lnxvirt06.classe.cornell.edu
Local maintenance : False
stopped : False
crc32 : 233a1425
conf_on_shared_storage : True
local_conf_timestamp : 3718818
Status up-to-date : True
Extra metadata (valid at timestamp):
metadata_parse_version=1
metadata_feature_version=1
timestamp=3718817 (Wed Nov 1 12:26:35 2023)
host-id=1
score=3400
vm_conf_refresh_time=3718818 (Wed Nov 1 12:26:37 2023)
conf_on_shared_storage=True
maintenance=False
state=GlobalMaintenance
stopped=False
--== Host
lnxvirt05.classe.cornell.edu (id: 2) status ==--
Host ID : 2
Host timestamp : 3719461
Score : 3400
Engine status : {"vm": "down",
"health": "bad", "detail": "unknown",
"reason": "vm not running on this host"}
Hostname :
lnxvirt05.classe.cornell.edu
Local maintenance : False
stopped : False
crc32 : b3c81abe
conf_on_shared_storage : True
local_conf_timestamp : 3719462
Status up-to-date : True
Extra metadata (valid at timestamp):
metadata_parse_version=1
metadata_feature_version=1
timestamp=3719461 (Wed Nov 1 12:26:41 2023)
host-id=2
score=3400
vm_conf_refresh_time=3719462 (Wed Nov 1 12:26:42 2023)
conf_on_shared_storage=True
maintenance=False
state=GlobalMaintenance
stopped=False
--== Host
lnxvirt04.classe.cornell.edu (id: 3) status ==--
Host ID : 3
Host timestamp : 3718684
Score : 3400
Engine status : {"vm": "down",
"health": "bad", "detail": "unknown",
"reason": "vm not running on this host"}
Hostname :
lnxvirt04.classe.cornell.edu
Local maintenance : False
stopped : False
crc32 : 03a57b14
conf_on_shared_storage : True
local_conf_timestamp : 3718686
Status up-to-date : True
Extra metadata (valid at timestamp):
metadata_parse_version=1
metadata_feature_version=1
timestamp=3718684 (Wed Nov 1 12:26:41 2023)
host-id=3
score=3400
vm_conf_refresh_time=3718686 (Wed Nov 1 12:26:43 2023)
conf_on_shared_storage=True
maintenance=False
state=GlobalMaintenance
stopped=False
--== Host
lnxvirt03.classe.cornell.edu (id: 4) status ==--
Host ID : 4
Host timestamp : 3719430
Score : 3400
Engine status : {"vm": "down",
"health": "bad", "detail": "unknown",
"reason": "vm not running on this host"}
Hostname :
lnxvirt03.classe.cornell.edu
Local maintenance : False
stopped : False
crc32 : adb1aad2
conf_on_shared_storage : True
local_conf_timestamp : 3719432
Status up-to-date : True
Extra metadata (valid at timestamp):
metadata_parse_version=1
metadata_feature_version=1
timestamp=3719430 (Wed Nov 1 12:26:35 2023)
host-id=4
score=3400
vm_conf_refresh_time=3719432 (Wed Nov 1 12:26:36 2023)
conf_on_shared_storage=True
maintenance=False
state=GlobalMaintenance
stopped=False
--== Host
lnxvirt02.classe.cornell.edu (id: 5) status ==--
Host ID : 5
Host timestamp : 3719408
Score : 3400
Engine status : {"vm": "down",
"health": "bad", "detail": "unknown",
"reason": "vm not running on this host"}
Hostname :
lnxvirt02.classe.cornell.edu
Local maintenance : False
stopped : False
crc32 : 1996a067
conf_on_shared_storage : True
local_conf_timestamp : 3719410
Status up-to-date : True
Extra metadata (valid at timestamp):
metadata_parse_version=1
metadata_feature_version=1
timestamp=3719408 (Wed Nov 1 12:26:39 2023)
host-id=5
score=3400
vm_conf_refresh_time=3719410 (Wed Nov 1 12:26:41 2023)
conf_on_shared_storage=True
maintenance=False
state=GlobalMaintenance
stopped=False
--== Host
lnxvirt07.classe.cornell.edu (id: 7) status ==--
Host ID : 7
Host timestamp : 495392
Score : 0
Engine status : unknown stale-data
Hostname :
lnxvirt07.classe.cornell.edu
Local maintenance : False
stopped : True
crc32 : 2572e907
conf_on_shared_storage : True
local_conf_timestamp : 495352
Status up-to-date : False
Extra metadata (valid at timestamp):
metadata_parse_version=1
metadata_feature_version=1
timestamp=495392 (Tue Oct 31 10:20:12 2023)
host-id=7
score=0
vm_conf_refresh_time=495352 (Tue Oct 31 10:19:33 2023)
conf_on_shared_storage=True
maintenance=False
state=AgentStopped
stopped=True
--== Host
lnxvirt01.classe.cornell.edu (id: 8) status ==--
Host ID : 8
Host timestamp : 1729103
Score : 3400
Engine status : {"vm": "down",
"health": "bad", "detail": "unknown",
"reason": "vm not running on this host"}
Hostname :
lnxvirt01.classe.cornell.edu
Local maintenance : False
stopped : False
crc32 : 2e57e99d
conf_on_shared_storage : True
local_conf_timestamp : 1729104
Status up-to-date : True
Extra metadata (valid at timestamp):
metadata_parse_version=1
metadata_feature_version=1
timestamp=1729103 (Wed Nov 1 12:26:31 2023)
host-id=8
score=3400
vm_conf_refresh_time=1729104 (Wed Nov 1 12:26:33 2023)
conf_on_shared_storage=True
maintenance=False
state=GlobalMaintenance
stopped=False
!! Cluster is in GLOBAL MAINTENANCE mode !!