Hello,
ovirt-release-host-node-4.5.4-1.el8.x86_64
Today I found my cluster in an inconsistent state.
I have three nodes (ovirt-node2, ovirt-node3 and ovirt-node4) with a self-hosted engine deployed
using external NFS storage.
My first attempt was to run hosted-engine --vm-status on all three nodes, and I got three
inconsistent states:
[root@ovirt-node2 ~]# hosted-engine --vm-status
The hosted engine configuration has not been retrieved from shared storage yet,
please ensure that ovirt-ha-agent service is running.
--== Host ovirt-node3.ovirt (id: 1) status ==--
Host ID : 1
Host timestamp : 1942858
Score : 3400
Engine status : unknown stale-data
Hostname : ovirt-node3.ovirt
Local maintenance : False
stopped : False
crc32 : 37cf5256
conf_on_shared_storage : True
local_conf_timestamp : 1942859
Status up-to-date : False
Extra metadata (valid at timestamp):
metadata_parse_version=1
metadata_feature_version=1
timestamp=1942858 (Sun Mar 12 01:26:20 2023)
host-id=1
score=3400
vm_conf_refresh_time=1942859 (Sun Mar 12 01:26:22 2023)
conf_on_shared_storage=True
maintenance=False
state=EngineDown
stopped=False
--== Host ovirt-node2.ovirt (id: 2) status ==--
Host ID : 2
Host timestamp : 4425500
Score : 3400
Engine status : unknown stale-data
Hostname : ovirt-node2.ovirt
Local maintenance : False
stopped : False
crc32 : ab944a8a
conf_on_shared_storage : True
local_conf_timestamp : 4425500
Status up-to-date : False
Extra metadata (valid at timestamp):
metadata_parse_version=1
metadata_feature_version=1
timestamp=4425500 (Sun Mar 12 01:26:01 2023)
host-id=2
score=3400
vm_conf_refresh_time=4425500 (Sun Mar 12 01:26:01 2023)
conf_on_shared_storage=True
maintenance=False
state=EngineUp
stopped=False
[root@ovirt-node3 ~]# hosted-engine --vm-status
--== Host ovirt-node4.ovirt (id: 3) status ==--
Host ID : 3
Host timestamp : 4452814
Score : 3400
Engine status : unknown stale-data
Hostname : ovirt-node4.ovirt
Local maintenance : False
stopped : False
crc32 : 95890d21
conf_on_shared_storage : True
local_conf_timestamp : 4452814
Status up-to-date : False
Extra metadata (valid at timestamp):
metadata_parse_version=1
metadata_feature_version=1
timestamp=4452814 (Sun Mar 12 01:25:55 2023)
host-id=3
score=3400
vm_conf_refresh_time=4452814 (Sun Mar 12 01:25:55 2023)
conf_on_shared_storage=True
maintenance=False
state=EngineDown
stopped=False
[root@ovirt-node4 ~]# hosted-engine --vm-status
--== Host ovirt-node3.ovirt (id: 1) status ==--
Host ID : 1
Host timestamp : 1942848
Score : 3400
Engine status : unknown stale-data
Hostname : ovirt-node3.ovirt
Local maintenance : False
stopped : False
crc32 : 7f645fbc
conf_on_shared_storage : True
local_conf_timestamp : 1942848
Status up-to-date : False
Extra metadata (valid at timestamp):
metadata_parse_version=1
metadata_feature_version=1
timestamp=1942848 (Sun Mar 12 01:26:10 2023)
host-id=1
score=3400
vm_conf_refresh_time=1942848 (Sun Mar 12 01:26:10 2023)
conf_on_shared_storage=True
maintenance=False
state=EngineDown
stopped=False
--== Host ovirt-node2.ovirt (id: 2) status ==--
Host ID : 2
Host timestamp : 4428404
Score : 3400
Engine status : unknown stale-data
Hostname : ovirt-node2.ovirt
Local maintenance : False
stopped : False
crc32 : af938ff8
conf_on_shared_storage : True
local_conf_timestamp : 4428404
Status up-to-date : False
Extra metadata (valid at timestamp):
metadata_parse_version=1
metadata_feature_version=1
timestamp=4428404 (Sun Mar 12 02:14:45 2023)
host-id=2
score=3400
vm_conf_refresh_time=4428404 (Sun Mar 12 02:14:45 2023)
conf_on_shared_storage=True
maintenance=False
state=EngineUp
stopped=False
--== Host ovirt-node4.ovirt (id: 3) status ==--
Host ID : 3
Host timestamp : 4470173
Score : 3400
Engine status : unknown stale-data
Hostname : ovirt-node4.ovirt
Local maintenance : False
stopped : False
crc32 : d8fdb650
conf_on_shared_storage : True
local_conf_timestamp : 4470173
Status up-to-date : False
Extra metadata (valid at timestamp):
metadata_parse_version=1
metadata_feature_version=1
timestamp=4470173 (Sun Mar 12 06:15:15 2023)
host-id=3
score=3400
vm_conf_refresh_time=4470173 (Sun Mar 12 06:15:15 2023)
conf_on_shared_storage=True
maintenance=False
state=EngineStarting
stopped=False
Obviously there is something weird happening.
Currently I have put my cluster in global maintenance mode, but I had to run hosted-engine
--set-maintenance --mode=global on both node3 and node4.
Please give me some hints... During this weekend I received hundreds of emails telling me that
the hosted engine went into an inconsistent state.