
Hello. I'm on 4.1.3 with self-hosted engine and GlusterFS as storage.
I updated the kernel on the engine, so I executed these steps:

- enable global maintenance from the web admin GUI
- wait some minutes
- shut down the engine VM from inside its OS
- wait some minutes
- execute on one host:
  [root@ovirt02 ~]# hosted-engine --set-maintenance --mode=none

I see that the qemu-kvm process for the engine starts on two hosts and then, on one of them, it gets a "kill -15" and stops.
Is this expected behaviour? It seems somehow dangerous to me..
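For reference, the same flow done entirely from the command line should be something like this ("engine" below is only a placeholder for my engine VM's hostname):

  [root@ovirt02 ~]# hosted-engine --set-maintenance --mode=global
  ... wait some minutes ...
  [root@engine ~]# shutdown -h now
  ... wait some minutes ...
  [root@ovirt02 ~]# hosted-engine --set-maintenance --mode=none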
- when in maintenance:

[root@ovirt02 ~]# hosted-engine --vm-status

!! Cluster is in GLOBAL MAINTENANCE mode !!

--== Host 1 status ==--

conf_on_shared_storage : True
Status up-to-date : True
Hostname : ovirt01.localdomain.local
Host ID : 1
Engine status : {"health": "good", "vm": "up", "detail": "up"}
Score : 2597
stopped : False
Local maintenance : False
crc32 : 7931c5c3
local_conf_timestamp : 19811
Host timestamp : 19794
Extra metadata (valid at timestamp):
    metadata_parse_version=1
    metadata_feature_version=1
    timestamp=19794 (Sun Jul 9 21:31:50 2017)
    host-id=1
    score=2597
    vm_conf_refresh_time=19811 (Sun Jul 9 21:32:06 2017)
    conf_on_shared_storage=True
    maintenance=False
    state=GlobalMaintenance
    stopped=False

--== Host 2 status ==--

conf_on_shared_storage : True
Status up-to-date : True
Hostname : 192.168.150.103
Host ID : 2
Engine status : {"reason": "vm not running on this host", "health": "bad", "vm": "down", "detail": "unknown"}
Score : 3400
stopped : False
Local maintenance : False
crc32 : 616ceb02
local_conf_timestamp : 2829
Host timestamp : 2812
Extra metadata (valid at timestamp):
    metadata_parse_version=1
    metadata_feature_version=1
    timestamp=2812 (Sun Jul 9 21:31:52 2017)
    host-id=2
    score=3400
    vm_conf_refresh_time=2829 (Sun Jul 9 21:32:09 2017)
    conf_on_shared_storage=True
    maintenance=False
    state=GlobalMaintenance
    stopped=False

--== Host 3 status ==--

conf_on_shared_storage : True
Status up-to-date : True
Hostname : ovirt03.localdomain.local
Host ID : 3
Engine status : {"reason": "vm not running on this host", "health": "bad", "vm": "down", "detail": "unknown"}
Score : 3400
stopped : False
Local maintenance : False
crc32 : 871204b2
local_conf_timestamp : 24584
Host timestamp : 24567
Extra metadata (valid at timestamp):
    metadata_parse_version=1
    metadata_feature_version=1
    timestamp=24567 (Sun Jul 9 21:31:52 2017)
    host-id=3
    score=3400
    vm_conf_refresh_time=24584 (Sun Jul 9 21:32:09 2017)
    conf_on_shared_storage=True
    maintenance=False
    state=GlobalMaintenance
    stopped=False

!! Cluster is in GLOBAL MAINTENANCE mode !!

[root@ovirt02 ~]#

- then I exit global maintenance:

[root@ovirt02 ~]# hosted-engine --set-maintenance --mode=none
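From here on I just keep polling the status, with something along the lines of:

  [root@ovirt02 ~]# watch -n 10 hosted-engine --vm-status

so the outputs below are snapshots of that polling.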
- During the monitoring, at some point I see "EngineStart" on both host2 and host3:

[root@ovirt02 ~]# hosted-engine --vm-status

--== Host 1 status ==--

conf_on_shared_storage : True
Status up-to-date : True
Hostname : ovirt01.localdomain.local
Host ID : 1
Engine status : {"reason": "bad vm status", "health": "bad", "vm": "down", "detail": "down"}
Score : 3230
stopped : False
Local maintenance : False
crc32 : 25cadbfb
local_conf_timestamp : 20055
Host timestamp : 20040
Extra metadata (valid at timestamp):
    metadata_parse_version=1
    metadata_feature_version=1
    timestamp=20040 (Sun Jul 9 21:35:55 2017)
    host-id=1
    score=3230
    vm_conf_refresh_time=20055 (Sun Jul 9 21:36:11 2017)
    conf_on_shared_storage=True
    maintenance=False
    state=EngineDown
    stopped=False

--== Host 2 status ==--

conf_on_shared_storage : True
Status up-to-date : True
Hostname : 192.168.150.103
Host ID : 2
Engine status : {"reason": "vm not running on this host", "health": "bad", "vm": "down", "detail": "unknown"}
Score : 3400
stopped : False
Local maintenance : False
crc32 : e6951128
local_conf_timestamp : 3075
Host timestamp : 3058
Extra metadata (valid at timestamp):
    metadata_parse_version=1
    metadata_feature_version=1
    timestamp=3058 (Sun Jul 9 21:35:59 2017)
    host-id=2
    score=3400
    vm_conf_refresh_time=3075 (Sun Jul 9 21:36:15 2017)
    conf_on_shared_storage=True
    maintenance=False
    state=EngineStart
    stopped=False

--== Host 3 status ==--

conf_on_shared_storage : True
Status up-to-date : True
Hostname : ovirt03.localdomain.local
Host ID : 3
Engine status : {"reason": "vm not running on this host", "health": "bad", "vm": "down", "detail": "unknown"}
Score : 3400
stopped : False
Local maintenance : False
crc32 : 382efde5
local_conf_timestamp : 24832
Host timestamp : 24816
Extra metadata (valid at timestamp):
    metadata_parse_version=1
    metadata_feature_version=1
    timestamp=24816 (Sun Jul 9 21:36:01 2017)
    host-id=3
    score=3400
    vm_conf_refresh_time=24832 (Sun Jul 9 21:36:17 2017)
    conf_on_shared_storage=True
    maintenance=False
    state=EngineStart
    stopped=False

[root@ovirt02 ~]#

and then:

[root@ovirt02 ~]# hosted-engine --vm-status

--== Host 1 status ==--

conf_on_shared_storage : True
Status up-to-date : True
Hostname : ovirt01.localdomain.local
Host ID : 1
Engine status : {"reason": "bad vm status", "health": "bad", "vm": "down", "detail": "down"}
Score : 3253
stopped : False
Local maintenance : False
crc32 : 3fc39f31
local_conf_timestamp : 20087
Host timestamp : 20070
Extra metadata (valid at timestamp):
    metadata_parse_version=1
    metadata_feature_version=1
    timestamp=20070 (Sun Jul 9 21:36:26 2017)
    host-id=1
    score=3253
    vm_conf_refresh_time=20087 (Sun Jul 9 21:36:43 2017)
    conf_on_shared_storage=True
    maintenance=False
    state=EngineDown
    stopped=False

--== Host 2 status ==--

conf_on_shared_storage : True
Status up-to-date : True
Hostname : 192.168.150.103
Host ID : 2
Engine status : {"reason": "vm not running on this host", "health": "bad", "vm": "down", "detail": "unknown"}
Score : 3400
stopped : False
Local maintenance : False
crc32 : 4a05c31e
local_conf_timestamp : 3109
Host timestamp : 3079
Extra metadata (valid at timestamp):
    metadata_parse_version=1
    metadata_feature_version=1
    timestamp=3079 (Sun Jul 9 21:36:19 2017)
    host-id=2
    score=3400
    vm_conf_refresh_time=3109 (Sun Jul 9 21:36:49 2017)
    conf_on_shared_storage=True
    maintenance=False
    state=EngineStarting
    stopped=False

--== Host 3 status ==--

conf_on_shared_storage : True
Status up-to-date : True
Hostname : ovirt03.localdomain.local
Host ID : 3
Engine status : {"reason": "vm not running on this host", "health": "bad", "vm": "down", "detail": "unknown"}
Score : 3400
stopped : False
Local maintenance : False
crc32 : 382efde5
local_conf_timestamp : 24832
Host timestamp : 24816
Extra metadata (valid at timestamp):
    metadata_parse_version=1
    metadata_feature_version=1
    timestamp=24816 (Sun Jul 9 21:36:01 2017)
    host-id=3
    score=3400
    vm_conf_refresh_time=24832 (Sun Jul 9 21:36:17 2017)
    conf_on_shared_storage=True
    maintenance=False
    state=EngineStart
    stopped=False

[root@ovirt02 ~]#

and:

[root@ovirt02 ~]# hosted-engine --vm-status

--== Host 1 status ==--

conf_on_shared_storage : True
Status up-to-date : True
Hostname : ovirt01.localdomain.local
Host ID : 1
Engine status : {"reason": "bad vm status", "health": "bad", "vm": "down", "detail": "down"}
Score : 3253
stopped : False
Local maintenance : False
crc32 : 3fc39f31
local_conf_timestamp : 20087
Host timestamp : 20070
Extra metadata (valid at timestamp):
    metadata_parse_version=1
    metadata_feature_version=1
    timestamp=20070 (Sun Jul 9 21:36:26 2017)
    host-id=1
    score=3253
    vm_conf_refresh_time=20087 (Sun Jul 9 21:36:43 2017)
    conf_on_shared_storage=True
    maintenance=False
    state=EngineDown
    stopped=False

--== Host 2 status ==--

conf_on_shared_storage : True
Status up-to-date : True
Hostname : 192.168.150.103
Host ID : 2
Engine status : {"reason": "vm not running on this host", "health": "bad", "vm": "down", "detail": "unknown"}
Score : 3400
stopped : False
Local maintenance : False
crc32 : 4a05c31e
local_conf_timestamp : 3109
Host timestamp : 3079
Extra metadata (valid at timestamp):
    metadata_parse_version=1
    metadata_feature_version=1
    timestamp=3079 (Sun Jul 9 21:36:19 2017)
    host-id=2
    score=3400
    vm_conf_refresh_time=3109 (Sun Jul 9 21:36:49 2017)
    conf_on_shared_storage=True
    maintenance=False
    state=EngineStarting
    stopped=False

--== Host 3 status ==--

conf_on_shared_storage : True
Status up-to-date : True
Hostname : ovirt03.localdomain.local
Host ID : 3
Engine status : {"reason": "vm not running on this host", "health": "bad", "vm": "down", "detail": "unknown"}
Score : 3400
stopped : False
Local maintenance : False
crc32 : fc1e8cf9
local_conf_timestamp : 24868
Host timestamp : 24836
Extra metadata (valid at timestamp):
    metadata_parse_version=1
    metadata_feature_version=1
    timestamp=24836 (Sun Jul 9 21:36:21 2017)
    host-id=3
    score=3400
    vm_conf_refresh_time=24868 (Sun Jul 9 21:36:53 2017)
    conf_on_shared_storage=True
    maintenance=False
    state=EngineStarting
    stopped=False

[root@ovirt02 ~]#

and at the end Host3 goes to "ForceStop" for the engine:

[root@ovirt02 ~]# hosted-engine --vm-status

--== Host 1 status ==--

conf_on_shared_storage : True
Status up-to-date : True
Hostname : ovirt01.localdomain.local
Host ID : 1
Engine status : {"reason": "bad vm status", "health": "bad", "vm": "down", "detail": "down"}
Score : 3312
stopped : False
Local maintenance : False
crc32 : e9d53432
local_conf_timestamp : 20120
Host timestamp : 20102
Extra metadata (valid at timestamp):
    metadata_parse_version=1
    metadata_feature_version=1
    timestamp=20102 (Sun Jul 9 21:36:58 2017)
    host-id=1
    score=3312
    vm_conf_refresh_time=20120 (Sun Jul 9 21:37:15 2017)
    conf_on_shared_storage=True
    maintenance=False
    state=EngineDown
    stopped=False

--== Host 2 status ==--

conf_on_shared_storage : True
Status up-to-date : True
Hostname : 192.168.150.103
Host ID : 2
Engine status : {"reason": "bad vm status", "health": "bad", "vm": "up", "detail": "powering up"}
Score : 3400
stopped : False
Local maintenance : False
crc32 : 7d2330be
local_conf_timestamp : 3141
Host timestamp : 3124
Extra metadata (valid at timestamp):
    metadata_parse_version=1
    metadata_feature_version=1
    timestamp=3124 (Sun Jul 9 21:37:04 2017)
    host-id=2
    score=3400
    vm_conf_refresh_time=3141 (Sun Jul 9 21:37:21 2017)
    conf_on_shared_storage=True
    maintenance=False
    state=EngineStarting
    stopped=False

--== Host 3 status ==--

conf_on_shared_storage : True
Status up-to-date : True
Hostname : ovirt03.localdomain.local
Host ID : 3
Engine status : {"reason": "Storage of VM is locked. Is another host already starting the VM?", "health": "bad", "vm": "already_locked", "detail": "down"}
Score : 3400
stopped : False
Local maintenance : False
crc32 : 179825e8
local_conf_timestamp : 24900
Host timestamp : 24883
Extra metadata (valid at timestamp):
    metadata_parse_version=1
    metadata_feature_version=1
    timestamp=24883 (Sun Jul 9 21:37:08 2017)
    host-id=3
    score=3400
    vm_conf_refresh_time=24900 (Sun Jul 9 21:37:24 2017)
    conf_on_shared_storage=True
    maintenance=False
    state=EngineForceStop
    stopped=False

[root@ovirt02 ~]#

Comparing /var/log/libvirt/qemu/HostedEngine on host2 and host3:

Host2:

2017-07-09 19:36:36.094+0000: starting up libvirt version: 2.0.0, package: 10.el7_3.9 (CentOS BuildSystem <http://bugs.centos.org>, 2017-05-25-20:52:28, c1bm.rdu2.centos.org), qemu version: 2.6.0 (qemu-kvm-ev-2.6.0-28.el7.10.1), hostname: ovirt02.localdomain.local
...
char device redirected to /dev/pts/1 (label charconsole0)
warning: host doesn't support requested feature: CPUID.07H:EBX.erms [bit 9]

Host3:

2017-07-09 19:36:38.143+0000: starting up libvirt version: 2.0.0, package: 10.el7_3.9 (CentOS BuildSystem <http://bugs.centos.org>, 2017-05-25-20:52:28, c1bm.rdu2.centos.org), qemu version: 2.6.0 (qemu-kvm-ev-2.6.0-28.el7.10.1), hostname: ovirt03.localdomain.local
...
char device redirected to /dev/pts/1 (label charconsole0)
2017-07-09 19:36:38.584+0000: shutting down
2017-07-09T19:36:38.589729Z qemu-kvm: terminating on signal 15 from pid 1835

Any comment? Is it only a matter of powering on the VM in paused mode before starting the OS itself, or do I risk corruption due to two qemu-kvm processes trying to start the engine VM's OS?

Thanks,
Gianluca
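The same transitions (EngineStart -> EngineStarting -> EngineForceStop) should also be traceable in the HA agent log on each host; I suppose something like this would show them (assuming the default log location of ovirt-hosted-engine-ha):

  [root@ovirt03 ~]# grep -i state /var/log/ovirt-hosted-engine-ha/agent.log | tail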
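P.S.: two things I could check further, just my guesses on where to look. On host3 I suppose pid 1835 in the "terminating on signal 15" line is libvirtd, which I could verify (while that pid is still alive) with:

  [root@ovirt03 ~]# ps -p 1835 -o pid,comm,args

And since the "Storage of VM is locked" message suggests a sanlock lease protecting the engine VM disk, the currently held leases should be visible with:

  [root@ovirt03 ~]# sanlock client status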