I connected to the hypervisor and ran the hosted engine from there successfully.

However, the node status is still bad.

I've set the maintenance mode back to none, but it still shows that the cluster is in global maintenance mode.

Furthermore, it seems the system has a disk storage issue (the root fs appears to be full).
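To see what is actually consuming the root filesystem, something like this helps (a generic sketch, nothing oVirt-specific):

```shell
# List the 20 largest directories on the root filesystem.
# -x stays on this filesystem so NFS and other mounts are not counted.
du -x -h --max-depth=2 / 2>/dev/null | sort -rh | head -20
```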

[root@milhouse-main ~]# hosted-engine --set-maintenance --mode=none
You have new mail in /var/spool/mail/root
[root@milhouse-main ~]# hosted-engine --vm-status


!! Cluster is in GLOBAL MAINTENANCE mode !!



--== Host milhouse-main.envrmnt.local (id: 1) status ==--

conf_on_shared_storage             : True
Status up-to-date                  : True
Hostname                           : milhouse-main.envrmnt.local
Host ID                            : 1
Engine status                      : {"reason": "bad vm status", "health": "bad", "vm": "down_unexpected", "detail": "Down"}
Score                              : 3400
stopped                            : False
Local maintenance                  : False
crc32                              : c3dd57b9
local_conf_timestamp               : 1652129
Host timestamp                     : 1652129
Extra metadata (valid at timestamp):
metadata_parse_version=1
metadata_feature_version=1
timestamp=1652129 (Tue May 18 22:40:57 2021)
host-id=1
score=3400
vm_conf_refresh_time=1652129 (Tue May 18 22:40:57 2021)
conf_on_shared_storage=True
maintenance=False
state=GlobalMaintenance
stopped=False


!! Cluster is in GLOBAL MAINTENANCE mode !!

You have new mail in /var/spool/mail/root
[root@milhouse-main ~]# virsh -r list
setlocale: No such file or directory
 Id    Name                           State
----------------------------------------------------
 2     hp_gpu-node11                  paused
 3     fp_gpu-node5                   paused
 4     hp_gpu-node10                  paused
 5     hp_gpu-node7                   paused
 6     cpu-node3                      paused
 7     hp_gpu-node5                   paused
 8     fp_gpu-node1                   paused
 9     fp_gpu-node0                   paused
 10    cpu-node1                      paused
 11    fp_gpu-node6                   paused
 12    hp_gpu-node8                   paused
 13    fp_gpu-node10                  paused
 14    fp_gpu-node4                   paused
 15    fp_gpu-node9                   paused
 16    hp_gpu-node4                   paused
 17    fp_gpu-node15                  paused
 18    fp_gpu-node8                   paused
 19    hp_gpu-node0                   paused
 20    fp_gpu-node14                  paused
 21    fp_gpu-node2                   paused
 22    fp_gpu-node11                  paused
 23    hp_gpu-node9                   paused
 24    cpu-node2                      paused
 25    hp_gpu-node1                   paused
 26    hp_gpu-node2                   paused
 27    fp_gpu-node12                  paused
 28    hp_gpu-node3                   paused
 29    hp_gpu-node6                   paused
 30    infra-vm                       paused
 31    cpu-node0                      paused
 32    fp_gpu-node3                   paused
 33    fp_gpu-node7                   paused
 34    fp_gpu-node13                  paused
 35    bigip-16.1x-milhouse           paused
 37    HostedEngine                   running

You have new mail in /var/spool/mail/root
[root@milhouse-main ~]# nodectl check
Status: FAILED
Bootloader ... OK
Layer boot entries ... OK
Valid boot entries ... OK
Mount points ... OK
Separate /var ... OK
Discard is used ... OK
Basic storage ... OK
Initialized VG ... OK
Initialized Thin Pool ... OK
Initialized LVs ... OK
Thin storage ... FAILED - It looks like the LVM layout is not correct. The reason could be an incorrect installation.
Checking available space in thinpool ... FAILED - Data or Metadata usage is above threshold. Check the output of `lvs`
Checking thinpool auto-extend ... OK
vdsmd ... OK
[root@milhouse-main ~]# lvs
  LV                          VG   Attr       LSize    Pool   Origin                    Data%  Meta%  Move Log Cpy%Sync Convert
  home                        rhvh Vwi-aotz--   1.00g  pool00                            4.79
  pool00                      rhvh twi-aotz-- 422.84g                                   89.91  15.03
  rhvh-4.3.7.1-0.20191211.0   rhvh Vwi---tz-k <325.37g pool00 root
  rhvh-4.3.7.1-0.20191211.0+1 rhvh Vwi-a-tz-- <325.37g pool00 rhvh-4.3.7.1-0.20191211.0 18.30
  rhvh-4.3.8.1-0.20200126.0   rhvh Vri---tz-k <325.37g pool00
  rhvh-4.3.8.1-0.20200126.0+1 rhvh Vwi-aotz-- <325.37g pool00 rhvh-4.3.8.1-0.20200126.0 95.65
  root                        rhvh Vri---tz-k <325.37g pool00
  swap                        rhvh -wi-ao----    4.00g
  tmp                         rhvh Vwi-aotz--    1.00g pool00                            5.15
  var                         rhvh Vwi-aotz--   15.00g pool00                            6.58
  var_crash                   rhvh Vwi-aotz--   10.00g pool00                           60.48
  var_log                     rhvh Vwi-aotz--    8.00g pool00                           22.84
  var_log_audit               rhvh Vwi-aotz--    2.00g pool00                            5.75
[root@milhouse-main ~]# df -kh
Filesystem                                      Size  Used Avail Use% Mounted on
devtmpfs                                        252G     0  252G   0% /dev
tmpfs                                           252G   16K  252G   1% /dev/shm
tmpfs                                           252G  1.4G  251G   1% /run
tmpfs                                           252G     0  252G   0% /sys/fs/cgroup
/dev/mapper/rhvh-rhvh--4.3.8.1--0.20200126.0+1  321G  306G     0 100% /
/dev/mapper/rhvh-home                           976M  2.6M  907M   1% /home
/dev/mapper/3600508b1001c9cd336275e31e675f593p2 976M  367M  543M  41% /boot
/dev/mapper/rhvh-tmp                            976M  2.6M  907M   1% /tmp
/dev/mapper/3600508b1001c9cd336275e31e675f593p1 200M  9.7M  191M   5% /boot/efi
/dev/mapper/rhvh-var                             15G  631M   14G   5% /var
/dev/mapper/rhvh-var_crash                      9.8G  5.8G  3.5G  63% /var/crash
/dev/mapper/rhvh-var_log                        7.8G  1.6G  5.9G  21% /var/log
/dev/mapper/rhvh-var_log_audit                  2.0G   26M  1.8G   2% /var/log/audit
192.168.36.64:/exports/data                     321G  306G     0 100% /rhev/data-center/mnt/192.168.36.64:_exports_data
tmpfs                                            51G     0   51G   0% /run/user/0
[root@milhouse-main ~]#
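Given the 100% usage on / and on the NFS data domain, the paused VMs were most likely suspended by libvirt on an I/O error (no space left). Once space is freed, a loop like this should resume them (a sketch; it assumes virsh can reach the local libvirt daemon and is run as root on the hypervisor):

```shell
# Resume every VM that libvirt has paused, once storage space is available.
for vm in $(virsh list --state-paused --name); do
    virsh resume "$vm"
done
```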


On Tue, May 18, 2021 at 11:47 PM, marcel d'heureuse <marcel@deheureu.se> wrote:
But the score is 3400. The engine Image should be ok.

Is the engine volume mounted and available as brick?

gluster volume status engine

Br
Marcel

On May 18, 2021 at 11:37:38 PM CEST, Edward Berger <edwberger@gmail.com> wrote:
With all the other VMs paused, I would guess all the VM disk image storage is offline or unreachable
from the hypervisor.

Log in to this hypervisor host and run df -kh to see what's mounted.

Check the file serving from the hosts there.
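For example (a rough sketch; the mount path is the one df shows for the NFS data domain on this host):

```shell
# Check whether the NFS storage domain is mounted and writable from the host.
mnt='/rhev/data-center/mnt/192.168.36.64:_exports_data'
if mountpoint -q "$mnt" && touch "$mnt/.rw_probe" 2>/dev/null; then
    rm -f "$mnt/.rw_probe"
    echo "storage domain mounted and writable"
else
    echo "storage domain missing or not writable"
fi
```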



On Tue, May 18, 2021 at 4:33 PM Eugène Ngontang <sympavali@gmail.com> wrote:
Hi,

Our self-hosted engine was accidentally shut down by a teammate, and now I'm trying hard to get it back up, without success.

I've tried the --vm-start command but it says the VM is in WaitForLaunch status. 

I've set the global maintenance mode but it does nothing.

[root@milhouse-main ~]# hosted-engine --vm-start
VM exists and is down, cleaning up and restarting
VM in WaitForLaunch

[root@milhouse-main ~]# hosted-engine --set-maintenance --mode=global
[root@milhouse-main ~]# hosted-engine --vm-status


!! Cluster is in GLOBAL MAINTENANCE mode !!



--== Host milhouse-main.envrmnt.local (id: 1) status ==--

conf_on_shared_storage             : True
Status up-to-date                  : False
Hostname                           : milhouse-main.envrmnt.local
Host ID                            : 1
Engine status                      : unknown stale-data
Score                              : 3400
stopped                            : False
Local maintenance                  : False
crc32                              : 931b2db9
local_conf_timestamp               : 1642052
Host timestamp                     : 1642052
Extra metadata (valid at timestamp):
metadata_parse_version=1
metadata_feature_version=1
timestamp=1642052 (Tue May 18 19:52:59 2021)
host-id=1
score=3400
vm_conf_refresh_time=1642052 (Tue May 18 19:53:00 2021)
conf_on_shared_storage=True
maintenance=False
state=EngineDown
stopped=False


!! Cluster is in GLOBAL MAINTENANCE mode !!

You have new mail in /var/spool/mail/root
[root@milhouse-main ~]# hosted-engine --vm-start
VM exists and is down, cleaning up and restarting
VM in WaitForLaunch
[root@milhouse-main ~]# 
And when I list all VMs, I can see the hosted engine is in the "shut off" state and the managed VMs are all paused:

[root@milhouse-main ~]# virsh -r list --all
setlocale: No such file or directory
 Id    Name                           State
----------------------------------------------------
 2     hp_gpu-node11                  paused
 3     fp_gpu-node5                   paused
 4     hp_gpu-node10                  paused
 5     hp_gpu-node7                   paused
 6     cpu-node3                      paused
 7     hp_gpu-node5                   paused
 8     fp_gpu-node1                   paused
 9     fp_gpu-node0                   paused
 10    cpu-node1                      paused
 11    fp_gpu-node6                   paused
 12    hp_gpu-node8                   paused
 13    fp_gpu-node10                  paused
 14    fp_gpu-node4                   paused
 15    fp_gpu-node9                   paused
 16    hp_gpu-node4                   paused
 17    fp_gpu-node15                  paused
 18    fp_gpu-node8                   paused
 19    hp_gpu-node0                   paused
 20    fp_gpu-node14                  paused
 21    fp_gpu-node2                   paused
 22    fp_gpu-node11                  paused
 23    hp_gpu-node9                   paused
 24    cpu-node2                      paused
 25    hp_gpu-node1                   paused
 26    hp_gpu-node2                   paused
 27    fp_gpu-node12                  paused
 28    hp_gpu-node3                   paused
 29    hp_gpu-node6                   paused
 30    infra-vm                       paused
 31    cpu-node0                      paused
 32    fp_gpu-node3                   paused
 33    fp_gpu-node7                   paused
 34    fp_gpu-node13                  paused
 35    bigip-16.1x-milhouse           paused
 -     HostedEngine                   shut off

[root@milhouse-main ~]#

I don't want to reboot the host server, because I could lose all my VMs.

Can someone help here please?

Thanks.

Regards,
Eugène NG
--
LesCDN
------------------------------------------------------------
Aux hommes il faut un chef, et au chef il faut des hommes!
L'habit ne fait pas le moine, mais lorsqu'on te voit on te juge!
_______________________________________________
Users mailing list -- users@ovirt.org
To unsubscribe send an email to users-leave@ovirt.org
Privacy Statement: https://www.ovirt.org/privacy-policy.html
oVirt Code of Conduct: https://www.ovirt.org/community/about/community-guidelines/
List Archives: https://lists.ovirt.org/archives/list/users@ovirt.org/message/ZA7FHNC3K7TXF3P47LZP7JNKNO4QCB4M/
_______________________________________________
Users mailing list -- users@ovirt.org
To unsubscribe send an email to users-leave@ovirt.org
Privacy Statement: https://www.ovirt.org/privacy-policy.html
oVirt Code of Conduct: https://www.ovirt.org/community/about/community-guidelines/
List Archives: https://lists.ovirt.org/archives/list/users@ovirt.org/message/XI4DIEVR4TSSJ2MQAAR57BRDCZEOSE2N/


--
LesCDN
engontang@lescdn.com
------------------------------------------------------------
Aux hommes il faut un chef, et au chef il faut des hommes!
L'habit ne fait pas le moine, mais lorsqu'on te voit on te juge!