[ovirt-users] VM pauses/hangs after migration

Davide Ferrari davide at billymob.com
Wed Sep 28 12:59:59 UTC 2016


Hello

While trying to migrate a VM from one host to another (a big VM with 96GB of
RAM), I found that when the migration completes, the VM goes into a paused
state and cannot be resumed. The libvirt/qemu log it produces is this:

2016-09-28T12:18:15.679176Z qemu-kvm: error while loading state section id
2(ram)
2016-09-28T12:18:15.680010Z qemu-kvm: load of migration failed:
Input/output error
2016-09-28 12:18:15.872+0000: shutting down
2016-09-28 12:22:21.467+0000: starting up libvirt version: 1.2.17, package:
13.el7_2.5 (CentOS BuildSystem <http://bugs.centos.org>,
2016-06-23-14:23:27, worker1.bsys.centos.org), qemu version: 2.3.0
(qemu-kvm-ev-2.3.0-31.el7.16.1)
LC_ALL=C PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin
QEMU_AUDIO_DRV=spice /usr/libexec/qemu-kvm -name front04.billydomain.com -S
-machine pc-i440fx-rhel7.2.0,accel=kvm,usb=off -cpu Haswell-noTSX -m
size=100663296k,slots=16,maxmem=4294967296k -realtime mlock=off -smp
32,sockets=16,cores=1,threads=2 -numa node,nodeid=0,cpus=0-31,mem=98304
-uuid 4511d1c0-6607-418f-ae75-34f605b2ad68 -smbios
type=1,manufacturer=oVirt,product=oVirt
Node,version=7-2.1511.el7.centos.2.10,serial=4C4C4544-004A-3310-8054-B2C04F474432,uuid=4511d1c0-6607-418f-ae75-34f605b2ad68
-no-user-config -nodefaults -chardev
socket,id=charmonitor,path=/var/lib/libvirt/qemu/
domain-front04.billydomain.com/monitor.sock,server,nowait -mon
chardev=charmonitor,id=monitor,mode=control -rtc
base=2016-09-28T14:22:21,driftfix=slew -global
kvm-pit.lost_tick_policy=discard -no-hpet -no-shutdown -boot strict=on
-device piix3-usb-uhci,id=usb,bus=pci.0,addr=0x1.0x2 -device
virtio-scsi-pci,id=scsi0,bus=pci.0,addr=0x7 -device
virtio-serial-pci,id=virtio-serial0,max_ports=16,bus=pci.0,addr=0x4 -drive
if=none,id=drive-ide0-1-0,readonly=on,format=raw -device
ide-cd,bus=ide.1,unit=0,drive=drive-ide0-1-0,id=ide0-1-0 -drive
file=/rhev/data-center/00000001-0001-0001-0001-0000000003e3/ba2bd397-9222-424d-aecc-eb652c0169d9/images/b5b49d5c-2378-4639-9469-362e37ae7473/24fd0d3c-309b-458d-9818-4321023afacf,if=none,id=drive-virtio-disk0,format=qcow2,serial=b5b49d5c-2378-4639-9469-362e37ae7473,cache=none,werror=stop,rerror=stop,aio=threads
-device
virtio-blk-pci,scsi=off,bus=pci.0,addr=0x5,drive=drive-virtio-disk0,id=virtio-disk0,bootindex=1
-drive
file=/rhev/data-center/00000001-0001-0001-0001-0000000003e3/ba2bd397-9222-424d-aecc-eb652c0169d9/images/f02ac1ce-52cd-4b81-8b29-f8006d0469e0/ff4e49c6-3084-4234-80a1-18a67615c527,if=none,id=drive-virtio-disk1,format=raw,serial=f02ac1ce-52cd-4b81-8b29-f8006d0469e0,cache=none,werror=stop,rerror=stop,aio=threads
-device
virtio-blk-pci,scsi=off,bus=pci.0,addr=0x8,drive=drive-virtio-disk1,id=virtio-disk1
-netdev tap,fd=30,id=hostnet0,vhost=on,vhostfd=31 -device
virtio-net-pci,netdev=hostnet0,id=net0,mac=00:1a:4a:16:01:56,bus=pci.0,addr=0x3
-chardev
socket,id=charchannel0,path=/var/lib/libvirt/qemu/channels/4511d1c0-6607-418f-ae75-34f605b2ad68.com.redhat.rhevm.vdsm,server,nowait
-device
virtserialport,bus=virtio-serial0.0,nr=1,chardev=charchannel0,id=channel0,name=com.redhat.rhevm.vdsm
-chardev
socket,id=charchannel1,path=/var/lib/libvirt/qemu/channels/4511d1c0-6607-418f-ae75-34f605b2ad68.org.qemu.guest_agent.0,server,nowait
-device
virtserialport,bus=virtio-serial0.0,nr=2,chardev=charchannel1,id=channel1,name=org.qemu.guest_agent.0
-chardev spicevmc,id=charchannel2,name=vdagent -device
virtserialport,bus=virtio-serial0.0,nr=3,chardev=charchannel2,id=channel2,name=com.redhat.spice.0
-vnc 192.168.10.225:1,password -k es -spice
tls-port=5902,addr=192.168.10.225,x509-dir=/etc/pki/vdsm/libvirt-spice,tls-channel=default,tls-channel=main,tls-channel=display,tls-channel=inputs,tls-channel=cursor,tls-channel=playback,tls-channel=record,tls-channel=smartcard,tls-channel=usbredir,seamless-migration=on
-k es -device
qxl-vga,id=video0,ram_size=67108864,vram_size=8388608,vgamem_mb=16,bus=pci.0,addr=0x2
-incoming tcp:0.0.0.0:49156 -device
virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x6 -msg timestamp=on
Domain id=5 is tainted: hook-script
red_dispatcher_loadvm_commands:
KVM: entry failed, hardware error 0x8
RAX=00000000ffffffed RBX=ffff8817ba00c000 RCX=0100000000000000
RDX=0000000000000000
RSI=0000000000000000 RDI=0000000000000046 RBP=ffff8817ba00fe98
RSP=ffff8817ba00fe98
R8 =0000000000000000 R9 =0000000000000000 R10=0000000000000000
R11=0000000000000000
R12=0000000000000006 R13=ffff8817ba00c000 R14=ffff8817ba00c000
R15=0000000000000000
RIP=ffffffff81058e96 RFL=00010286 [--S--P-] CPL=0 II=0 A20=1 SMM=0 HLT=0
ES =0000 0000000000000000 ffffffff 00000000
CS =0010 0000000000000000 ffffffff 00a09b00 DPL=0 CS64 [-RA]
SS =0018 0000000000000000 ffffffff 00c09300 DPL=0 DS   [-WA]
DS =0000 0000000000000000 ffffffff 00000000
FS =0000 0000000000000000 ffffffff 00000000
GS =0000 ffff8817def80000 ffffffff 00000000
LDT=0000 0000000000000000 ffffffff 00000000
TR =0040 ffff8817def93b80 00002087 00008b00 DPL=0 TSS64-busy
GDT=     ffff8817def89000 0000007f
IDT=     ffffffffff529000 00000fff
CR0=80050033 CR2=00000000ffffffff CR3=00000017b725b000 CR4=001406e0
DR0=0000000000000000 DR1=0000000000000000 DR2=0000000000000000
DR3=0000000000000000
DR6=00000000ffff0ff0 DR7=0000000000000400
EFER=0000000000000d01
Code=89 e5 fb 5d c3 66 0f 1f 84 00 00 00 00 00 55 48 89 e5 fb f4 <5d> c3 0f
1f 84 00 00 00 00 00 55 48 89 e5 f4 5d c3 66 0f 1f 84 00 00 00 00 00 55 49
89 ca
KVM: entry failed, hardware error 0x8
RAX=00000000ffffffed RBX=ffff8817ba008000 RCX=0100000000000000
RDX=0000000000000000
RSI=0000000000000000 RDI=0000000000000046 RBP=ffff8817ba00be98
RSP=ffff8817ba00be98
R8 =0000000000000000 R9 =0000000000000000 R10=0000000000000000
R11=0000000000000000
R12=0000000000000005 R13=ffff8817ba008000 R14=ffff8817ba008000
R15=0000000000000000
RIP=ffffffff81058e96 RFL=00010286 [--S--P-] CPL=0 II=0 A20=1 SMM=0 HLT=0
ES =0000 0000000000000000 ffffffff 00000000
CS =0010 0000000000000000 ffffffff 00a09b00 DPL=0 CS64 [-RA]
SS =0018 0000000000000000 ffffffff 00c09300 DPL=0 DS   [-WA]
DS =0000 0000000000000000 ffffffff 00000000
FS =0000 0000000000000000 ffffffff 00000000
GS =0000 ffff8817def40000 ffffffff 00000000
LDT=0000 0000000000000000 ffffffff 00000000
TR =0040 ffff8817def53b80 00002087 00008b00 DPL=0 TSS64-busy
GDT=     ffff8817def49000 0000007f
IDT=     ffffffffff529000 00000fff
CR0=80050033 CR2=00000000ffffffff CR3=00000017b3c9a000 CR4=001406e0
DR0=0000000000000000 DR1=0000000000000000 DR2=0000000000000000
DR3=0000000000000000
DR6=00000000ffff0ff0 DR7=0000000000000400
EFER=0000000000000d01
Code=89 e5 fb 5d c3 66 0f 1f 84 00 00 00 00 00 55 48 89 e5 fb f4 <5d> c3 0f
1f 84 00 00 00 00 00 55 48 89 e5 f4 5d c3 66 0f 1f 84 00 00 00 00 00 55 49
89 ca
KVM: entry failed, hardware error 0x80000021

If you're running a guest on an Intel machine without unrestricted mode
support, the failure can be most likely due to the guest entering an invalid
state for Intel VT. For example, the guest maybe running in big real mode
which is not supported on less recent Intel processors.

EAX=ffffffed EBX=ba020000 ECX=00000000 EDX=00000000
ESI=00000000 EDI=00000046 EBP=ba023e98 ESP=ba023e98
EIP=81058e96 EFL=00000002 [-------] CPL=0 II=0 A20=1 SMM=0 HLT=0
ES =0000 00000000 0000ffff 00009300 DPL=0 DS   [-WA]
CS =f000 ffff0000 0000ffff 00009b00 DPL=0 CS16 [-RA]
SS =0000 00000000 0000ffff 00009300 DPL=0 DS   [-WA]
DS =0000 00000000 0000ffff 00009300 DPL=0 DS   [-WA]
FS =0000 00000000 0000ffff 00009300 DPL=0 DS   [-WA]
GS =0000 00000000 0000ffff 00009300 DPL=0 DS   [-WA]
LDT=0000 00000000 0000ffff 00008200 DPL=0 LDT
TR =0000 00000000 0000ffff 00008b00 DPL=0 TSS64-busy
GDT=     0000000000000000 0000ffff
IDT=     0000000000000000 0000ffff
CR0=80050033 CR2=00007fd826ac20a0 CR3=000000003516c000 CR4=00140060
DR0=0000000000000000 DR1=0000000000000000 DR2=0000000000000000
DR3=0000000000000000
DR6=00000000ffff0ff0 DR7=0000000000000400
EFER=0000000000000d01
Code=?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? <??> ?? ??
?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ??
?? ??


Searching for errors like this, I found some bug reports about kernel issues,
but I don't think that's the case here: other VMs spawned from the same image
migrate without any issue. I have to say that the original host running
the VM has a RAM problem (ECC multi-bit fault in one DIMM). Maybe that's
the problem?
How can I properly read this error log?

Thanks

-- 
Davide Ferrari
Senior Systems Engineer
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.ovirt.org/pipermail/users/attachments/20160928/1c9c9ecc/attachment-0001.html>


More information about the Users mailing list