[Kimchi-devel] [PATCH v11 3/6] Host device passthrough: Directly assign and dismiss host device from VM
Aline Manera
alinefm at linux.vnet.ibm.com
Fri Oct 3 17:06:01 UTC 2014
On 09/30/2014 07:00 AM, Zhou Zheng Sheng wrote:
> This patch enables Kimchi's VM to use host devices directly, and it
> greatly improves the related device performance. The user can assign
> PCI, USB and SCSI LUN directly to VM, as long as the host supports one
> of Intel VT-d, AMD IOMMU or POWER sPAPR technology and runs a recent
> release of Linux kernel.
>
> This patch adds a sub-collection "hostdevs" to the URI vms/vm-name/.
> The front-end can GET
> vms/vm-name/hostdevs
> and
> vms/vm-name/hostdevs/dev-name
> or POST (assign)
> vms/vm-name/hostdevs
> and DELETE (dismiss)
> vms/vm-name/hostdevs/dev-name
>
> The eligible devices to assign are the devices listed by the URI
> host/devices?_passthrough=1
> When assigning a host PCI device to VM, all the eligible PCI devices in
> the same IOMMU group are also automatically assigned, and vice versa
> when dismissing a host PCI device from the VM.
>
> Some examples:
>
> Assign a USB device:
> curl -k -u root -H "Content-Type: application/json" \
> -H "Accept: application/json" \
> -X POST -d '{"name": "usb_1_1_6"}' \
> 'https://127.0.0.1:8001/vms/rhel65/hostdevs'
>
> Assign a PCI device:
> -d '{"name": "pci_0000_0d_00_0"}'
>
> Assign a SCSI LUN:
> -d '{"name": "scsi_1_0_0_0"}'
>
> List assigned devices:
> curl -k -u root -H "Content-Type: application/json" \
> -H "Accept: application/json" \
> 'https://127.0.0.1:8001/vms/rhel65/hostdevs'
> The above command should print the following.
> [
> {
> "type":"scsi",
> "name":"scsi_1_0_0_0"
> },
> {
> "type":"usb",
> "name":"usb_1_1_6"
> },
> {
> "type":"pci",
> "name":"pci_0000_0d_00_0"
> },
> {
> "type":"pci",
> "name":"pci_0000_03_00_0"
> }
> ]
> Notice that the device pci_0000_03_00_0 is also assigned automatically.
>
> The assigned devices are hot-plugged to VM and also written to the
> domain XML. When it's possible, it enables VFIO for PCI device
> assignment.
>
> On distributions with an old Linux kernel, there are many limitations with
> PCI passthrough and it's hardly useful. This patch tries to adapt to old
> kernels but it's better to use a newer kernel with vfio support. Thus
> this patch also provides a new capability in /config/capabilities.
>
> curl -k -u root -H "Content-Type: application/json" \
> -H "Accept: application/json" \
> 'https://127.0.0.1:8001/config/capabilities'
>
> The above command should print the following.
> {
> "blah": "blah",
> ...
> "kernel_vfio":true
> }
Just to make it clear from a UI perspective: when "kernel_vfio" is false, we
should disable PCI passthrough in the UI, right?
> v1:
> Handle the devices in the VM template.
>
> v2:
> Handle the devices in the VM sub-resource "hostdevs".
>
> v3:
> No change.
>
> v4:
> Not all domain XMLs contain hostdev node. Deal with the case.
>
> v5:
> Change _passthrough='1' to _passthrough='true'. When attaching and
> detaching a device, do not use VIR_DOMAIN_AFFECT_CURRENT flag, instead,
> use kimchi.model.utils.get_vm_config_flag() to correctly set the device
> flag.
>
> v11:
> Add Capability kernel_vfio to indicate if Linux kernel is new enough
> to support vfio.
>
> Signed-off-by: Zhou Zheng Sheng <zhshzhou at linux.vnet.ibm.com>
> ---
> src/kimchi/control/vm/hostdevs.py | 44 ++++++
> src/kimchi/featuretests.py | 10 +-
> src/kimchi/i18n.py | 7 +
> src/kimchi/model/config.py | 6 +-
> src/kimchi/model/vmhostdevs.py | 305 ++++++++++++++++++++++++++++++++++++++
> src/kimchi/rollbackcontext.py | 3 +
> 6 files changed, 373 insertions(+), 2 deletions(-)
> create mode 100644 src/kimchi/control/vm/hostdevs.py
> create mode 100644 src/kimchi/model/vmhostdevs.py
>
> diff --git a/src/kimchi/control/vm/hostdevs.py b/src/kimchi/control/vm/hostdevs.py
> new file mode 100644
> index 0000000..81fe8ec
> --- /dev/null
> +++ b/src/kimchi/control/vm/hostdevs.py
> @@ -0,0 +1,44 @@
> +#
> +# Project Kimchi
> +#
> +# Copyright IBM, Corp. 2014
> +#
> +# This library is free software; you can redistribute it and/or
> +# modify it under the terms of the GNU Lesser General Public
> +# License as published by the Free Software Foundation; either
> +# version 2.1 of the License, or (at your option) any later version.
> +#
> +# This library is distributed in the hope that it will be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> +# Lesser General Public License for more details.
> +#
> +# You should have received a copy of the GNU Lesser General Public
> +# License along with this library; if not, write to the Free Software
> +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> +
> +from kimchi.control.base import Collection, Resource
> +from kimchi.control.utils import UrlSubNode
> +
> +
> + at UrlSubNode("hostdevs")
> +class VMHostDevs(Collection):
> + def __init__(self, model, vmid):
> + super(VMHostDevs, self).__init__(model)
> + self.resource = VMHostDev
> + self.vmid = vmid
> + self.resource_args = [self.vmid, ]
> + self.model_args = [self.vmid, ]
> +
> +
> +class VMHostDev(Resource):
> + def __init__(self, model, vmid, ident):
> + super(VMHostDev, self).__init__(model, ident)
> + self.vmid = vmid
> + self.ident = ident
> + self.info = {}
Why is that needed?
> + self.model_args = [self.vmid, self.ident]
> +
> + @property
> + def data(self):
> + return self.info
> diff --git a/src/kimchi/featuretests.py b/src/kimchi/featuretests.py
> index 8964098..c1470fc 100644
> --- a/src/kimchi/featuretests.py
> +++ b/src/kimchi/featuretests.py
> @@ -29,7 +29,7 @@ from lxml.builder import E
>
>
> from kimchi.rollbackcontext import RollbackContext
> -from kimchi.utils import kimchi_log
> +from kimchi.utils import kimchi_log, run_command
>
>
> ISO_STREAM_XML = """
> @@ -206,3 +206,11 @@ class FeatureTests(object):
> return True
> except libvirt.libvirtError:
> return False
> +
> + @staticmethod
> + def kernel_support_vfio():
> + out, err, rc = run_command(['modprobe', 'vfio-pci'])
> + if rc != 0:
> + kimchi_log.warning("Unable to load Kernal module vfio-pci.")
> + return False
> + return True
> diff --git a/src/kimchi/i18n.py b/src/kimchi/i18n.py
> index 98adc46..ad65775 100644
> --- a/src/kimchi/i18n.py
> +++ b/src/kimchi/i18n.py
> @@ -98,6 +98,13 @@ messages = {
> "KCHVM0031E": _("The guest console password must be a string."),
> "KCHVM0032E": _("The life time for the guest console password must be a number."),
>
> + "KCHVMHDEV0001E": _("VM %(vmid)s does not contain directly assigned host device %(dev_name)s."),
> + "KCHVMHDEV0002E": _("The host device %(dev_name)s is not allowed to directly assign to VM."),
> + "KCHVMHDEV0003E": _("No IOMMU groups found. Host PCI pass through needs IOMMU group to function correctly. "
> + "Please enable Intel VT-d or AMD IOMMU in your BIOS, then verify the Kernel is compiled with IOMMU support. "
> + "For Intel CPU, add intel_iommu=on to your Kernel parameter in /boot/grub2/grub.conf. "
> + "For AMD CPU, add iommu=pt iommu=1."),
> +
> "KCHVMIF0001E": _("Interface %(iface)s does not exist in virtual machine %(name)s"),
> "KCHVMIF0002E": _("Network %(network)s specified for virtual machine %(name)s does not exist"),
> "KCHVMIF0003E": _("Do not support guest interface hot plug attachment"),
> diff --git a/src/kimchi/model/config.py b/src/kimchi/model/config.py
> index 1c00cfe..9ffc53e 100644
> --- a/src/kimchi/model/config.py
> +++ b/src/kimchi/model/config.py
> @@ -53,6 +53,7 @@ class CapabilitiesModel(object):
> self.libvirt_stream_protocols = []
> self.fc_host_support = False
> self.metadata_support = False
> + self.kernel_vfio = False
>
> # Subscribe function to set host capabilities to be run when cherrypy
> # server is up
> @@ -66,6 +67,7 @@ class CapabilitiesModel(object):
> self.nfs_target_probe = FeatureTests.libvirt_support_nfs_probe()
> self.fc_host_support = FeatureTests.libvirt_support_fc_host()
> self.metadata_support = FeatureTests.has_metadata_support()
> + self.kernel_vfio = FeatureTests.kernel_support_vfio()
>
> self.libvirt_stream_protocols = []
> for p in ['http', 'https', 'ftp', 'ftps', 'tftp']:
> @@ -110,7 +112,9 @@ class CapabilitiesModel(object):
> 'system_report_tool': bool(report_tool),
> 'update_tool': update_tool,
> 'repo_mngt_tool': repo_mngt_tool,
> - 'federation': kconfig.get("server", "federation")}
> + 'federation': kconfig.get("server", "federation"),
> + 'kernel_vfio': self.kernel_vfio,
> + }
>
>
> class DistrosModel(object):
> diff --git a/src/kimchi/model/vmhostdevs.py b/src/kimchi/model/vmhostdevs.py
> new file mode 100644
> index 0000000..0d002b5
> --- /dev/null
> +++ b/src/kimchi/model/vmhostdevs.py
> @@ -0,0 +1,305 @@
> +#
> +# Project Kimchi
> +#
> +# Copyright IBM, Corp. 2014
> +#
> +# This library is free software; you can redistribute it and/or
> +# modify it under the terms of the GNU Lesser General Public
> +# License as published by the Free Software Foundation; either
> +# version 2.1 of the License, or (at your option) any later version.
> +#
> +# This library is distributed in the hope that it will be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> +# Lesser General Public License for more details.
> +#
> +# You should have received a copy of the GNU Lesser General Public
> +# License along with this library; if not, write to the Free Software
> +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> +
> +import glob
> +import os
> +
> +import libvirt
> +from lxml import etree, objectify
> +
> +from kimchi.exception import InvalidOperation, InvalidParameter, NotFoundError
> +from kimchi.model.config import CapabilitiesModel
> +from kimchi.model.host import DeviceModel, DevicesModel
> +from kimchi.model.utils import get_vm_config_flag
> +from kimchi.model.vms import DOM_STATE_MAP, VMModel
> +from kimchi.rollbackcontext import RollbackContext
> +from kimchi.utils import kimchi_log, run_command
> +
> +
> +class VMHostDevsModel(object):
> + def __init__(self, **kargs):
> + self.conn = kargs['conn']
> +
> + def get_list(self, vmid):
> + dom = VMModel.get_vm(vmid, self.conn)
> + xmlstr = dom.XMLDesc(0)
> + root = objectify.fromstring(xmlstr)
> + try:
> + hostdev = root.devices.hostdev
> + except AttributeError:
> + return []
> +
> + return [self._deduce_dev_name(e) for e in hostdev]
> +
> + @staticmethod
> + def _toint(num_str):
> + if num_str.startswith('0x'):
> + return int(num_str, 16)
> + elif num_str.startswith('0'):
> + return int(num_str, 8)
> + else:
> + return int(num_str)
> +
> + def _deduce_dev_name(self, e):
> + dev_types = {
> + 'pci': self._deduce_dev_name_pci,
> + 'scsi': self._deduce_dev_name_scsi,
> + 'usb': self._deduce_dev_name_usb,
> + }
> + return dev_types[e.attrib['type']](e)
> +
To avoid having a map for it:
return getattr(self, "_deduce_dev_name_%s" % type)(e)
> + def _deduce_dev_name_pci(self, e):
> + attrib = {}
> + for field in ('domain', 'bus', 'slot', 'function'):
> + attrib[field] = self._toint(e.source.address.attrib[field])
> + return 'pci_%(domain)04x_%(bus)02x_%(slot)02x_%(function)x' % attrib
> +
> + def _deduce_dev_name_scsi(self, e):
> + attrib = {}
> + for field in ('bus', 'target', 'unit'):
> + attrib[field] = self._toint(e.source.address.attrib[field])
> + attrib['host'] = self._toint(
> + e.source.adapter.attrib['name'][len('scsi_host'):])
> + return 'scsi_%(host)d_%(bus)d_%(target)d_%(unit)d' % attrib
> +
> + def _deduce_dev_name_usb(self, e):
> + dev_names = DevicesModel(conn=self.conn).get_list(_cap='usb_device')
> + usb_infos = [DeviceModel(conn=self.conn).lookup(dev_name)
> + for dev_name in dev_names]
> +
> + unknown_dev = None
> +
> + try:
> + evendor = self._toint(e.source.vendor.attrib['id'])
> + eproduct = self._toint(e.source.product.attrib['id'])
> + except AttributeError:
> + evendor = 0
> + eproduct = 0
> + else:
> + unknown_dev = 'usb_vendor_%s_product_%s' % (evendor, eproduct)
> +
> + try:
> + ebus = self._toint(e.source.address.attrib['bus'])
> + edevice = self._toint(e.source.address.attrib['device'])
> + except AttributeError:
> + ebus = -1
> + edevice = -1
> + else:
> + unknown_dev = 'usb_bus_%s_device_%s' % (ebus, edevice)
> +
> + for usb_info in usb_infos:
> + ivendor = self._toint(usb_info['vendor']['id'])
> + iproduct = self._toint(usb_info['product']['id'])
> + if evendor == ivendor and eproduct == iproduct:
> + return usb_info['name']
> + ibus = usb_info['bus']
> + idevice = usb_info['device']
> + if ebus == ibus and edevice == idevice:
> + return usb_info['name']
> + return unknown_dev
> +
> + def _passthrough_device_validate(self, dev_name):
> + eligible_dev_names = \
> + DevicesModel(conn=self.conn).get_list(_passthrough='true')
> + if dev_name not in eligible_dev_names:
> + raise InvalidParameter('KCHVMHDEV0002E', {'dev_name': dev_name})
> +
> + def create(self, vmid, params):
> + dev_name = params['name']
> + self._passthrough_device_validate(dev_name)
> + dev_info = DeviceModel(conn=self.conn).lookup(dev_name)
> + attach_device = {
> + 'pci': self._attach_pci_device,
> + 'scsi': self._attach_scsi_device,
> + 'usb_device': self._attach_usb_device,
> + }[dev_info['device_type']]
> + return attach_device(vmid, dev_info)
> +
Same here.
return getattr(self, "_attach_%s_device" %
dev_info['device_type'])(vmid, dev_info)
> + def _get_pci_device_xml(self, dev_info):
> + if 'detach_driver' not in dev_info:
> + dev_info['detach_driver'] = 'kvm'
> +
> + xmlstr = '''
> + <hostdev mode='subsystem' type='pci' managed='yes'>
> + <source>
> + <address domain='%(domain)s' bus='%(bus)s' slot='%(slot)s'
> + function='%(function)s'/>
> + </source>
> + <driver name='%(detach_driver)s'/>
> + </hostdev>''' % dev_info
We are moving towards using etree.builder to create the XML.
It would be good to do it that way here too.
> + return xmlstr
> +
> + @staticmethod
> + def _validate_pci_passthrough_env():
> + # Linux kernel < 3.5 doesn't provide /sys/kernel/iommu_groups
> + if os.path.isdir('/sys/kernel/iommu_groups'):
> + if not glob.glob('/sys/kernel/iommu_groups/*'):
> + raise InvalidOperation("KCHVMHDEV0003E")
> +
> + # Enable virt_use_sysfs on RHEL6 and older distributions
> + # In recent Fedora, there is no virt_use_sysfs.
> + out, err, rc = run_command(['getsebool', 'virt_use_sysfs'])
> + if rc == 0 and out.rstrip('\n') != "virt_use_sysfs --> on":
> + out, err, rc = run_command(['setsebool', '-P',
> + 'virt_use_sysfs=on'])
> + if rc != 0:
> + kimchi_log.warning("Unable to turn on sebool virt_use_sysfs")
> +
> + def _attach_pci_device(self, vmid, dev_info):
> + self._validate_pci_passthrough_env()
> +
> + dom = VMModel.get_vm(vmid, self.conn)
> + # Due to libvirt limitation, we don't support live assigne device to
> + # vfio driver.
> + driver = ('vfio' if DOM_STATE_MAP[dom.info()[0]] == "shutoff" and
> + CapabilitiesModel().kernel_vfio else 'kvm')
> +
> + # Attach all PCI devices in the same IOMMU group
> + dev_model = DeviceModel(conn=self.conn)
> + devs_model = DevicesModel(conn=self.conn)
> + affected_names = devs_model.get_list(
> + _passthrough_affected_by=dev_info['name'])
> + passthrough_names = devs_model.get_list(
> + _cap='pci', _passthrough='true')
> + group_names = list(set(affected_names) & set(passthrough_names))
> + pci_infos = [dev_model.lookup(dev_name) for dev_name in group_names]
> + pci_infos.append(dev_info)
> +
> + device_flags = get_vm_config_flag(dom, mode='all')
> +
> + with RollbackContext() as rollback:
> + for pci_info in pci_infos:
> + pci_info['detach_driver'] = driver
> + xmlstr = self._get_pci_device_xml(pci_info)
> + try:
> + dom.attachDeviceFlags(xmlstr, device_flags)
> + except libvirt.libvirtError:
> + kimchi_log.error(
> + 'Failed to attach host device %s to VM %s: \n%s',
> + pci_info['name'], vmid, xmlstr)
> + raise
> + rollback.prependDefer(dom.detachDeviceFlags,
> + xmlstr, device_flags)
> + rollback.commitAll()
> +
> + return dev_info['name']
> +
> + def _get_scsi_device_xml(self, dev_info):
> + xmlstr = '''
> + <hostdev mode='subsystem' type='scsi' sgio='unfiltered'>
> + <source>
> + <adapter name='scsi_host%(host)s'/>
> + <address type='scsi' bus='%(bus)s' target='%(target)s'
> + unit='%(lun)s'/>
> + </source>
> + </hostdev>''' % dev_info
Same here about etree.builder
> + return xmlstr
> +
> + def _attach_scsi_device(self, vmid, dev_info):
> + xmlstr = self._get_scsi_device_xml(dev_info)
> + dom = VMModel.get_vm(vmid, self.conn)
> + dom.attachDeviceFlags(xmlstr, get_vm_config_flag(dom, mode='all'))
> + return dev_info['name']
> +
> + def _get_usb_device_xml(self, dev_info):
> + xmlstr = '''
> + <hostdev mode='subsystem' type='usb' managed='yes'>
> + <source startupPolicy='optional'>
> + <vendor id='%s'/>
> + <product id='%s'/>
> + <address bus='%s' device='%s'/>
> + </source>
> + </hostdev>''' % (dev_info['vendor']['id'], dev_info['product']['id'],
> + dev_info['bus'], dev_info['device'])
And all other related XML creation =)
> + return xmlstr
> +
> + def _attach_usb_device(self, vmid, dev_info):
> + xmlstr = self._get_usb_device_xml(dev_info)
> + dom = VMModel.get_vm(vmid, self.conn)
> + dom.attachDeviceFlags(xmlstr, get_vm_config_flag(dom, mode='all'))
> + return dev_info['name']
> +
> +
> +class VMHostDevModel(object):
> + def __init__(self, **kargs):
> + self.conn = kargs['conn']
> +
> + def lookup(self, vmid, dev_name):
> + dom = VMModel.get_vm(vmid, self.conn)
> + xmlstr = dom.XMLDesc(0)
> + root = objectify.fromstring(xmlstr)
> + try:
> + hostdev = root.devices.hostdev
> + except AttributeError:
> + raise NotFoundError('KCHVMHDEV0001E',
> + {'vmid': vmid, 'dev_name': dev_name})
> +
> + devsmodel = VMHostDevsModel(conn=self.conn)
> +
> + for e in hostdev:
> + deduced_name = devsmodel._deduce_dev_name(e)
> + if deduced_name == dev_name:
> + return {'name': dev_name, 'type': e.attrib['type']}
> +
> + raise NotFoundError('KCHVMHDEV0001E',
> + {'vmid': vmid, 'dev_name': dev_name})
> +
> + def delete(self, vmid, dev_name):
> + dom = VMModel.get_vm(vmid, self.conn)
> + xmlstr = dom.XMLDesc(0)
> + root = objectify.fromstring(xmlstr)
> +
> + try:
> + hostdev = root.devices.hostdev
> + except AttributeError:
> + raise NotFoundError('KCHVMHDEV0001E',
> + {'vmid': vmid, 'dev_name': dev_name})
> +
> + devsmodel = VMHostDevsModel(conn=self.conn)
> + pci_devs = [(devsmodel._deduce_dev_name(e), e) for e in hostdev
> + if e.attrib['type'] == 'pci']
> +
> + for e in hostdev:
> + if devsmodel._deduce_dev_name(e) == dev_name:
> + xmlstr = etree.tostring(e)
> + dom.detachDeviceFlags(
> + xmlstr, get_vm_config_flag(dom, mode='all'))
> + if e.attrib['type'] == 'pci':
> + self._delete_affected_pci_devices(dom, dev_name, pci_devs)
> + break
> + else:
> + raise NotFoundError('KCHVMHDEV0001E',
> + {'vmid': vmid, 'dev_name': dev_name})
> +
> + def _delete_affected_pci_devices(self, dom, dev_name, pci_devs):
> + dev_model = DeviceModel(conn=self.conn)
> + try:
> + dev_model.lookup(dev_name)
> + except NotFoundError:
> + return
> +
> + affected_names = set(
> + DevicesModel(
> + conn=self.conn).get_list(_passthrough_affected_by=dev_name))
> +
> + for pci_name, e in pci_devs:
> + if pci_name in affected_names:
> + xmlstr = etree.tostring(e)
> + dom.detachDeviceFlags(
> + xmlstr, get_vm_config_flag(dom, mode='all'))
> diff --git a/src/kimchi/rollbackcontext.py b/src/kimchi/rollbackcontext.py
> index 29c0235..2f3e8bc 100644
> --- a/src/kimchi/rollbackcontext.py
> +++ b/src/kimchi/rollbackcontext.py
> @@ -64,3 +64,6 @@ class RollbackContext(object):
>
> def prependDefer(self, func, *args, **kwargs):
> self._finally.insert(0, (func, args, kwargs))
> +
> + def commitAll(self):
> + self._finally = []
More information about the Kimchi-devel
mailing list