[Kimchi-devel] [PATCH v6 3/4] Host device passthrough: Directly assign and dismiss host device from VM

Mark Wu wudxw at linux.vnet.ibm.com
Tue Jun 17 02:41:08 UTC 2014


On 06/09/2014 05:28 PM, Zhou Zheng Sheng wrote:
> This patch enables Kimchi's VM to use host devices directly, and it
> greatly improves the related device performance. The user can assign
> PCI, USB and SCSI LUN directly to VM, as long as the host supports one
> of Intel VT-d, AMD IOMMU or POWER sPAPR technology and runs a recent
> release of Linux kernel.
>
> This patch adds a sub-collection "hostdevs" to the URI vms/vm-name/.
> The front-end can GET
>    vms/vm-name/hostdevs
> and
>    vms/vm-name/hostdevs/dev-name
> or POST (assign)
>    vms/vm-name/hostdevs
> and DELETE (dismiss)
>    vms/vm-name/hostdevs/dev-name
>
> The eligible devices to assign are the devices listed by the URI
>    host/devices?_passthrough=true
> When assigning a host PCI device to VM, all the eligible PCI devices in
> the same IOMMU group are also automatically assigned, and vice versa
> when dismissing a host PCI device from the VM.
>
> Some examples:
>
> Assign a USB device:
> curl -k -u root -H "Content-Type: application/json" \
>    -H "Accept: application/json" \
>    -X POST -d '{"name": "usb_1_1_6"}' \
>    'https://127.0.0.1:8001/vms/rhel65/hostdevs'
>
> Assign a PCI device:
>    -d '{"name": "pci_0000_0d_00_0"}'
>
> Assign a SCSI LUN:
>    -d '{"name": "scsi_1_0_0_0"}'
>
> List assigned devices:
> curl -k -u root -H "Content-Type: application/json" \
>    -H "Accept: application/json" \
>    'https://127.0.0.1:8001/vms/rhel65/hostdevs'
> The above command should print the following:
>    [
>      {
>        "type":"scsi",
>        "name":"scsi_1_0_0_0"
>      },
>      {
>        "type":"usb",
>        "name":"usb_1_1_6"
>      },
>      {
>        "type":"pci",
>        "name":"pci_0000_0d_00_0"
>      },
>      {
>        "type":"pci",
>        "name":"pci_0000_03_00_0"
>      }
>    ]
> Notice that the device pci_0000_03_00_0 is also assigned automatically.
>
> The assigned devices are hot-plugged to VM and also written to the
> domain XML. When it's possible, it enables VFIO for PCI device
> assignment.
>
> v1:
>    Handle the devices in the VM template.
>
> v2:
>    Handle the devices in the VM sub-resource "hostdevs".
>
> v3:
>    No change.
>
> v4:
>    Not all domain XMLs contain hostdev node. Deal with the case.
>
> v5:
>    Change _passthrough='1' to _passthrough='true'. When attaching and
> detaching a device, do not use VIR_DOMAIN_AFFECT_CURRENT flag, instead,
> use kimchi.model.utils.get_vm_config_flag() to correctly set the device
> flag.
>
> Signed-off-by: Zhou Zheng Sheng <zhshzhou at linux.vnet.ibm.com>
> ---
>   src/kimchi/control/vm/hostdevs.py |  44 ++++++
>   src/kimchi/featuretests.py        |  10 +-
>   src/kimchi/i18n.py                |   7 +
>   src/kimchi/model/config.py        |   2 +
>   src/kimchi/model/vmhostdevs.py    | 303 ++++++++++++++++++++++++++++++++++++++
>   src/kimchi/rollbackcontext.py     |   3 +
>   6 files changed, 368 insertions(+), 1 deletion(-)
>   create mode 100644 src/kimchi/control/vm/hostdevs.py
>   create mode 100644 src/kimchi/model/vmhostdevs.py
>
> diff --git a/src/kimchi/control/vm/hostdevs.py b/src/kimchi/control/vm/hostdevs.py
> new file mode 100644
> index 0000000..81fe8ec
> --- /dev/null
> +++ b/src/kimchi/control/vm/hostdevs.py
> @@ -0,0 +1,44 @@
> +#
> +# Project Kimchi
> +#
> +# Copyright IBM, Corp. 2014
> +#
> +# This library is free software; you can redistribute it and/or
> +# modify it under the terms of the GNU Lesser General Public
> +# License as published by the Free Software Foundation; either
> +# version 2.1 of the License, or (at your option) any later version.
> +#
> +# This library is distributed in the hope that it will be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +# Lesser General Public License for more details.
> +#
> +# You should have received a copy of the GNU Lesser General Public
> +# License along with this library; if not, write to the Free Software
> +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
> +
> +from kimchi.control.base import Collection, Resource
> +from kimchi.control.utils import UrlSubNode
> +
> +
> + at UrlSubNode("hostdevs")
> +class VMHostDevs(Collection):
> +    def __init__(self, model, vmid):
> +        super(VMHostDevs, self).__init__(model)
> +        self.resource = VMHostDev
> +        self.vmid = vmid
> +        self.resource_args = [self.vmid, ]
> +        self.model_args = [self.vmid, ]
> +
> +
> +class VMHostDev(Resource):
> +    def __init__(self, model, vmid, ident):
> +        super(VMHostDev, self).__init__(model, ident)
> +        self.vmid = vmid
> +        self.ident = ident
> +        self.info = {}
> +        self.model_args = [self.vmid, self.ident]
> +
> +    @property
> +    def data(self):
> +        return self.info
> diff --git a/src/kimchi/featuretests.py b/src/kimchi/featuretests.py
> index 5192361..74222bf 100644
> --- a/src/kimchi/featuretests.py
> +++ b/src/kimchi/featuretests.py
> @@ -29,7 +29,7 @@ from lxml.builder import E
>
>
>   from kimchi.rollbackcontext import RollbackContext
> -from kimchi.utils import kimchi_log
> +from kimchi.utils import kimchi_log, run_command
>
>
>   ISO_STREAM_XML = """
> @@ -206,3 +206,11 @@ class FeatureTests(object):
>                   return True
>               except libvirt.libvirtError:
>                   return False
> +
> +    @staticmethod
> +    def kernel_support_vfio():
> +        out, err, rc = run_command(['modprobe', 'vfio-pci'])
> +        if rc != 0:
> +            kimchi_log.warning("Unable to load Kernal module vfio-pci.")
> +            return False
> +        return True
> diff --git a/src/kimchi/i18n.py b/src/kimchi/i18n.py
> index 452ede2..4757001 100644
> --- a/src/kimchi/i18n.py
> +++ b/src/kimchi/i18n.py
> @@ -90,6 +90,13 @@ messages = {
>       "KCHVM0029E": _("Unable to shutdown virtual machine %(name)s. Details: %(err)s"),
>       "KCHVM0030E": _("Unable to get access metadata of virtual machine %(name)s. Details: %(err)s"),
>
> +    "KCHVMHDEV0001E": _("VM %(vmid)s does not contain directly assigned host device %(dev_name)s."),
> +    "KCHVMHDEV0002E": _("The host device %(dev_name)s is not allowed to directly assign to VM."),
> +    "KCHVMHDEV0003E": _("No IOMMU groups found. Host PCI pass through needs IOMMU group to function correctly. "
> +                        "Please enable Intel VT-d or AMD IOMMU in your BIOS, then verify the Kernel is compiled with IOMMU support. "
> +                        "For Intel CPU, add intel_iommu=on to your Kernel parameter in /boot/grub2/grub.conf. "
> +                        "For AMD CPU, add iommu=pt iommu=1."),
> +
>       "KCHVMIF0001E": _("Interface %(iface)s does not exist in virtual machine %(name)s"),
>       "KCHVMIF0002E": _("Network %(network)s specified for virtual machine %(name)s does not exist"),
>       "KCHVMIF0003E": _("Do not support guest interface hot plug attachment"),
> diff --git a/src/kimchi/model/config.py b/src/kimchi/model/config.py
> index 0ef0855..95c8e7e 100644
> --- a/src/kimchi/model/config.py
> +++ b/src/kimchi/model/config.py
> @@ -54,6 +54,7 @@ class CapabilitiesModel(object):
>           self.libvirt_stream_protocols = []
>           self.fc_host_support = False
>           self.metadata_support = False
> +        self.kernel_vfio = False
>
>           # Subscribe function to set host capabilities to be run when cherrypy
>           # server is up
> @@ -67,6 +68,7 @@ class CapabilitiesModel(object):
>           self.nfs_target_probe = FeatureTests.libvirt_support_nfs_probe()
>           self.fc_host_support = FeatureTests.libvirt_support_fc_host()
>           self.metadata_support = FeatureTests.has_metadata_support()
> +        self.kernel_vfio = FeatureTests.kernel_support_vfio()
>
>           self.libvirt_stream_protocols = []
>           for p in ['http', 'https', 'ftp', 'ftps', 'tftp']:
> diff --git a/src/kimchi/model/vmhostdevs.py b/src/kimchi/model/vmhostdevs.py
> new file mode 100644
> index 0000000..9e59513
> --- /dev/null
> +++ b/src/kimchi/model/vmhostdevs.py
> @@ -0,0 +1,303 @@
> +#
> +# Project Kimchi
> +#
> +# Copyright IBM, Corp. 2014
> +#
> +# This library is free software; you can redistribute it and/or
> +# modify it under the terms of the GNU Lesser General Public
> +# License as published by the Free Software Foundation; either
> +# version 2.1 of the License, or (at your option) any later version.
> +#
> +# This library is distributed in the hope that it will be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +# Lesser General Public License for more details.
> +#
> +# You should have received a copy of the GNU Lesser General Public
> +# License along with this library; if not, write to the Free Software
> +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
> +
> +import glob
> +
> +import libvirt
> +from lxml import etree, objectify
> +
> +from kimchi.exception import InvalidOperation, InvalidParameter, NotFoundError
> +from kimchi.model.config import CapabilitiesModel
> +from kimchi.model.host import DeviceModel, DevicesModel
> +from kimchi.model.host import PassthroughAffectedDevicesModel
> +from kimchi.model.utils import get_vm_config_flag
> +from kimchi.model.vms import DOM_STATE_MAP, VMModel
> +from kimchi.rollbackcontext import RollbackContext
> +from kimchi.utils import kimchi_log, run_command
> +
> +
> +class VMHostDevsModel(object):
> +    def __init__(self, **kargs):
> +        self.conn = kargs['conn']
> +
> +    def get_list(self, vmid):
> +        dom = VMModel.get_vm(vmid, self.conn)
> +        xmlstr = dom.XMLDesc(0)
> +        root = objectify.fromstring(xmlstr)
> +        try:
> +            hostdev = root.devices.hostdev
> +        except AttributeError:
> +            return []
> +
> +        return [self._deduce_dev_name(e) for e in hostdev]
> +
> +    @staticmethod
> +    def _toint(num_str):
> +        if num_str.startswith('0x'):
> +            return int(num_str, 16)
> +        elif num_str.startswith('0'):
> +            return int(num_str, 8)
> +        else:
> +            return int(num_str)
> +
> +    def _deduce_dev_name(self, e):
> +        dev_types = {
> +            'pci': self._deduce_dev_name_pci,
> +            'scsi': self._deduce_dev_name_scsi,
> +            'usb': self._deduce_dev_name_usb,
> +            }
> +        return dev_types[e.attrib['type']](e)
> +
> +    def _deduce_dev_name_pci(self, e):
> +        attrib = {}
> +        for field in ('domain', 'bus', 'slot', 'function'):
> +            attrib[field] = self._toint(e.source.address.attrib[field])
> +        return 'pci_%(domain)04x_%(bus)02x_%(slot)02x_%(function)x' % attrib
> +
> +    def _deduce_dev_name_scsi(self, e):
> +        attrib = {}
> +        for field in ('bus', 'target', 'unit'):
> +            attrib[field] = self._toint(e.source.address.attrib[field])
> +        attrib['host'] = self._toint(
> +            e.source.adapter.attrib['name'][len('scsi_host'):])
> +        return 'scsi_%(host)d_%(bus)d_%(target)d_%(unit)d' % attrib
> +
> +    def _deduce_dev_name_usb(self, e):
> +        dev_names = DevicesModel(conn=self.conn).get_list(_cap='usb_device')
> +        usb_infos = [DeviceModel(conn=self.conn).lookup(dev_name)
> +                     for dev_name in dev_names]
> +
> +        unknown_dev = None
> +
> +        try:
> +            evendor = self._toint(e.source.vendor.attrib['id'])
> +            eproduct = self._toint(e.source.product.attrib['id'])
> +        except AttributeError:
> +            evendor = 0
> +            eproduct = 0
> +        else:
> +            unknown_dev = 'usb_vendor_%s_product_%s' % (evendor, eproduct)
> +
> +        try:
> +            ebus = self._toint(e.source.address.attrib['bus'])
> +            edevice = self._toint(e.source.address.attrib['device'])
> +        except AttributeError:
> +            ebus = -1
> +            edevice = -1
> +        else:
> +            unknown_dev = 'usb_bus_%s_device_%s' % (ebus, edevice)
> +
> +        for usb_info in usb_infos:
> +            ivendor = self._toint(usb_info['vendor']['id'])
> +            iproduct = self._toint(usb_info['product']['id'])
> +            if evendor == ivendor and eproduct == iproduct:
> +                return usb_info['name']
> +            ibus = usb_info['bus']
> +            idevice = usb_info['device']
> +            if ebus == ibus and edevice == idevice:
> +                return usb_info['name']
> +        return unknown_dev
> +
> +    def _passthrough_device_validate(self, dev_name):
> +        eligible_dev_names = \
> +            DevicesModel(conn=self.conn).get_list(_passthrough='true')
> +        if dev_name not in eligible_dev_names:
> +            raise InvalidParameter('KCHVMHDEV0002E', {'dev_name': dev_name})
> +
> +    def create(self, vmid, params):
> +        dev_name = params['name']
> +        self._passthrough_device_validate(dev_name)
> +        dev_info = DeviceModel(conn=self.conn).lookup(dev_name)
> +        attach_device = {
> +            'pci': self._attach_pci_device,
> +            'scsi': self._attach_scsi_device,
> +            'usb_device': self._attach_usb_device,
> +            }[dev_info['device_type']]
> +        return attach_device(vmid, dev_info)
> +
> +    def _get_pci_device_xml(self, dev_info):
> +        if 'detach_driver' not in dev_info:
> +            dev_info['detach_driver'] = 'kvm'
> +
> +        xmlstr = '''
> +        <hostdev mode='subsystem' type='pci' managed='yes'>
> +          <source>
> +            <address domain='%(domain)s' bus='%(bus)s' slot='%(slot)s'
> +             function='%(function)s'/>
> +          </source>
> +          <driver name='%(detach_driver)s'/>
> +        </hostdev>''' % dev_info
> +        return xmlstr
> +
> +    @staticmethod
> +    def _validate_pci_passthrough_env():
> +        if not glob.glob('/sys/kernel/iommu_groups/*'):
> +            raise InvalidOperation("KCHVMHDEV0003E")
> +
> +        # Enable virt_use_sysfs on RHEL6 and older distributions
> +        # In recent Fedora, there is no virt_use_sysfs.
> +        out, err, rc = run_command(['getsebool', 'virt_use_sysfs'])
> +        if rc == 0 and out.rstrip('\n') != "virt_use_sysfs --> on":
> +            out, err, rc = run_command(['setsebool', '-P',
> +                                        'virt_use_sysfs=on'])
> +            if rc != 0:
> +                kimchi_log.warning("Unable to turn on sebool virt_use_sysfs")
> +
> +    def _attach_pci_device(self, vmid, dev_info):
> +        self._validate_pci_passthrough_env()
> +
> +        dom = VMModel.get_vm(vmid, self.conn)
> +        # Due to a libvirt limitation, we don't support live assignment of a
> +        # device to the vfio driver.
> +        driver = ('vfio' if DOM_STATE_MAP[dom.info()[0]] == "shutoff" and
> +                  CapabilitiesModel().kernel_vfio else 'kvm')
> +
> +        # Attach all PCI devices in the same IOMMU group
> +        dev_model = DeviceModel(conn=self.conn)
> +        affected_devs_model = PassthroughAffectedDevicesModel(conn=self.conn)
> +        dev_infos = [dev_model.lookup(dev_name) for dev_name in
> +                     affected_devs_model.get_list(dev_info['name'])]
> +        pci_infos = [dev_info] + [info for info in dev_infos
> +                                  if info['device_type'] == 'pci']
> +
> +        device_flags = get_vm_config_flag(dom, mode='all')
> +
> +        with RollbackContext() as rollback:
> +            for pci_info in pci_infos:
> +                pci_info['detach_driver'] = driver
> +                xmlstr = self._get_pci_device_xml(pci_info)
> +                try:
> +                    dom.attachDeviceFlags(xmlstr, device_flags)
> +                except libvirt.libvirtError:
> +                    kimchi_log.error(
> +                        'Failed to attach host device %s to VM %s: \n%s',
> +                        pci_info['name'], vmid, xmlstr)
> +                    raise
> +                rollback.prependDefer(dom.detachDeviceFlags,
> +                                      xmlstr, device_flags)
> +            rollback.commitAll()
> +
> +        return dev_info['name']
> +
> +    def _get_scsi_device_xml(self, dev_info):
> +        xmlstr = '''
> +        <hostdev mode='subsystem' type='scsi' sgio='unfiltered'>
> +          <source>
> +            <adapter name='scsi_host%(host)s'/>
> +            <address type='scsi' bus='%(bus)s' target='%(target)s'
> +             unit='%(lun)s'/>
> +          </source>
> +        </hostdev>''' % dev_info
> +        return xmlstr
> +
> +    def _attach_scsi_device(self, vmid, dev_info):
> +        xmlstr = self._get_scsi_device_xml(dev_info)
> +        dom = VMModel.get_vm(vmid, self.conn)
> +        dom.attachDeviceFlags(xmlstr, get_vm_config_flag(dom, mode='all'))
> +        return dev_info['name']
> +
> +    def _get_usb_device_xml(self, dev_info):
> +        xmlstr = '''
> +        <hostdev mode='subsystem' type='usb' managed='yes'>
> +          <source startupPolicy='optional'>
> +            <vendor id='%s'/>
> +            <product id='%s'/>
> +            <address bus='%s' device='%s'/>
> +          </source>
> +        </hostdev>''' % (dev_info['vendor']['id'], dev_info['product']['id'],
> +                         dev_info['bus'], dev_info['device'])
> +        return xmlstr
> +
> +    def _attach_usb_device(self, vmid, dev_info):
> +        xmlstr = self._get_usb_device_xml(dev_info)
> +        dom = VMModel.get_vm(vmid, self.conn)
> +        dom.attachDeviceFlags(xmlstr, get_vm_config_flag(dom, mode='all'))
> +        return dev_info['name']
> +
> +
> +class VMHostDevModel(object):
> +    def __init__(self, **kargs):
> +        self.conn = kargs['conn']
> +
> +    def lookup(self, vmid, dev_name):
> +        dom = VMModel.get_vm(vmid, self.conn)
> +        xmlstr = dom.XMLDesc(0)
> +        root = objectify.fromstring(xmlstr)
> +        try:
> +            hostdev = root.devices.hostdev
> +        except AttributeError:
> +            raise NotFoundError('KCHVMHDEV0001E',
> +                                {'vmid': vmid, 'dev_name': dev_name})
> +
> +        devsmodel = VMHostDevsModel(conn=self.conn)
> +
> +        for e in hostdev:
> +            deduced_name = devsmodel._deduce_dev_name(e)
> +            if deduced_name == dev_name:
> +                return {'name': dev_name, 'type': e.attrib['type']}
> +
> +        raise NotFoundError('KCHVMHDEV0001E',
> +                            {'vmid': vmid, 'dev_name': dev_name})
> +
> +    def delete(self, vmid, dev_name):
> +        dom = VMModel.get_vm(vmid, self.conn)
> +        xmlstr = dom.XMLDesc(0)
> +        root = objectify.fromstring(xmlstr)
> +        pci_devs = []
> +
> +        try:
> +            hostdev = root.devices.hostdev
> +        except AttributeError:
> +            raise NotFoundError('KCHVMHDEV0001E',
> +                                {'vmid': vmid, 'dev_name': dev_name})
> +
> +        devsmodel = VMHostDevsModel(conn=self.conn)
> +
> +        for e in hostdev:
> +            deduced_name = devsmodel._deduce_dev_name(e)
> +            if e.attrib['type'] == 'pci':
> +                pci_devs.append((deduced_name, e))
> +            if deduced_name == dev_name:
> +                dev_e = e
> +                xmlstr = etree.tostring(e)
> +                dom.detachDeviceFlags(
> +                    xmlstr, get_vm_config_flag(dom, mode='all'))
> +                break
> +        else:
> +            raise NotFoundError('KCHVMHDEV0001E',
> +                                {'vmid': vmid, 'dev_name': dev_name})
> +
> +        if dev_e.attrib['type'] == 'pci':
> +            self._delete_affected_pci_devices(dom, dev_name, pci_devs)
> +
> +    def _delete_affected_pci_devices(self, dom, dev_name, pci_devs):
> +        dev_model = DeviceModel(conn=self.conn)
> +        try:
> +            dev_model.lookup(dev_name)
> +        except NotFoundError:
> +            return
> +
> +        affected_names = set(
> +            PassthroughAffectedDevicesModel(conn=self.conn).get_list(dev_name))
> +
> +        for pci_name, e in pci_devs:
> +            if pci_name in affected_names:
> +                xmlstr = etree.tostring(e)
> +                dom.detachDeviceFlags(
> +                    xmlstr, get_vm_config_flag(dom, mode='all'))
> diff --git a/src/kimchi/rollbackcontext.py b/src/kimchi/rollbackcontext.py
> index 70fcfeb..ba28999 100644
> --- a/src/kimchi/rollbackcontext.py
> +++ b/src/kimchi/rollbackcontext.py
> @@ -64,3 +64,6 @@ class RollbackContext(object):
>
>       def prependDefer(self, func, *args, **kwargs):
>           self._finally.insert(0, (func, args, kwargs))
> +
> +    def commitAll(self):
> +        self._finally = []
Reviewed-by: Mark Wu<wudxw at linux.vnet.ibm.com>




More information about the Kimchi-devel mailing list