[Kimchi-devel] [PATCH 4/4] host pci pass through: pass through PCI device to VM

Zhou Zheng Sheng zhshzhou at linux.vnet.ibm.com
Wed Apr 23 09:48:12 UTC 2014


This patch enables Kimchi VMs to use host PCI devices directly, which
greatly improves the performance of those devices. The user can
pass through an HBA card, NIC, USB controller, SCSI controller or other PCI
device to a VM, as long as the host supports one of Intel VT-d, AMD IOMMU
or POWER sPAPR technology and runs a recent release of the Linux kernel.

To try this feature, first create a VM template as usual, either in
the web UI or by running the following curl command.

  curl -u root -H "Content-Type: application/json" \
    -H  "Accept: application/json" -X POST \
    "http://localhost:8000/templates" \
    -d '{"os_distro":"fedora", "os_version":"20", "cdrom":"/iso/Fedora-Live-Desktop-x86_64-20-1.iso"}'

To determine the PCI devices to pass through, get a list of host PCI devices.

  curl -u root -H "Content-Type: application/json" \
    -H  "Accept: application/json" \
    http://127.0.0.1:8000/host/devices?_cap=pci

It prints a list like the following.
[
  {
    "slot":"0",
    "function":"0",
    "domain":"0",
    "vendor":{"id":"0x8086", "name":"Intel Corporation"},
    "name":"pci_0000_00_00_0",
    "parent":"computer",
    "bus":"0",
    "product":{"id":"0x0104", "description":"... DRAM Controller"
    },
    "path":"/sys/devices/pci0000:00/0000:00:00.0",
    "type":"pci",
    "iommuGroup":"0"
  },
  {
    "slot":"1",
    "function":"0",
    "domain":"0",
    "vendor":{"id":"0x8086", "name":"Intel Corporation"},
    "name":"pci_0000_00_01_0",
    "parent":"computer",
    "bus":"0",
    "product":{"id":"0x0101", "description":"... PCI Express Root Port"},
    "path":"/sys/devices/pci0000:00/0000:00:01.0",
    "type":"pci",
    "iommuGroup":"1"
  },
  ...
]

All the devices in the host form a tree, so passing through a PCI
device means passing through all the sub-devices linked to that PCI device.
To get an idea of the device tree, we can access the following URL.

  curl -u root -H "Content-Type: application/json" \
    -H  "Accept: application/json" \
    http://127.0.0.1:8000/host/devices?_tree=1

The output is not human readable, but in the future the front-end should
parse the output into a graph. For now, as a debugging aid, you can
run one of the following commands to get a human-readable tree.

  Alternative 1: PYTHONPATH=src python -m kimchi.hostdev | less
  Alternative 2: virsh nodedev-list --tree

Then note down the 'name' attribute of each PCI device to pass through,
such as "pci_0000_0d_00_0" or "pci_0000_03_00_0".

Now update the template to define the PCI devices to pass through.

  curl -u root -H "Content-Type: application/json" \
    -H  "Accept: application/json" -X PUT \
    "http://localhost:8000/templates/Fedora-Live-Desktop-x86_64-20-11398235858253" \
    -d '{"host_pci_devs": ["pci_0000_0d_00_0", "pci_0000_03_00_0"]}'

Then create a VM from the template, and start the VM. libvirt
automatically detaches the PCI devices from the host, and attaches them to
vfio-pci or pci-stub. The guest OS should be able to use those devices,
and "lspci" in the guest OS should show the device information. After the
VM is powered off, libvirt automatically re-attaches the devices to
their original host drivers.

The author was able to test this feature by passing through the wireless
NIC and an SD card reader to the VM.

Signed-off-by: Zhou Zheng Sheng <zhshzhou at linux.vnet.ibm.com>
---
 src/kimchi/API.json             | 18 +++++++++++--
 src/kimchi/control/templates.py |  5 ++--
 src/kimchi/featuretests.py      |  9 +++++++
 src/kimchi/i18n.py              | 11 ++++++++
 src/kimchi/model/config.py      |  2 ++
 src/kimchi/model/templates.py   | 58 +++++++++++++++++++++++++++++++++++++++++
 src/kimchi/vmtemplate.py        | 43 ++++++++++++++++++++++++++++++
 7 files changed, 142 insertions(+), 4 deletions(-)

diff --git a/src/kimchi/API.json b/src/kimchi/API.json
index f2192b6..93478ae 100644
--- a/src/kimchi/API.json
+++ b/src/kimchi/API.json
@@ -26,6 +26,18 @@
                         ]
                 }
             }
+        },
+        "host_pci_devs": {
+            "description": "Configure host PCI devices to be passed through to VM",
+            "type": "array",
+            "uniqueItems": true,
+            "error": "KCHTMPL0027E",
+            "items": {
+                "description": "Name of the PCI device",
+                "pattern": "^[A-Za-z0-9_]+$",
+                "type": "string",
+                "error": "KCHTMPL0028E"
+            }
         }
     },
     "properties": {
@@ -376,7 +388,8 @@
                     "type": "array",
                     "items": { "type": "string" }
                 },
-                "graphics": { "$ref": "#/kimchitype/graphics" }
+                "graphics": { "$ref": "#/kimchitype/graphics" },
+                "host_pci_devs": {"$ref": "#/kimchitype/host_pci_devs"}
             },
             "additionalProperties": false,
             "error": "KCHAPI0001E"
@@ -530,7 +543,8 @@
                     "type": "array",
                     "items": { "type": "string" }
                 },
-                "graphics": { "$ref": "#/kimchitype/graphics" }
+                "graphics": { "$ref": "#/kimchitype/graphics" },
+                "host_pci_devs": {"$ref": "#/kimchitype/host_pci_devs"}
             },
             "additionalProperties": false,
             "error": "KCHAPI0001E"
diff --git a/src/kimchi/control/templates.py b/src/kimchi/control/templates.py
index a535960..7048d1b 100644
--- a/src/kimchi/control/templates.py
+++ b/src/kimchi/control/templates.py
@@ -34,7 +34,7 @@ class Template(Resource):
         self.update_params = ["name", "folder", "icon", "os_distro",
                               "storagepool", "os_version", "cpus",
                               "memory", "cdrom", "disks", "networks",
-                              "graphics"]
+                              "graphics", "host_pci_devs"]
         self.uri_fmt = "/templates/%s"
         self.clone = self.generate_action_handler('clone')
 
@@ -52,4 +52,5 @@ class Template(Resource):
                 'storagepool': self.info['storagepool'],
                 'networks': self.info['networks'],
                 'folder': self.info.get('folder', []),
-                'graphics': self.info['graphics']}
+                'graphics': self.info['graphics'],
+                'host_pci_devs': self.info.get('host_pci_devs', [])}
diff --git a/src/kimchi/featuretests.py b/src/kimchi/featuretests.py
index 07d5676..c906347 100644
--- a/src/kimchi/featuretests.py
+++ b/src/kimchi/featuretests.py
@@ -29,6 +29,7 @@ from lxml.builder import E
 
 
 from kimchi.utils import kimchi_log
+from kimchi.utils import run_command
 
 
 ISO_STREAM_XML = """
@@ -175,3 +176,11 @@ class FeatureTests(object):
             pool is None or pool.undefine()
             conn is None or conn.close()
         return True
+
+    @staticmethod
+    def kernel_support_vfio():
+        out, err, rc = run_command(['modprobe', 'vfio-pci'])
+        if rc != 0:
+            kimchi_log.warning("Unable to load Kernal module vfio-pci.")
+            return False
+        return True
diff --git a/src/kimchi/i18n.py b/src/kimchi/i18n.py
index 955aff2..d70c863 100644
--- a/src/kimchi/i18n.py
+++ b/src/kimchi/i18n.py
@@ -119,6 +119,17 @@ messages = {
     "KCHTMPL0022E": _("Disk size must be greater than 1GB."),
     "KCHTMPL0023E": _("No root device found. Maybe you should stop filtering "
                       "devices to avoid filtering out the root device of the device tree."),
+    "KCHTMPL0024E": _("Device '%(dev)s' of type '%(type)s' is not a PCI device."),
+    "KCHTMPL0025E": _("No IOMMU groups found. Host PCI pass through needs "
+                      "IOMMU group to function correctly. Please enable Intel "
+                      "VT-d or AMD IOMMU in your BIOS, then verify the Kernel "
+                      "is compiled with IOMMU support. For Intel CPU, add "
+                      "intel_iommu=on to your Kernel parameter in "
+                      "/boot/grub2/grub.conf. For AMD CPU, add iommu=pt iommu=1"),
+    "KCHTMPL0026E": _("Unable to detach the device %(dev)s from host. It may be "
+                      "in use by the host or other guest."),
+    "KCHTMPL0027E": _("PCI device names must be unique."),
+    "KCHTMPL0028E": _("PCI device name should contain only alphabetic letters, digits and underscore."),
 
     "KCHPOOL0001E": _("Storage pool %(name)s already exists"),
     "KCHPOOL0002E": _("Storage pool %(name)s does not exist"),
diff --git a/src/kimchi/model/config.py b/src/kimchi/model/config.py
index 7081373..12aea82 100644
--- a/src/kimchi/model/config.py
+++ b/src/kimchi/model/config.py
@@ -53,6 +53,7 @@ class CapabilitiesModel(object):
         self.qemu_stream_dns = False
         self.libvirt_stream_protocols = []
         self.fc_host_support = False
+        self.kernel_vfio = False
 
         # Subscribe function to set host capabilities to be run when cherrypy
         # server is up
@@ -65,6 +66,7 @@ class CapabilitiesModel(object):
         self.qemu_stream_dns = FeatureTests.qemu_iso_stream_dns()
         self.nfs_target_probe = FeatureTests.libvirt_support_nfs_probe()
         self.fc_host_support = FeatureTests.libvirt_support_fc_host()
+        self.kernel_vfio = FeatureTests.kernel_support_vfio()
 
         self.libvirt_stream_protocols = []
         for p in ['http', 'https', 'ftp', 'ftps', 'tftp']:
diff --git a/src/kimchi/model/templates.py b/src/kimchi/model/templates.py
index 60f4de5..0acbeb9 100644
--- a/src/kimchi/model/templates.py
+++ b/src/kimchi/model/templates.py
@@ -19,16 +19,20 @@
 
 import copy
 import os
+import glob
 import time
 
 import libvirt
 
+from kimchi import hostdev
 from kimchi import xmlutils
 from kimchi.exception import InvalidOperation, InvalidParameter
 from kimchi.exception import NotFoundError, OperationFailed
+from kimchi.model.config import CapabilitiesModel
 from kimchi.kvmusertests import UserTests
 from kimchi.utils import pool_name_from_uri
 from kimchi.utils import probe_file_permission_as_user
+from kimchi.utils import run_command
 from kimchi.vmtemplate import VMTemplate
 from lxml import objectify
 
@@ -74,6 +78,9 @@ class TemplatesModel(object):
             except Exception:
                 raise InvalidParameter("KCHTMPL0003E", {'network': net_name,
                                                         'template': name})
+
+        self.host_pci_device_validate(params.get(u'host_pci_devs', []))
+
         # Creates the template class with necessary information
         # Checkings will be done while creating this class, so any exception
         # will be raised here
@@ -93,6 +100,19 @@ class TemplatesModel(object):
         with self.objstore as session:
             return session.get_list('template')
 
+    def host_pci_device_validate(self, host_pci_devs):
+        conn = self.conn.get()
+        for dev_name in host_pci_devs:
+            try:
+                dev = conn.nodeDeviceLookupByName(dev_name)
+            except:
+                raise NotFoundError('KCHHOST0003E', {'name': dev_name})
+            dev_info = hostdev.get_dev_info(dev)
+            if dev_info['type'] != 'pci':
+                raise InvalidParameter("KCHTMPL0024E",
+                                       {'dev': dev_name,
+                                        'type': dev_info['type']})
+
     def template_volume_validate(self, tmp_volumes, pool):
         kwargs = {'conn': self.conn, 'objstore': self.objstore}
         pool_type = xmlutils.xpath_get_text(pool.XMLDesc(0), "/pool/@type")[0]
@@ -186,6 +206,9 @@ class TemplateModel(object):
                 raise InvalidParameter("KCHTMPL0003E", {'network': net_name,
                                                         'template': name})
 
+        self.templates.host_pci_device_validate(new_t.get(u'host_pci_devs',
+                                                          []))
+
         self.delete(name)
         try:
             ident = self.templates.create(new_t)
@@ -225,6 +248,41 @@ class LibvirtVMTemplate(VMTemplate):
         names = conn.listStoragePools() + conn.listDefinedStoragePools()
         return sorted(map(lambda x: x.decode('utf-8'), names))
 
+    def _get_all_host_devs_name(self, cap=None):
+        conn = self.conn.get()
+        return conn.listDevices(cap, 0)
+
+    def _host_pci_devs_validate(self):
+        ''' Validate all PCI pass through devices and return device
+        information. '''
+
+        dev_names = self.info.get('host_pci_devs', [])
+        if not dev_names:
+            return None
+
+        if not glob.glob('/sys/kernel/iommu_groups/*'):
+            raise NotFoundError("KCHTMPL0025E")
+
+        out, err, rc = run_command(['getsebool', 'virt_use_sysfs'])
+        if rc == 0:
+            if out.rstrip('\n') != "virt_use_sysfs --> on":
+                out, err, rc = run_command(['setsebool', '-P',
+                                            'virt_use_sysfs=on'])
+
+        driver = 'vfio' if CapabilitiesModel().kernel_vfio else 'kvm'
+        conn = self.conn.get()
+        dev_infos = []
+        for dev_name in dev_names:
+            dev = conn.nodeDeviceLookupByName(dev_name)
+            try:
+                dev.detachFlags(driver)
+            except libvirt.libvirtError:
+                raise InvalidParameter('KCHTMPL0026E', {'dev': dev_name})
+            dev.reAttach()
+            dev_infos.append(hostdev.get_dev_info(dev))
+
+        return {'driver': driver, 'devs': dev_infos}
+
     def _network_validate(self):
         names = self.info['networks']
         for name in names:
diff --git a/src/kimchi/vmtemplate.py b/src/kimchi/vmtemplate.py
index 8d5217a..750d236 100644
--- a/src/kimchi/vmtemplate.py
+++ b/src/kimchi/vmtemplate.py
@@ -299,6 +299,29 @@ drive=drive-%(bus)s0-1-0,id=%(bus)s0-1-0'/>
             input_output += sound % self.info
         return input_output
 
+    def _get_host_pci_passthrough_xml(self):
+        r = self._host_pci_devs_validate()
+        if r is None:
+            return ''
+
+        driver = r['driver']
+        xmls = []
+        for dev_info in r['devs']:
+            xmlstr = '''
+                <hostdev mode='subsystem' type='pci' managed='yes'>
+                    <source>
+                        <address domain='%(domain)s' bus='%(bus)s' \
+                            slot='%(slot)s' function='%(function)s'/>
+                    </source>
+                    <driver name='%(driver)s'/>
+                </hostdev>''' % {
+                'domain': dev_info['domain'], 'bus': dev_info['bus'],
+                'slot': dev_info['slot'], 'function': dev_info['function'],
+                'driver': driver}
+            xmls.append(xmlstr)
+
+        return ''.join(xmls)
+
     def to_vm_xml(self, vm_name, vm_uuid, **kwargs):
         params = dict(self.info)
         params['name'] = vm_name
@@ -333,6 +356,8 @@ drive=drive-%(bus)s0-1-0,id=%(bus)s0-1-0'/>
         else:
             params['cdroms'] = cdrom_xml
 
+        params['host_pci_devs'] = self._get_host_pci_passthrough_xml()
+
         xml = """
         <domain type='%(domain)s' %(qemu-namespace)s>
           %(qemu-stream-cmdline)s
@@ -360,6 +385,7 @@ drive=drive-%(bus)s0-1-0,id=%(bus)s0-1-0'/>
             %(networks)s
             %(graphics)s
             %(input_output)s
+            %(host_pci_devs)s
             <memballoon model='virtio' />
           </devices>
         </domain>
@@ -370,6 +396,7 @@ drive=drive-%(bus)s0-1-0,id=%(bus)s0-1-0'/>
         self._storage_validate()
         self._network_validate()
         self._iso_validate()
+        self._host_pci_devs_validate()
 
     def _iso_validate(self):
         pass
@@ -380,6 +407,14 @@ drive=drive-%(bus)s0-1-0,id=%(bus)s0-1-0'/>
     def _storage_validate(self):
         pass
 
+    def _host_pci_devs_validate(self):
+        ''' Should return a dictionary containing two items as following.
+        {"driver": "kvm"/"vfio"/...,
+         "devs": [{dev_info0...}, {dev_info1...}, ...]}
+        Returning None means no host PCI device to pass through.
+        '''
+        pass
+
     def fork_vm_storage(self, vm_uuid):
         pass
 
@@ -398,6 +433,9 @@ drive=drive-%(bus)s0-1-0,id=%(bus)s0-1-0'/>
     def _get_all_storagepools_name(self):
         return []
 
+    def _get_all_host_devs_name(self, cap=None):
+        return []
+
     def validate_integrity(self):
         invalid = {}
         # validate networks integrity
@@ -418,6 +456,11 @@ drive=drive-%(bus)s0-1-0,id=%(bus)s0-1-0'/>
         if not (os.path.isfile(iso) or check_url_path(iso)):
             invalid['cdrom'] = [iso]
 
+        invalid_pci_devs = list(set(self.info.get('host_pci_devs', [])) -
+                                set(self._get_all_host_devs_name(cap='pci')))
+        if invalid_pci_devs:
+            invalid['host_pci_devs'] = invalid_pci_devs
+
         self.info['invalid'] = invalid
 
         return self.info
-- 
1.9.0




More information about the Kimchi-devel mailing list