[PATCH v2 0/3] Github bug #327 - NFS workaround

From: Daniel Henrique Barboza <danielhb@linux.vnet.ibm.com> Changelog: v2: - Addressed Aline's comments about code repetition This patch series fixes the bug described in https://github.com/kimchi-project/kimchi/issues/327. If there are any NFS pools created and the NFS server is unreachable, kimchi will hang due to a malfunction of libvirt in this specific scenario. The proposed workaround is to avoid any libvirt operation in the NFS pool that fits this scenario, treating the pool as 'inaccessible' and warning the UI. Trying to activate/ deactivate the nfs pool at this case will take no effect, warning the user that the NFS server is unreachable. Daniel Henrique Barboza (3): Github bug #327: NFS pool workaround: i18n changes Github bug #327: NFS pool workaround: timeout adjustments Github bug #327: NFS pool workaround: model changes src/kimchi/i18n.py | 2 ++ src/kimchi/model/libvirtstoragepool.py | 9 ++++-- src/kimchi/model/storagepools.py | 52 +++++++++++++++++++++++++++++++++- src/kimchi/utils.py | 2 +- 4 files changed, 60 insertions(+), 5 deletions(-) -- 1.8.3.1

From: Daniel Henrique Barboza <danielhb@linux.vnet.ibm.com> Adding the warning/status messages used by the workaround Signed-off-by: Daniel Henrique Barboza <danielhb@linux.vnet.ibm.com> --- src/kimchi/i18n.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/kimchi/i18n.py b/src/kimchi/i18n.py index fea0184..ca72a80 100644 --- a/src/kimchi/i18n.py +++ b/src/kimchi/i18n.py @@ -135,6 +135,8 @@ messages = { "KCHPOOL0029E": _("The parameter disks only can be updated for logical storage pool."), "KCHPOOL0030E": _("The SCSI host adapter name must be a string."), "KCHPOOL0031E": _("The storage pool kimchi_isos is reserved for internal use"), + "KCHPOOL0032E": _("Unable to activate NFS storage pool %(name)s. NFS server %(server)s is unreachable."), + "KCHPOOL0033E": _("Unable to deactivate NFS storage pool %(name)s. NFS server %(server)s is unreachable."), "KCHVOL0001E": _("Storage volume %(name)s already exists"), "KCHVOL0002E": _("Storage volume %(name)s does not exist in storage pool %(pool)s"), -- 1.8.3.1

From: Daniel Henrique Barboza <danielhb@linux.vnet.ibm.com> Adding a smaller timeout in the 'prepare()' method of libvirtstoragepool ('mount' and 'umount' commands). Fixing an error in utils/run_command() that prevented the TimeoutExpired exception to be raised sometimes. Signed-off-by: Daniel Henrique Barboza <danielhb@linux.vnet.ibm.com> --- src/kimchi/model/libvirtstoragepool.py | 9 ++++++--- src/kimchi/utils.py | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/kimchi/model/libvirtstoragepool.py b/src/kimchi/model/libvirtstoragepool.py index dc38d50..e69971b 100644 --- a/src/kimchi/model/libvirtstoragepool.py +++ b/src/kimchi/model/libvirtstoragepool.py @@ -94,15 +94,18 @@ class NetfsPoolDef(StoragePoolDef): export_path, mnt_point] umount_cmd = ["umount", "-f", export_path] mounted = False + # 2 seconds looks like a reasonable time to wait for a refresh + # in the UI and enough time to verify that the NFS server + # is down. + cmd_timeout = 2 with RollbackContext() as rollback: rollback.prependDefer(os.rmdir, mnt_point) try: - run_command(mount_cmd, 30) - rollback.prependDefer(run_command, umount_cmd) + run_command(mount_cmd, cmd_timeout) + rollback.prependDefer(run_command, umount_cmd, cmd_timeout) except TimeoutExpired: raise InvalidParameter("KCHPOOL0012E", {'path': export_path}) - with open("/proc/mounts", "rb") as f: rawMounts = f.read() output_items = ['dev_path', 'mnt_point', 'type'] diff --git a/src/kimchi/utils.py b/src/kimchi/utils.py index 6be1c04..d4ab1a1 100644 --- a/src/kimchi/utils.py +++ b/src/kimchi/utils.py @@ -177,7 +177,7 @@ def run_command(cmd, timeout=None): "timeout %s seconds" % timeout) kimchi_log.error(msg) - msg_args = {'cmd': cmd, 'seconds': timeout} + msg_args = {'cmd': " ".join(cmd), 'seconds': str(timeout)} raise TimeoutExpired("KCHUTILS0002E", msg_args) return out, error, proc.returncode -- 1.8.3.1

From: Daniel Henrique Barboza <danielhb@linux.vnet.ibm.com> Implementing a workaround in '_get_storagepool_vols_num', 'lookup', 'activate' and 'deactivate' methods to avoid libvirt calls in NFS pools when the corresponding NFS server is unreachable. In the 'lookup' method the state of the pool returned in such scenario is 'inaccessible' to warn the UI that the pool isn't available for normal operation. Signed-off-by: Daniel Henrique Barboza <danielhb@linux.vnet.ibm.com> --- src/kimchi/model/storagepools.py | 52 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-) diff --git a/src/kimchi/model/storagepools.py b/src/kimchi/model/storagepools.py index 011feb0..d0708e8 100644 --- a/src/kimchi/model/storagepools.py +++ b/src/kimchi/model/storagepools.py @@ -194,15 +194,45 @@ class StoragePoolModel(object): return source + def _nfs_status_online(self, pool, poolArgs=None): + if not poolArgs: + xml = pool.XMLDesc(0) + pool_type = xmlutils.xpath_get_text(xml, "/pool/@type")[0] + source = self._get_storage_source(pool_type, xml) + poolArgs = {} + poolArgs['name'] = pool.name() + poolArgs['type'] = pool_type + poolArgs['source'] = {'path': source['path'], + 'host': source['addr']} + conn = self.conn.get() + poolDef = StoragePoolDef.create(poolArgs) + try: + poolDef.prepare(conn) + return True + except Exception as e: + return False + def lookup(self, name): pool = self.get_storagepool(name, self.conn) info = pool.info() - nr_volumes = self._get_storagepool_vols_num(pool) autostart = True if pool.autostart() else False xml = pool.XMLDesc(0) path = xmlutils.xpath_get_text(xml, "/pool/target/path")[0] pool_type = xmlutils.xpath_get_text(xml, "/pool/@type")[0] source = self._get_storage_source(pool_type, xml) + #FIXME: nfs workaround - prevent any libvirt operation + # for a nfs if the corresponding NFS server is down. + if pool_type == 'netfs' and not self._nfs_status_online(pool): + kimchi_log.debug("NFS pool %s is offline, reason: NFS " + "server %s is unreachable.", name, + source['addr']) + # Mark state as '4' => inaccessible. + info[0] = 4 + # skip calculating volumes + nr_volumes = 0 + else: + nr_volumes = self._get_storagepool_vols_num(pool) + res = {'state': POOL_STATE_MAP[info[0]], 'path': path, 'source': source, @@ -267,6 +297,16 @@ class StoragePoolModel(object): def activate(self, name): pool = self.get_storagepool(name, self.conn) + #FIXME: nfs workaround - do not activate a NFS pool + # if the NFS server is not reachable. + xml = pool.XMLDesc(0) + pool_type = xmlutils.xpath_get_text(xml, "/pool/@type")[0] + if pool_type == 'netfs' and not self._nfs_status_online(pool): + # block the user from activating the pool. + source = self._get_storage_source(pool_type, xml) + raise OperationFailed("KCHPOOL0032E", + {'name': name, 'server': source['addr']}) + return try: pool.create(0) except libvirt.libvirtError as e: @@ -275,6 +315,16 @@ class StoragePoolModel(object): def deactivate(self, name): pool = self.get_storagepool(name, self.conn) + #FIXME: nfs workaround - do not try to deactivate a NFS pool + # if the NFS server is not reachable. + xml = pool.XMLDesc(0) + pool_type = xmlutils.xpath_get_text(xml, "/pool/@type")[0] + if pool_type == 'netfs' and not self._nfs_status_online(pool): + # block the user from dactivating the pool. + source = self._get_storage_source(pool_type, xml) + raise OperationFailed("KCHPOOL0033E", + {'name': name, 'server': source['addr']}) + return try: pool.destroy() except libvirt.libvirtError as e: -- 1.8.3.1
participants (2)
-
Aline Manera
-
Daniel Barboza