[PATCH v2] host/partitions: avoid calling disks.get_partitions_names() for each partition

Problem: PartitionModel.lookup() calls disks.get_partitions_names() to verify that the given partition is eligible to be used in a new logical storage pool, as follows:

    if name not in disks.get_partitions_names():
        raise NotFoundError(...)

When the front-end issues a GET request to host/partitions, the back-end calls get_partitions_names() once for each partition. If there are many disks and partitions on the host, get_partitions_names() takes a long time to finish, and calling it for every partition makes the GET request time out.

In summary, the original control flow is as follows.

    GET host/partitions
      Partitions._get_resources()
        PartitionsModel.get_list()
          disks.get_partitions_names()
        for each partition name:
          Partition.lookup()
            PartitionModel.lookup()
              verify partition name in disks.get_partitions_names()
              disks.get_partition_details()

The solution is to split the block device verification code out of disks.get_partitions_names() and put it in a new function, disks._is_available(). Now get_partitions_names() just returns all the block devices, and disks.get_partition_details() performs the eligibility check by calling disks._is_available(). The original get_partitions_names() checks eligibility for every detected device, so calling it once per partition is too slow; the new _is_available() checks only the given block device.

The new control flow is as follows.

    GET host/partitions
      Partitions._get_resources()
        PartitionsModel.get_list()
          disks.get_partitions_names()
        for each partition name:
          Partition.lookup()
            PartitionModel.lookup()
              disks.get_partition_details()
                check if the block device _is_available()

v1: Make use of the resource and model arguments to have the Partitions collection controller set a flag, so the Partition model can inspect the flag and skip the check accordingly.

v2: The v1 solution is too hacky; rewrite the patch by refactoring the underlying disks model, splitting the batch verification into small pieces amortized over each concrete block device.
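To make the cost concrete, here is a toy, self-contained sketch (simplified stand-in names and fake data, not the Kimchi code itself) that counts how many full device scans each flow performs; the old flow repeats the full scan once per partition, while the new flow scans once and checks each device individually:

    # Toy illustration only (not part of the patch): count full device scans
    # in the old flow vs. the new flow, using fake stand-in data.
    DEVICES = {'sda1': True, 'sda2': False, 'sdb': True}   # name -> eligible?
    SCANS = {'count': 0}

    def scan_devices(check=False):
        # stands in for disks.get_partitions_names(): one pass over every device
        SCANS['count'] += 1
        return [name for name, ok in DEVICES.items() if ok or not check]

    def old_get_list():
        details = []
        for name in scan_devices(check=True):         # first full scan
            if name not in scan_devices(check=True):  # old lookup(): full re-scan per partition
                continue
            details.append({'name': name})
        return details

    def new_get_list():
        details = []
        for name in scan_devices():                   # single scan, no eligibility check
            if DEVICES[name]:                         # stands in for the per-device _is_available()
                details.append({'name': name, 'available': True})
        return details

    old_get_list()
    old_scans, SCANS['count'] = SCANS['count'], 0
    new_get_list()
    print(old_scans, SCANS['count'])                  # 3 scans before, 1 scan after

With three fake devices the old flow already performs three scans versus one; on a host with dozens of partitions that gap is what drives the GET request into a timeout.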
Signed-off-by: Zhou Zheng Sheng <zhshzhou@linux.vnet.ibm.com>
---
 src/kimchi/control/host.py |  6 +++++-
 src/kimchi/disks.py        | 37 +++++++++++++++++++++++--------------
 src/kimchi/model/host.py   |  3 ---
 3 files changed, 28 insertions(+), 18 deletions(-)

diff --git a/src/kimchi/control/host.py b/src/kimchi/control/host.py
index ad34919..ebf1bed 100644
--- a/src/kimchi/control/host.py
+++ b/src/kimchi/control/host.py
@@ -21,7 +21,7 @@ import cherrypy

 from kimchi.control.base import Collection, Resource, SimpleCollection
 from kimchi.control.utils import UrlSubNode, validate_method
-from kimchi.exception import OperationFailed
+from kimchi.exception import OperationFailed, NotFoundError
 from kimchi.template import render


@@ -80,6 +80,7 @@ class Partitions(Collection):
     # sorted by their path
     def _get_resources(self, flag_filter):
         res_list = super(Partitions, self)._get_resources(flag_filter)
+        res_list = filter(lambda x: x.info['available'], res_list)
         res_list.sort(key=lambda x: x.info['path'])
         return res_list

@@ -90,6 +91,9 @@ class Partition(Resource):

     @property
     def data(self):
+        if not self.info['available']:
+            raise NotFoundError("KCHPART0001E", {'name': self.info['name']})
+
         return self.info


diff --git a/src/kimchi/disks.py b/src/kimchi/disks.py
index 842fd48..a13c753 100644
--- a/src/kimchi/disks.py
+++ b/src/kimchi/disks.py
@@ -142,7 +142,23 @@ def _get_vgname(devNodePath):
     return re.findall(r"LVM2_VG_NAME='([^\']*)'", out)[0]


-def get_partitions_names():
+def _is_available(name, devtype, fstype, mountpoint, majmin):
+    devNodePath = _get_dev_node_path(majmin)
+    # Only list unmounted and unformated and leaf and (partition or disk)
+    # leaf means a partition, a disk has no partition, or a disk not held
+    # by any multipath device. Physical volume belongs to no volume group
+    # is also listed. Extended partitions should not be listed.
+    if (devtype in ['part', 'disk', 'mpath'] and
+            fstype in ['', 'LVM2_member'] and
+            mountpoint == "" and
+            _get_vgname(devNodePath) == "" and
+            _is_dev_leaf(devNodePath) and
+            not _is_dev_extended_partition(devtype, devNodePath)):
+        return True
+    return False
+
+
+def get_partitions_names(check=False):
     names = set()
     keys = ["NAME", "TYPE", "FSTYPE", "MOUNTPOINT", "MAJ:MIN"]
     # output is on format key="value",
@@ -151,26 +167,17 @@ def get_partitions_names():
         # split()[0] to avoid the second part of the name, after the
         # whiteline
         name = dev['name'].split()[0]
-        devNodePath = _get_dev_node_path(dev['maj:min'])
-        # Only list unmounted and unformated and leaf and (partition or disk)
-        # leaf means a partition, a disk has no partition, or a disk not held
-        # by any multipath device. Physical volume belongs to no volume group
-        # is also listed. Extended partitions should not be listed.
-        if not (dev['type'] in ['part', 'disk', 'mpath'] and
-                dev['fstype'] in ['', 'LVM2_member'] and
-                dev['mountpoint'] == "" and
-                _get_vgname(devNodePath) == "" and
-                _is_dev_leaf(devNodePath) and
-                not _is_dev_extended_partition(dev['type'], devNodePath)):
+        if check and not _is_available(name, dev['type'], dev['fstype'],
+                                       dev['mountpoint'], dev['maj:min']):
             continue
-
         names.add(name)

     return list(names)


 def get_partition_details(name):
-    dev_path = _get_dev_node_path(_get_dev_major_min(name))
+    majmin = _get_dev_major_min(name)
+    dev_path = _get_dev_node_path(majmin)

     keys = ["TYPE", "FSTYPE", "SIZE", "MOUNTPOINT"]
     try:
@@ -180,6 +187,8 @@ def get_partition_details(name):
             "Error getting partition info for %s: %s", name, e)
         return {}

+    dev['available'] = _is_available(name, dev['type'], dev['fstype'],
+                                     dev['mountpoint'], majmin)
     if dev['mountpoint']:
         # Sometimes the mountpoint comes with [SWAP] or other
         # info which is not an actual mount point. Filtering it
diff --git a/src/kimchi/model/host.py b/src/kimchi/model/host.py
index 47b0aa1..e9ac487 100644
--- a/src/kimchi/model/host.py
+++ b/src/kimchi/model/host.py
@@ -245,9 +245,6 @@ class PartitionModel(object):
         pass

     def lookup(self, name):
-        if name not in disks.get_partitions_names():
-            raise NotFoundError("KCHPART0001E", {'name': name})
-
         return disks.get_partition_details(name)

--
1.9.0

Reviewed-by: ShaoHe Feng <shaohef@linux.vnet.ibm.com>

Looks good to me. Yes, v1 was a bit of a hack; this version looks better.
--
Thanks and best regards!
Sheldon Feng (冯少合) <shaohef@linux.vnet.ibm.com>
IBM Linux Technology Center

On 05/07/2014 03:17 AM, Zhou Zheng Sheng wrote:
@@ -90,6 +91,9 @@ class Partition(Resource):

     @property
     def data(self):
+        if not self.info['available']:
+            raise NotFoundError("KCHPART0001E", {'name': self.info['name']})
+
This logic should be in the model.

On 2014-05-08 23:08, Aline Manera wrote:
On 05/07/2014 03:17 AM, Zhou Zheng Sheng wrote:
@@ -90,6 +91,9 @@ class Partition(Resource):

     @property
     def data(self):
+        if not self.info['available']:
+            raise NotFoundError("KCHPART0001E", {'name': self.info['name']})
+
This logic should be in the model.
Thanks Aline, you are correct. Originally this check was in the model, but that brings a problem.

This patch is meant to avoid redundant disks._is_available() invocations. Before this patch, the availability check ran both when Kimchi generated the partition names and when Kimchi looked up each particular partition's info. So when the front-end GETs host/partitions, Kimchi generates the list of partition names and then looks up the information, and each partition gets checked twice, which makes the whole process slow.

So my idea is to drop the availability check from disks.get_partitions_names(), so that it returns all partitions, and to check instead when the partition info is looked up. However, if PartitionModel.lookup() raises NotFoundError, Partitions._get_resources() would blow up, because the partition names returned from disks.get_partitions_names() may now contain unavailable partitions.

So I have two choices to deal with this. I can either drop the call to super()._get_resources() and completely re-implement Partitions._get_resources() to catch the NotFoundError raised by PartitionModel.lookup(), or have lookup() not raise the exception and instead set an availability flag for a later check. I think the second choice is less evil.
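Roughly, the two options compare like this (a simplified, self-contained sketch with hypothetical stand-ins, not the real Kimchi controller classes):

    # Rough sketch of the two options (simplified stand-ins, not the real
    # Kimchi controller code).
    class NotFoundError(Exception):
        pass

    def lookup_raising(info):
        # option 1: the model raises, so the collection controller has to
        # catch NotFoundError for every unavailable partition
        if not info['available']:
            raise NotFoundError("KCHPART0001E", {'name': info['name']})
        return info

    def get_resources_catching(infos):
        res = []
        for info in infos:
            try:
                res.append(lookup_raising(info))
            except NotFoundError:
                continue
        return res

    def get_resources_filtering(infos):
        # option 2 (chosen): lookup never raises; the controller just filters
        # on the 'available' flag, so super()._get_resources() stays untouched
        return [info for info in infos if info['available']]

    parts = [{'name': 'sda1', 'available': True},
             {'name': 'sda2', 'available': False}]
    assert get_resources_catching(parts) == get_resources_filtering(parts)

Both produce the same filtered list; the second keeps the generic super()._get_resources() path intact, which is why the patch takes that route.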
--
Zhou Zheng Sheng / 周征晟
E-mail: zhshzhou@linux.vnet.ibm.com
Telephone: 86-10-82454397

On 05/08/2014 10:54 PM, Zhou Zheng Sheng wrote:
Thanks Aline, you are correct. Originally this check was in the model, but that brings a problem.
Got it. Thanks for the explanation.