Hi Benny et al.,
A quick recap of the last testing environment I put together for Muli on this topic:
- 2-3 weeks ago I deployed oVirt 4.4; that's when I captured my original traces.
- Muli then asked me to deploy 4.5 from the master branch so the cluster would run the latest code, closer to what you are working on.
With 4.4 I was able to bring up the cluster with one host attached, everything running on a single machine called client2.
With 4.5 I cannot bring the host up.
Here is my cluster spec:
In the UI I see the following when trying to add host client2:
However, when I check the node's capabilities using vdsm-client, I get this for each flag mentioned:
[root@client2 ~]# vdsm-client Host getCapabilities | grep kvm
"cpuFlags":
"clflush,cqm_llc,sdbg,abm,movbe,bmi1,intel_ppin,apic,sse2,mce,arat,amd-ssbd,monitor,xsaveopt,ida,bmi2,pclmulqdq,ssse3,pni,ss,rdrand,xsave,avx2,intel_pt,sse4_2,ept_ad,stibp,rdseed,pse,dts,dtes64,3dnowprefetch,cpuid_fault,acpi,fsgsbase,cdp_l3,x2apic,fpu,invpcid_single,sse4_1,pti,amd-stibp,ht,pae,pat,tsc,mmx,nonstop_tsc,pdcm,ssbd,invpcid,cqm,de,lahf_lm,vpid,smep,tm,vmx,bts,rdtscp,flexpriority,rtm,pse36,smap,cqm_mbm_total,cmov,smx,skip-l1dfl-vmentry,est,tm2,hypervisor,pge,tsc_deadline_timer,invtsc,nx,pbe,f16c,vnmi,nopl,epb,cx8,msr,umip,pcid,aperfmperf,xtpr,avx,cqm_occup_llc,pdpe1gb,constant_tsc,tsc_adjust,sse,arch_perfmon,ept,ibrs,adx,dtherm,pschange-mc-no,cat_l3,rep_good,ibpb,pln,fma,sep,aes,pts,syscall,xtopology,rdt_a,erms,cpuid,flush_l1d,vme,cx16,popcnt,mca,cqm_mbm_local,mtrr,arch-capabilities,dca,tpr_shadow,lm,ds_cpl,fxsr,hle,pebs,spec_ctrl,model_Opteron_G1,model_486,model_Broadwell-noTSX-IBRS,model_Westmere-IBRS,model_IvyBridge-IBRS,model_Conroe,model_Opteron_G2,model_Broadwell-noTSX,model_qemu32,model_Haswell,model_Westmere,model_Haswell-IBRS,model_core2duo,model_Broadwell-IBRS,model_Broadwell,model_n270,model_Haswell-noTSX-IBRS,model_IvyBridge,model_pentium3,model_Penryn,model_Nehalem,model_kvm64,model_qemu64,model_SandyBridge-IBRS,model_coreduo,model_Haswell-noTSX,model_Nehalem-IBRS,model_kvm32,model_pentium,model_SandyBridge,model_pentium2",
"kvmEnabled": "true",
"qemu-kvm": {
"kvm"
[root@client2 ~]# vdsm-client Host getCapabilities | grep nx
"cpuFlags":
"clflush,cqm_llc,sdbg,abm,movbe,bmi1,intel_ppin,apic,sse2,mce,arat,amd-ssbd,monitor,xsaveopt,ida,bmi2,pclmulqdq,ssse3,pni,ss,rdrand,xsave,avx2,intel_pt,sse4_2,ept_ad,stibp,rdseed,pse,dts,dtes64,3dnowprefetch,cpuid_fault,acpi,fsgsbase,cdp_l3,x2apic,fpu,invpcid_single,sse4_1,pti,amd-stibp,ht,pae,pat,tsc,mmx,nonstop_tsc,pdcm,ssbd,invpcid,cqm,de,lahf_lm,vpid,smep,tm,vmx,bts,rdtscp,flexpriority,rtm,pse36,smap,cqm_mbm_total,cmov,smx,skip-l1dfl-vmentry,est,tm2,hypervisor,pge,tsc_deadline_timer,invtsc,nx,pbe,f16c,vnmi,nopl,epb,cx8,msr,umip,pcid,aperfmperf,xtpr,avx,cqm_occup_llc,pdpe1gb,constant_tsc,tsc_adjust,sse,arch_perfmon,ept,ibrs,adx,dtherm,pschange-mc-no,cat_l3,rep_good,ibpb,pln,fma,sep,aes,pts,syscall,xtopology,rdt_a,erms,cpuid,flush_l1d,vme,cx16,popcnt,mca,cqm_mbm_local,mtrr,arch-capabilities,dca,tpr_shadow,lm,ds_cpl,fxsr,hle,pebs,spec_ctrl,model_Opteron_G1,model_486,model_Broadwell-noTSX-IBRS,model_Westmere-IBRS,model_IvyBridge-IBRS,model_Conroe,model_Opteron_G2,model_Broadwell-noTSX,model_qemu32,model_Haswell,model_Westmere,model_Haswell-IBRS,model_core2duo,model_Broadwell-IBRS,model_Broadwell,model_n270,model_Haswell-noTSX-IBRS,model_IvyBridge,model_pentium3,model_Penryn,model_Nehalem,model_kvm64,model_qemu64,model_SandyBridge-IBRS,model_coreduo,model_Haswell-noTSX,model_Nehalem-IBRS,model_kvm32,model_pentium,model_SandyBridge,model_pentium2",
[root@client2 ~]# vdsm-client Host getCapabilities | grep vmx
"cpuFlags":
"clflush,cqm_llc,sdbg,abm,movbe,bmi1,intel_ppin,apic,sse2,mce,arat,amd-ssbd,monitor,xsaveopt,ida,bmi2,pclmulqdq,ssse3,pni,ss,rdrand,xsave,avx2,intel_pt,sse4_2,ept_ad,stibp,rdseed,pse,dts,dtes64,3dnowprefetch,cpuid_fault,acpi,fsgsbase,cdp_l3,x2apic,fpu,invpcid_single,sse4_1,pti,amd-stibp,ht,pae,pat,tsc,mmx,nonstop_tsc,pdcm,ssbd,invpcid,cqm,de,lahf_lm,vpid,smep,tm,vmx,bts,rdtscp,flexpriority,rtm,pse36,smap,cqm_mbm_total,cmov,smx,skip-l1dfl-vmentry,est,tm2,hypervisor,pge,tsc_deadline_timer,invtsc,nx,pbe,f16c,vnmi,nopl,epb,cx8,msr,umip,pcid,aperfmperf,xtpr,avx,cqm_occup_llc,pdpe1gb,constant_tsc,tsc_adjust,sse,arch_perfmon,ept,ibrs,adx,dtherm,pschange-mc-no,cat_l3,rep_good,ibpb,pln,fma,sep,aes,pts,syscall,xtopology,rdt_a,erms,cpuid,flush_l1d,vme,cx16,popcnt,mca,cqm_mbm_local,mtrr,arch-capabilities,dca,tpr_shadow,lm,ds_cpl,fxsr,hle,pebs,spec_ctrl,model_Opteron_G1,model_486,model_Broadwell-noTSX-IBRS,model_Westmere-IBRS,model_IvyBridge-IBRS,model_Conroe,model_Opteron_G2,model_Broadwell-noTSX,model_qemu32,model_Haswell,model_Westmere,model_Haswell-IBRS,model_core2duo,model_Broadwell-IBRS,model_Broadwell,model_n270,model_Haswell-noTSX-IBRS,model_IvyBridge,model_pentium3,model_Penryn,model_Nehalem,model_kvm64,model_qemu64,model_SandyBridge-IBRS,model_coreduo,model_Haswell-noTSX,model_Nehalem-IBRS,model_kvm32,model_pentium,model_SandyBridge,model_pentium2",
[root@client2 ~]# vdsm-client Host getCapabilities | grep Broadwell
"cpuFlags":
"clflush,cqm_llc,sdbg,abm,movbe,bmi1,intel_ppin,apic,sse2,mce,arat,amd-ssbd,monitor,xsaveopt,ida,bmi2,pclmulqdq,ssse3,pni,ss,rdrand,xsave,avx2,intel_pt,sse4_2,ept_ad,stibp,rdseed,pse,dts,dtes64,3dnowprefetch,cpuid_fault,acpi,fsgsbase,cdp_l3,x2apic,fpu,invpcid_single,sse4_1,pti,amd-stibp,ht,pae,pat,tsc,mmx,nonstop_tsc,pdcm,ssbd,invpcid,cqm,de,lahf_lm,vpid,smep,tm,vmx,bts,rdtscp,flexpriority,rtm,pse36,smap,cqm_mbm_total,cmov,smx,skip-l1dfl-vmentry,est,tm2,hypervisor,pge,tsc_deadline_timer,invtsc,nx,pbe,f16c,vnmi,nopl,epb,cx8,msr,umip,pcid,aperfmperf,xtpr,avx,cqm_occup_llc,pdpe1gb,constant_tsc,tsc_adjust,sse,arch_perfmon,ept,ibrs,adx,dtherm,pschange-mc-no,cat_l3,rep_good,ibpb,pln,fma,sep,aes,pts,syscall,xtopology,rdt_a,erms,cpuid,flush_l1d,vme,cx16,popcnt,mca,cqm_mbm_local,mtrr,arch-capabilities,dca,tpr_shadow,lm,ds_cpl,fxsr,hle,pebs,spec_ctrl,model_Opteron_G1,model_486,model_Broadwell-noTSX-IBRS,model_Westmere-IBRS,model_IvyBridge-IBRS,model_Conroe,model_Opteron_G2,model_Broadwell-noTSX,model_qemu32,model_Haswell,model_Westmere,model_Haswell-IBRS,model_core2duo,model_Broadwell-IBRS,model_Broadwell,model_n270,model_Haswell-noTSX-IBRS,model_IvyBridge,model_pentium3,model_Penryn,model_Nehalem,model_kvm64,model_qemu64,model_SandyBridge-IBRS,model_coreduo,model_Haswell-noTSX,model_Nehalem-IBRS,model_kvm32,model_pentium,model_SandyBridge,model_pentium2",
So all the flags the UI claims are missing are actually present.
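In case it helps, here is a quick Python sketch of the check I'm doing, parsing the same getCapabilities JSON shown above (the flag names are just the ones I grepped for):

    # Parse "vdsm-client Host getCapabilities" output and check the flags
    # the UI complains about (nx, vmx, Broadwell model) plus kvmEnabled.
    import json
    import subprocess

    caps = json.loads(
        subprocess.check_output(["vdsm-client", "Host", "getCapabilities"])
    )

    flags = set(caps["cpuFlags"].split(","))
    print("kvmEnabled:", caps.get("kvmEnabled"))
    for flag in ("nx", "vmx", "model_Broadwell"):
        print(flag, "present" if flag in flags else "MISSING")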
Here is my repository configuration for this environment:
# yum repolist
Repository copr:copr.fedorainfracloud.org:ovirt:ovirt-master-snapshot is listed more than once in the configuration
repo id                                                           repo name
appstream                                                         CentOS Stream 8 - AppStream
baseos                                                            CentOS Stream 8 - BaseOS
copr:copr.fedorainfracloud.org:ovirt:ovirt-master-snapshot        Copr repo for ovirt-master-snapshot owned by ovirt
elrepo                                                            ELRepo.org Community Enterprise Linux Repository - el8
extras                                                            CentOS Stream 8 - Extras
lightbits-lightos-openstack                                       lightbits-lightos-openstack
lightbits-lightos-openstack-noarch                                lightbits-lightos-openstack-noarch
lightbits-lightos-openstack-source                                lightbits-lightos-openstack-source
ovirt-appliance-master-snapshot                                   oVirt appliance with ovirt-master-snapshot content
ovirt-master-centos-opstools-testing                              CentOS Stream 8 - OpsTools - collectd
ovirt-master-centos-stream-ceph-pacific                           CentOS Stream 8 - Ceph packages for x86_64
ovirt-master-centos-stream-gluster10-testing                      CentOS Stream 8 - Glusterfs 10 - testing
ovirt-master-centos-stream-nfv-openvswitch2-testing               CentOS Stream 8 - NFV OpenVSwitch 2 - testing
ovirt-master-centos-stream-openstack-yoga-testing                 CentOS Stream 8 - OpenStack Yoga Repository - testing
ovirt-master-centos-stream-ovirt45-testing                        CentOS Stream 8 - oVirt 4.5 - testing
ovirt-master-copr:copr.fedorainfracloud.org:sac:gluster-ansible   Copr repo for gluster-ansible owned by sac
ovirt-master-epel                                                 Extra Packages for Enterprise Linux 8 - x86_64
ovirt-master-virtio-win-latest                                    virtio-win builds roughly matching what will be shipped in upcoming RHEL
ovirt-node-master-snapshot                                        oVirt Node with ovirt-master-snapshot content
powertools                                                        CentOS Stream 8 - PowerTools
rdo-delorean-component-cinder                                     RDO Delorean OpenStack Cinder - current
rdo-delorean-component-clients                                    RDO Delorean Clients - current
rdo-delorean-component-common                                     RDO Delorean Common - current
rdo-delorean-component-network                                    RDO Delorean Network - current
Could you provide some guidance to help me fix this? If you need any additional info, let me know and I'll provide it.
Best regards
JC
On Mar 27, 2022, at 06:24, Benny Zlotnik <bzlotnik(a)redhat.com> wrote:
Hi, I replied in the PR
Regarding testing this with vdsm-client: in theory it's possible, but it
would be quite difficult, as you'd have to prepare the datacenter and
add traditional storage (vdsm-managed iscsi/nfs), starting with:
$ vdsm-client StoragePool create/connect
then with StorageDomain namespaces, and probably a bunch of other
stuff ovirt-engine does automatically, until you can get to the
$ vdsm-client ManagedVolume attach_volume
operations.
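For illustration only, a rough sketch of that manual flow through vdsm's Python client (method and parameter names are from memory, so double check them against the vdsm API schema before relying on this):

    # Untested sketch: attach a managed volume directly through vdsm,
    # bypassing ovirt-engine. The connection_info payload is whatever the
    # cinder driver (e.g. lightos) returns for this volume; the values
    # below are placeholders.
    from vdsm import client

    cli = client.connect("localhost", 54321)

    vol_id = "11111111-2222-3333-4444-555555555555"  # placeholder UUID
    attachment = cli.ManagedVolume.attach_volume(
        vol_id=vol_id,
        connection_info={
            "driver_volume_type": "lightos",  # placeholder payload
            "data": {"uuid": vol_id},
        },
    )
    print(attachment)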
But I am not sure how practical it is to do this; I am pretty sure it
would be much easier to do this with ovirt-engine...
Can you share what issues you ran into with ovirt-engine?
I rebased my engine PR[1] that's required to test this; new RPMs
should be available soon.
[1] https://github.com/oVirt/ovirt-engine/pull/104
On Sun, Mar 27, 2022 at 1:47 PM Muli Ben-Yehuda <muli(a)lightbitslabs.com> wrote:
>
> Hi Benny,
>
> Any update on this one?
> Also, is there a way I can test this with vdsm-client without resorting to full
> ovirt? We have run into some issues with getting ovirt working with the nightlies, but
> vdsm and vdsm-client appear to work fine with the patches applied, or at least, they run.
>
> Cheers,
> Muli
>
> On Thu, Mar 3, 2022 at 6:09 PM Benny Zlotnik <bzlotnik(a)redhat.com> wrote:
>>
>> Hi,
>>
>> I posted draft PRs for engine[1] and vdsm[2], they are still raw and I
>> only tested running starting VMs with ceph. If you can apply the
>> changes for lightos (only vdsm should be needed) and try it out it
>> would be great :)
>> Also, if you have any suggestions/comments/etc feel free to comment on
>> the PRs directly
>>
>> If you don't want to build ovirt-engine from source, CI generated RPMs
>> should be available in[3] (the job is still running while I'm writing
>> this email)
>>
>> [1] https://github.com/oVirt/ovirt-engine/pull/104
>> [2] https://github.com/oVirt/vdsm/pull/89
>> [3] https://github.com/oVirt/ovirt-engine/actions/runs/1929008680
>>
>>
>> On Wed, Mar 2, 2022 at 4:55 PM Muli Ben-Yehuda <muli(a)lightbitslabs.com> wrote:
>>>
>>> Thanks for the update, Benny. How can I help? For example, would logs from
>>> running the connector with the exact data it returns be useful?
>>>
>>> Cheers,
>>> Muli
>>>
>>> On Tue, Mar 1, 2022 at 8:39 PM Benny Zlotnik <bzlotnik(a)redhat.com> wrote:
>>>>
>>>> Hi,
>>>>
>>>> Just by browsing the code, I can think of one issue in[1], as a result
>>>> of[2] where we only considered iscsi and rbd drivers, I suspect your
>>>> driver will go into this branch, based on the issue in the 4.3 logs I
>>>> went over:
>>>>
>>>> backend/manager/modules/vdsbroker/src/main/java/org/ovirt/engine/core/vdsbroker/builder/vminfo/LibvirtVmXmlBuilder.java
>>>>
>>>>     } else if (managedBlockStorageDisk.getCinderVolumeDriver() == CinderVolumeDriver.BLOCK) {
>>>>         Map<String, Object> attachment = (Map<String, Object>)
>>>>                 managedBlockStorageDisk.getDevice().get(DeviceInfoReturn.ATTACHMENT);
>>>>         metadata = Map.of(
>>>>                 "GUID", (String) attachment.get(DeviceInfoReturn.SCSI_WWN),
>>>>                 "managed", "true"
>>>>
>>>> Which will make it go into the wrong branch in clientIF.py, appending
>>>> the empty GUID to /dev/mapper. Perhaps it is possible to work around it in
>>>> clientIF if you just want to try and get the VM started for now, by
>>>> checking if GUID is empty and deferring to:
>>>>     volPath = drive['path']
>>>>
>>>> But as discussed in this thread, our attempt at constructing the
>>>> stable paths ourselves doesn't really scale. After further discussion
>>>> with Nir I started working on creating a link in vdsm in
>>>> managedvolume.py#attach_volume to the path returned by the driver, and
>>>> engine will use our link to run the VMs.
>>>> This should simplify the code and resolve the live VM migration issue.
>>>> I had some preliminary success with this so I'll try to post the
>>>> patches soon
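>>>> Roughly along these lines (untested sketch; the helper name and exact
>>>> location are illustrative only):
>>>>
>>>>     # in managedvolume.py, after os_brick returns the attachment
>>>>     import os
>>>>
>>>>     RUN_DIR = "/run/vdsm/managedvolumes"
>>>>
>>>>     def _add_stable_link(vol_id, attachment):
>>>>         # Link a cluster-stable path to whatever local device the driver
>>>>         # returned (e.g. /dev/nvme7n42), so the same path can go into the
>>>>         # VM XML on every host.
>>>>         os.makedirs(RUN_DIR, exist_ok=True)
>>>>         link = os.path.join(RUN_DIR, vol_id)
>>>>         if os.path.islink(link):
>>>>             os.unlink(link)
>>>>         os.symlink(attachment["path"], link)
>>>>         return link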
>>>>
>>>>
>>>> [1] https://github.com/oVirt/vdsm/blob/d957a06a4d988489c83da171fcd9cfd254b12c...
>>>> [2] https://github.com/oVirt/ovirt-engine/blob/24530d17874e20581deee4b0e31914...
>>>>
>>>>
>>>>
>>>>
>>>>
>>>>
>>>>
>>>>
>>>>
>>>>
>>>> On Tue, Mar 1, 2022 at 6:12 PM Muli Ben-Yehuda <muli(a)lightbitslabs.com> wrote:
>>>>>
>>>>> Will this support require changes in ovirt-engine or just in vdsm? I
>>>>> have started to look into vdsm's managedvolume.py and its tests and it seems like
>>>>> adding support for LightOS there should be pretty simple (famous last words...). Should
>>>>> this be enough or do you think it will require changes in other parts of ovirt as well?
>>>>>
>>>>> Cheers,
>>>>> Muli
>>>>>
>>>>>> On Mon, Feb 28, 2022 at 9:09 AM Nir Soffer <nsoffer(a)redhat.com> wrote:
>>>>>>
>>>>>>> On Fri, Feb 25, 2022 at 12:04 PM Gorka Eguileor <geguileo(a)redhat.com> wrote:
>>>>>>>
>>>>>>> On 24/02, Nir Soffer wrote:
>>>>>>>>> On Thu, Feb 24, 2022 at 8:46 PM Gorka Eguileor <geguileo(a)redhat.com> wrote:
>>>>>>>>>
>>>>>>>>> On 24/02, Nir Soffer wrote:
>>>>>>>>>> On Thu, Feb 24, 2022 at 6:35 PM Muli Ben-Yehuda <muli(a)lightbitslabs.com> wrote:
>>>>>>>>>>>
>>>>>>>>>>> On Thu, Feb 24, 2022 at 6:28 PM Nir Soffer <nsoffer(a)redhat.com> wrote:
>>>>>>>>>>>>
>>>>>>>>>>>> On Thu, Feb 24, 2022 at 6:10 PM Muli Ben-Yehuda <muli(a)lightbitslabs.com> wrote:
>>>>>>>>>>>>>
>>>>>>>>>>>>> On Thu, Feb 24, 2022 at 3:58 PM Nir Soffer <nsoffer(a)redhat.com> wrote:
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> On Wed, Feb 23, 2022 at 6:24 PM Muli Ben-Yehuda <muli(a)lightbitslabs.com> wrote:
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> Thanks for the detailed instructions, Nir. I'm going to scrounge up some hardware.
>>>>>>>>>>>>>>> By the way, if anyone else would like to work on NVMe/TCP support, for an NVMe/TCP
>>>>>>>>>>>>>>> target you can either use Lightbits (talk to me offline for details) or use the
>>>>>>>>>>>>>>> upstream Linux NVMe/TCP target. Lightbits is a clustered storage system while
>>>>>>>>>>>>>>> upstream is a single target, but the client side should be close enough for
>>>>>>>>>>>>>>> vdsm/ovirt purposes.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> I played with NVMe/TCP a little bit, using qemu to create a virtual
>>>>>>>>>>>>>> NVMe disk, and export it using the kernel on one VM, and consume it
>>>>>>>>>>>>>> on another VM.
>>>>>>>>>>>>>> https://futurewei-cloud.github.io/ARM-Datacenter/qemu/nvme-of-tcp-vms/
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> One question about device naming - do we always get the same name of the
>>>>>>>>>>>>>> device in all hosts?
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>> No, we do not, see below how we handle migration in os_brick.
>>>>>>>>>>>>>
>>>>>>>>>>>>>> To support VM migration, every device must have a unique name in the cluster.
>>>>>>>>>>>>>> With multipath we always have a unique name, since we disable "friendly names",
>>>>>>>>>>>>>> so we always have:
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>     /dev/mapper/{wwid}
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> With rbd we also do not use /dev/rbdN but a unique path:
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>     /dev/rbd/poolname/volume-vol-id
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> How do we ensure a cluster-unique device path? If os_brick does not handle it, we
>>>>>>>>>>>>>> can do it in ovirt, for example:
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>     /run/vdsm/managedvolumes/{uuid} -> /dev/nvme7n42
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> but I think this should be handled in cinderlib, since openstack has
>>>>>>>>>>>>>> the same problem with migration.
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>> Indeed. Both the Lightbits LightOS connector and the nvmeof connector do this
>>>>>>>>>>>>> through the target provided namespace (LUN) UUID. After connecting to the target,
>>>>>>>>>>>>> the connectors wait for the local friendly-named device file that has the right
>>>>>>>>>>>>> UUID to show up, and then return the friendly name. So different hosts will have
>>>>>>>>>>>>> different friendly names, but the VMs will be attached to the right namespace
>>>>>>>>>>>>> since we return the friendly name on the current host that has the right UUID.
>>>>>>>>>>>>> Does this also work for you?
>>>>>>>>>>>>
>>>>>>>>>>>> It will not work for oVirt.
>>>>>>>>>>>>
>>>>>>>>>>>> Migration in oVirt works like this:
>>>>>>>>>>>>
>>>>>>>>>>>> 1. Attach disks to destination host
>>>>>>>>>>>> 2. Send VM XML from source host to destination host, and start the
>>>>>>>>>>>>    VM in paused mode
>>>>>>>>>>>> 3. Start the migration on the source host
>>>>>>>>>>>> 4. When migration is done, start the CPU on the destination host
>>>>>>>>>>>> 5. Detach the disks from the source
>>>>>>>>>>>>
>>>>>>>>>>>> This will break in step 2, since the source xml refers to an nvme device
>>>>>>>>>>>> that does not exist or is already used by another VM.
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> Indeed.
>>>>>>>>>>>
>>>>>>>>>>>> To make this work, the VM XML must use the same path, existing on
>>>>>>>>>>>> both hosts.
>>>>>>>>>>>>
>>>>>>>>>>>> The issue can be solved by a libvirt hook updating the paths before qemu
>>>>>>>>>>>> is started on the destination, but I think the right way to handle this
>>>>>>>>>>>> is to have the same path.
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> You mentioned above that it can be handled in ovirt
>>>>>>>>>>> (cf. /run/vdsm/managedvolumes/{uuid} -> /dev/nvme7n42), which seems like a
>>>>>>>>>>> reasonable approach given the constraint imposed by the oVirt migration flow you
>>>>>>>>>>> outlined above. What information does vdsm need to create and use the
>>>>>>>>>>> /var/run/vdsm/managedvolumes/{uuid} link? Today the connector does (trimmed for brevity):
>>>>>>>>>>>
>>>>>>>>>>>     def connect_volume(self, connection_properties):
>>>>>>>>>>>         device_info = {'type': 'block'}
>>>>>>>>>>>         uuid = connection_properties['uuid']
>>>>>>>>>>>         device_path = self._get_device_by_uuid(uuid)
>>>>>>>>>>>         device_info['path'] = device_path
>>>>>>>>>>>         return device_info
>>>>>>>>>>
>>>>>>>>>> I think we have 2 options:
>>>>>>>>>>
>>>>>>>>>> 1. unique path created by os_brick using the underlying uuid
>>>>>>>>>>
>>>>>>>>>> In this case the connector will return the uuid, and ovirt will use
>>>>>>>>>> it to resolve the unique path that will be stored and used on the
>>>>>>>>>> engine side to create the vm xml.
>>>>>>>>>>
>>>>>>>>>> I'm not sure how the connector should return this uuid. Looking in the
>>>>>>>>>> current vdsm code:
>>>>>>>>>>
>>>>>>>>>>     if vol_type in ("iscsi", "fibre_channel"):
>>>>>>>>>>         if "multipath_id" not in attachment:
>>>>>>>>>>             raise se.ManagedVolumeUnsupportedDevice(vol_id, attachment)
>>>>>>>>>>         # /dev/mapper/xxxyyy
>>>>>>>>>>         return os.path.join(DEV_MAPPER, attachment["multipath_id"])
>>>>>>>>>>     elif vol_type == "rbd":
>>>>>>>>>>         # /dev/rbd/poolname/volume-vol-id
>>>>>>>>>>         return os.path.join(DEV_RBD, connection_info['data']['name'])
>>>>>>>>>>
>>>>>>>>>> os_brick does not have a uniform way to address different devices.
>>>>>>>>>>
>>>>>>>>>> Maybe Gorka can help with this.
>>>>>>>>>
>>>>>>>>> Hi,
>>>>>>>>>
>>>>>>>>> That is true, because in OpenStack we haven't had the need to have the
>>>>>>>>> same path on every host, or even on the same host during different
>>>>>>>>> connections.
>>>>>>>>>
>>>>>>>>> For nvme a new `elif` clause could be added there, though it will be a
>>>>>>>>> bit trickier, because the nvme connection properties format is a bit of
>>>>>>>>> a mess...
>>>>>>>>>
>>>>>>>>> We have 2 different formats for the nvme properties, and the wwid that
>>>>>>>>> appears in the symlink /dev/disk/by-id/nvme-<wwid> may or may not be the
>>>>>>>>> volume id; it may be the uuid in the connection info if present, or the
>>>>>>>>> nguid if the nvme device doesn't have a uuid.
>>>>>>>>>
>>>>>>>>> For these reasons I would recommend not relying on the connection
>>>>>>>>> information and relying on the path from the attachment instead.
>>>>>>>>>
>>>>>>>>> Something like this should probably be fine:
>>>>>>>>>
>>>>>>>>>     elif vol_type == 'nvme':
>>>>>>>>>         device_name = os.path.basename(attachment['path'])
>>>>>>>>>         controller = device_name.rsplit('n', 1)[0]
>>>>>>>>>         wwid_filename = f'/sys/class/nvme/{controller}/{device_name}/wwid'
>>>>>>>>>         with open(wwid_filename, 'r') as f:
>>>>>>>>>             uuid = f.read().strip()
>>>>>>>>>         return f'/dev/disk/by-id/nvme-{uuid}'
>>>>>>>>
>>>>>>>> Thanks Gorka!
>>>>>>>>
>>>>>>>> But isn't this duplicating logic already in os-brick?
>>>>>>>>
>>>>>>>> https://github.com/openstack/os-brick/blob/56bf0272b55dcbbc7f5b03150973a8...
>>>>>>>>
>>>>>>>
>>>>>>> Hi Nir,
>>>>>>>
>>>>>>> Oh! I thought we were talking about the generic NVMe-oF connector,
>>>>>>> didn't know this was specifically about the LightOS one.
>>>>>>>
>>>>>>> The link is used as an easy way to locate the volume, it doesn't mean
>>>>>>> that it is returned to the caller of the `connect_volume` method. In
>>>>>>> fact, we can see how that method actually returns the real path and not
>>>>>>> the link's path:
>>>>>>>
>>>>>>>     def _check_device_exists_using_dev_lnk(self, uuid):
>>>>>>>         lnk_path = f"/dev/disk/by-id/nvme-uuid.{uuid}"
>>>>>>> -->     if os.path.exists(lnk_path):
>>>>>>>         ^^^ Check link exists
>>>>>>>
>>>>>>> -->         devname = os.path.realpath(lnk_path)
>>>>>>>             ^^^ Get the real path for the symlink
>>>>>>>
>>>>>>> -->         if devname.startswith("/dev/nvme"):
>>>>>>>             ^^^ Make extra sure it's not pointing to something crazy
>>>>>>>
>>>>>>>                 LOG.info("LIGHTOS: devpath %s detected for uuid %s",
>>>>>>>                          devname, uuid)
>>>>>>>
>>>>>>> -->             return devname
>>>>>>>                 ^^^ Return it
>>>>>>>
>>>>>>>         return None
>>>>>>>
>>>>>>>> Another interesting detail is this wait:
>>>>>>>>
>>>>>>>> https://github.com/openstack/os-brick/blob/56bf0272b55dcbbc7f5b03150973a8...
>>>>>>>>
>>>>>>>>     def _get_device_by_uuid(self, uuid):
>>>>>>>>         endtime = time.time() + self.WAIT_DEVICE_TIMEOUT
>>>>>>>>         while time.time() < endtime:
>>>>>>>>             try:
>>>>>>>>                 device = self._check_device_exists_using_dev_lnk(uuid)
>>>>>>>>                 if device:
>>>>>>>>                     return device
>>>>>>>>             except Exception as e:
>>>>>>>>                 LOG.debug(f'LIGHTOS: {e}')
>>>>>>>>             device = self._check_device_exists_reading_block_class(uuid)
>>>>>>>>             if device:
>>>>>>>>                 return device
>>>>>>>>
>>>>>>>>             time.sleep(1)
>>>>>>>>         return None
>>>>>>>>
>>>>>>>> The code does not explain why it tries to use the /dev/disk/by-id link
>>>>>>>> and falls back to sysfs on errors. Based on our experience with udev,
>>>>>>>> I guess that the author does not trust udev. I wonder if we can trust
>>>>>>>> it as the stable device path.
>>>>>>>>
>>>>>>>
>>>>>>> In my experience udev rules (which are different from udev itself) are
>>>>>>> less than reliable as a way of finding devices when working "in the
>>>>>>> wild". They are only reliable if you have full control over the host
>>>>>>> system and are sure nobody (admin or distro) can break things.
>>>>>>>
>>>>>>> For reference, at Red Hat we have an RFE to improve os-brick [1] and
>>>>>>> stop using symlinks at all.
>>>>>>>
>>>>>>> While they are not 100% reliable in the wild, they are quite reliable
>>>>>>> once they are working on a specific system, which means that if we
>>>>>>> confirm they are working on a system we can rely on them if no changes
>>>>>>> are made on the system (and if the CPU is not at 100% during attachment).
>>>>>>>
>>>>>>>
>>>>>>> [1]: https://bugzilla.redhat.com/show_bug.cgi?id=1697319
>>>>>>>
>>>>>>>
>>>>>>>> If we can trust this path, maybe os_brick can return the stable path
>>>>>>>> in a uniform way for all kinds of devices?
>>>>>>>
>>>>>>> I don't think this is likely to happen, because it has no real value for
>>>>>>> OpenStack so it's unlikely to get prioritized (for coding and reviews).
>>>>>>
>>>>>> Since we cannot get a stable path from os-brick, and a stable path
>>>>>> is an oVirt-specific requirement, we need to handle this in oVirt,
>>>>>> similar to the way we handle multipath and rbd and traditional storage.
>>>>>>
>>>>>> Nir
>>>>>>
>>>>>
>>>>
>>>
>>>
>>
>
>
--
Lightbits Labs
Lead the cloud-native data center transformation by delivering scalable and efficient software defined storage that is easy to consume.

This message is sent in confidence for the addressee only. It may contain legally privileged information. The contents are not to be disclosed to anyone other than the addressee. Unauthorized recipients are requested to preserve this confidentiality, advise the sender immediately of any error in transmission and delete the email from their systems.