
On Wed, Jul 5, 2017 at 5:22 PM, Atin Mukherjee <amukherj@redhat.com> wrote:
> And what does glusterd log indicate for these failures?
See here in gzip format:
https://drive.google.com/file/d/0BwoPbcrMv8mvYmlRLUgyV0pFN0k/view?usp=sharin...

It seems that on each host the peer files have been updated with a new entry "hostname2":

[root@ovirt01 ~]# cat /var/lib/glusterd/peers/*
uuid=b89311fe-257f-4e44-8e15-9bff6245d689
state=3
hostname1=ovirt02.localdomain.local
hostname2=10.10.2.103
uuid=ec81a04c-a19c-4d31-9d82-7543cefe79f3
state=3
hostname1=ovirt03.localdomain.local
hostname2=10.10.2.104
[root@ovirt01 ~]#

[root@ovirt02 ~]# cat /var/lib/glusterd/peers/*
uuid=e9717281-a356-42aa-a579-a4647a29a0bc
state=3
hostname1=ovirt01.localdomain.local
hostname2=10.10.2.102
uuid=ec81a04c-a19c-4d31-9d82-7543cefe79f3
state=3
hostname1=ovirt03.localdomain.local
hostname2=10.10.2.104
[root@ovirt02 ~]#

[root@ovirt03 ~]# cat /var/lib/glusterd/peers/*
uuid=b89311fe-257f-4e44-8e15-9bff6245d689
state=3
hostname1=ovirt02.localdomain.local
hostname2=10.10.2.103
uuid=e9717281-a356-42aa-a579-a4647a29a0bc
state=3
hostname1=ovirt01.localdomain.local
hostname2=10.10.2.102
[root@ovirt03 ~]#

But the volume info on the second and third node has lost the brick information for the ovirt01/gl01 host...

E.g. on ovirt02:

[root@ovirt02 peers]# gluster volume info export

Volume Name: export
Type: Replicate
Volume ID: b00e5839-becb-47e7-844f-6ce6ce1b7153
Status: Started
Snapshot Count: 0
Number of Bricks: 0 x (2 + 1) = 2
Transport-type: tcp
Bricks:
Brick1: ovirt02.localdomain.local:/gluster/brick3/export
Brick2: ovirt03.localdomain.local:/gluster/brick3/export
Options Reconfigured:
transport.address-family: inet
performance.readdir-ahead: on
performance.quick-read: off
performance.read-ahead: off
performance.io-cache: off
performance.stat-prefetch: off
cluster.eager-lock: enable
network.remote-dio: off
cluster.quorum-type: auto
cluster.server-quorum-type: server
storage.owner-uid: 36
storage.owner-gid: 36
features.shard: on
features.shard-block-size: 512MB
performance.low-prio-threads: 32
cluster.data-self-heal-algorithm: full
cluster.locking-scheme: granular
cluster.shd-wait-qlength: 10000
cluster.shd-max-threads: 6
network.ping-timeout: 30
user.cifs: off
nfs.disable: on
performance.strict-o-direct: on
[root@ovirt02 peers]#

And on ovirt03:

[root@ovirt03 ~]# gluster volume info export

Volume Name: export
Type: Replicate
Volume ID: b00e5839-becb-47e7-844f-6ce6ce1b7153
Status: Started
Snapshot Count: 0
Number of Bricks: 0 x (2 + 1) = 2
Transport-type: tcp
Bricks:
Brick1: ovirt02.localdomain.local:/gluster/brick3/export
Brick2: ovirt03.localdomain.local:/gluster/brick3/export
Options Reconfigured:
transport.address-family: inet
performance.readdir-ahead: on
performance.quick-read: off
performance.read-ahead: off
performance.io-cache: off
performance.stat-prefetch: off
cluster.eager-lock: enable
network.remote-dio: off
cluster.quorum-type: auto
cluster.server-quorum-type: server
storage.owner-uid: 36
storage.owner-gid: 36
features.shard: on
features.shard-block-size: 512MB
performance.low-prio-threads: 32
cluster.data-self-heal-algorithm: full
cluster.locking-scheme: granular
cluster.shd-wait-qlength: 10000
cluster.shd-max-threads: 6
network.ping-timeout: 30
user.cifs: off
nfs.disable: on
performance.strict-o-direct: on
[root@ovirt03 ~]#

While on ovirt01 the volume definition seems isolated to its own brick...
[root@ovirt01 ~]# gluster volume info export

Volume Name: export
Type: Replicate
Volume ID: b00e5839-becb-47e7-844f-6ce6ce1b7153
Status: Started
Snapshot Count: 0
Number of Bricks: 0 x (2 + 1) = 1
Transport-type: tcp
Bricks:
Brick1: gl01.localdomain.local:/gluster/brick3/export
Options Reconfigured:
transport.address-family: inet
performance.readdir-ahead: on
performance.quick-read: off
performance.read-ahead: off
performance.io-cache: off
performance.stat-prefetch: off
cluster.eager-lock: enable
network.remote-dio: off
cluster.quorum-type: auto
cluster.server-quorum-type: server
storage.owner-uid: 36
storage.owner-gid: 36
features.shard: on
features.shard-block-size: 512MB
performance.low-prio-threads: 32
cluster.data-self-heal-algorithm: full
cluster.locking-scheme: granular
cluster.shd-wait-qlength: 10000
cluster.shd-max-threads: 6
network.ping-timeout: 30
user.cifs: off
nfs.disable: on
performance.strict-o-direct: on
[root@ovirt01 ~]#
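
To pin down which copy of the volume definition diverged, I was thinking of diffing the on-disk files directly as well. A minimal sketch, assuming passwordless SSH as root and the default glusterd working directory /var/lib/glusterd (the grepped keys count, replica_count, arbiter_count and brick-N are what I expect to find in the vols/export/info file; adjust if they differ):

# Hypothetical check, not run yet: dump the brick/replica keys of the
# 'export' volume definition from each peer's glusterd store.
# Assumes passwordless SSH as root and the default /var/lib/glusterd path.
for h in ovirt01 ovirt02 ovirt03; do
    echo "== ${h}.localdomain.local =="
    ssh root@"${h}".localdomain.local \
        'grep -E "^(count|replica_count|arbiter_count|brick-)" /var/lib/glusterd/vols/export/info'
done

Whichever node disagrees with the other two should hopefully line up with what the glusterd log (linked above) shows around the time the definition changed.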