
I attempted to run manual OST twice and both failed with below issue. Can someone take a look? Thanks, Piotr 2017-10-20 07:59:12,485::log_utils.py::__exit__::607::ovirtlago.prefix::DEBUG:: File "/usr/lib/python2.7/site-packages/lago/log_utils.py", line 636, in wrapper return func(*args, **kwargs) File "/usr/lib/python2.7/site-packages/ovirtlago/reposetup.py", line 111, in wrapper with utils.repo_server_context(args[0]): File "/usr/lib64/python2.7/contextlib.py", line 17, in __enter__ return self.gen.next() File "/usr/lib/python2.7/site-packages/ovirtlago/utils.py", line 100, in repo_server_context root_dir=prefix.paths.internal_repo(), File "/usr/lib/python2.7/site-packages/ovirtlago/utils.py", line 76, in _create_http_server generate_request_handler(root_dir), File "/usr/lib64/python2.7/SocketServer.py", line 419, in __init__ self.server_bind() File "/usr/lib64/python2.7/BaseHTTPServer.py", line 108, in server_bind SocketServer.TCPServer.server_bind(self) File "/usr/lib64/python2.7/SocketServer.py", line 430, in server_bind self.socket.bind(self.server_address) File "/usr/lib64/python2.7/socket.py", line 224, in meth return getattr(self._sock,name)(*args) 2017-10-20 07:59:12,485::cmd.py::do_run::365::root::ERROR::Error occured, aborting Traceback (most recent call last): File "/usr/lib/python2.7/site-packages/ovirtlago/cmd.py", line 362, in do_run self.cli_plugins[args.ovirtverb].do_run(args) File "/usr/lib/python2.7/site-packages/lago/plugins/cli.py", line 184, in do_run self._do_run(**vars(args)) File "/usr/lib/python2.7/site-packages/lago/utils.py", line 501, in wrapper return func(*args, **kwargs) File "/usr/lib/python2.7/site-packages/lago/utils.py", line 512, in wrapper return func(*args, prefix=prefix, **kwargs) File "/usr/lib/python2.7/site-packages/ovirtlago/cmd.py", line 166, in do_deploy prefix.deploy() File "/usr/lib/python2.7/site-packages/lago/log_utils.py", line 636, in wrapper return func(*args, **kwargs) File 
"/usr/lib/python2.7/site-packages/ovirtlago/reposetup.py", line 111, in wrapper with utils.repo_server_context(args[0]): File "/usr/lib64/python2.7/contextlib.py", line 17, in __enter__ return self.gen.next() File "/usr/lib/python2.7/site-packages/ovirtlago/utils.py", line 100, in repo_server_context root_dir=prefix.paths.internal_repo(), File "/usr/lib/python2.7/site-packages/ovirtlago/utils.py", line 76, in _create_http_server generate_request_handler(root_dir), File "/usr/lib64/python2.7/SocketServer.py", line 419, in __init__ self.server_bind() File "/usr/lib64/python2.7/BaseHTTPServer.py", line 108, in server_bind SocketServer.TCPServer.server_bind(self) File "/usr/lib64/python2.7/SocketServer.py", line 430, in server_bind self.socket.bind(self.server_address) File "/usr/lib64/python2.7/socket.py", line 224, in meth return getattr(self._sock,name)(*args) error: [Errno 98] Address already in use

Evgheni, Was there any change recently to Lago slaves? On Fri, Oct 20, 2017 at 11:05 AM, Piotr Kliczewski < piotr.kliczewski@gmail.com> wrote:
I attempted to run manual OST twice and both failed with below issue. Can someone take a look?
Thanks, Piotr
2017-10-20 07:59:12,485::log_utils.py::__exit__::607::ovirtlago.prefix: :DEBUG:: File "/usr/lib/python2.7/site-packages/lago/log_utils.py", line 636, in wrapper return func(*args, **kwargs) File "/usr/lib/python2.7/site-packages/ovirtlago/reposetup.py", line 111, in wrapper with utils.repo_server_context(args[0]): File "/usr/lib64/python2.7/contextlib.py", line 17, in __enter__ return self.gen.next() File "/usr/lib/python2.7/site-packages/ovirtlago/utils.py", line 100, in repo_server_context root_dir=prefix.paths.internal_repo(), File "/usr/lib/python2.7/site-packages/ovirtlago/utils.py", line 76, in _create_http_server generate_request_handler(root_dir), File "/usr/lib64/python2.7/SocketServer.py", line 419, in __init__ self.server_bind() File "/usr/lib64/python2.7/BaseHTTPServer.py", line 108, in server_bind SocketServer.TCPServer.server_bind(self) File "/usr/lib64/python2.7/SocketServer.py", line 430, in server_bind self.socket.bind(self.server_address) File "/usr/lib64/python2.7/socket.py", line 224, in meth return getattr(self._sock,name)(*args)
2017-10-20 07:59:12,485::cmd.py::do_run::365::root::ERROR::Error occured, aborting Traceback (most recent call last): File "/usr/lib/python2.7/site-packages/ovirtlago/cmd.py", line 362, in do_run self.cli_plugins[args.ovirtverb].do_run(args) File "/usr/lib/python2.7/site-packages/lago/plugins/cli.py", line 184, in do_run self._do_run(**vars(args)) File "/usr/lib/python2.7/site-packages/lago/utils.py", line 501, in wrapper return func(*args, **kwargs) File "/usr/lib/python2.7/site-packages/lago/utils.py", line 512, in wrapper return func(*args, prefix=prefix, **kwargs) File "/usr/lib/python2.7/site-packages/ovirtlago/cmd.py", line 166, in do_deploy prefix.deploy() File "/usr/lib/python2.7/site-packages/lago/log_utils.py", line 636, in wrapper return func(*args, **kwargs) File "/usr/lib/python2.7/site-packages/ovirtlago/reposetup.py", line 111, in wrapper with utils.repo_server_context(args[0]): File "/usr/lib64/python2.7/contextlib.py", line 17, in __enter__ return self.gen.next() File "/usr/lib/python2.7/site-packages/ovirtlago/utils.py", line 100, in repo_server_context root_dir=prefix.paths.internal_repo(), File "/usr/lib/python2.7/site-packages/ovirtlago/utils.py", line 76, in _create_http_server generate_request_handler(root_dir), File "/usr/lib64/python2.7/SocketServer.py", line 419, in __init__ self.server_bind() File "/usr/lib64/python2.7/BaseHTTPServer.py", line 108, in server_bind SocketServer.TCPServer.server_bind(self) File "/usr/lib64/python2.7/SocketServer.py", line 430, in server_bind self.socket.bind(self.server_address) File "/usr/lib64/python2.7/socket.py", line 224, in meth return getattr(self._sock,name)(*args) error: [Errno 98] Address already in use _______________________________________________ Infra mailing list Infra@ovirt.org http://lists.ovirt.org/mailman/listinfo/infra
-- Eyal edri MANAGER RHV DevOps EMEA VIRTUALIZATION R&D Red Hat EMEA <https://www.redhat.com/> <https://red.ht/sig> TRIED. TESTED. TRUSTED. <https://redhat.com/trusted> phone: +972-9-7692018 irc: eedri (on #tlv #rhev-dev #rhev-integ)

Looks like there might be a lago localrepo process left running on the slave from a previous run. On 20 October 2017 at 11:26, Eyal Edri <eedri@redhat.com> wrote:
Evgheni, Was there any change recently to Lago slaves?
On Fri, Oct 20, 2017 at 11:05 AM, Piotr Kliczewski < piotr.kliczewski@gmail.com> wrote:
I attempted to run manual OST twice and both failed with below issue. Can someone take a look?
Thanks, Piotr
2017-10-20 07:59:12,485::log_utils.py::__exit__::607::ovirtlago.prefix: :DEBUG:: File "/usr/lib/python2.7/site-packages/lago/log_utils.py", line 636, in wrapper return func(*args, **kwargs) File "/usr/lib/python2.7/site-packages/ovirtlago/reposetup.py", line 111, in wrapper with utils.repo_server_context(args[0]): File "/usr/lib64/python2.7/contextlib.py", line 17, in __enter__ return self.gen.next() File "/usr/lib/python2.7/site-packages/ovirtlago/utils.py", line 100, in repo_server_context root_dir=prefix.paths.internal_repo(), File "/usr/lib/python2.7/site-packages/ovirtlago/utils.py", line 76, in _create_http_server generate_request_handler(root_dir), File "/usr/lib64/python2.7/SocketServer.py", line 419, in __init__ self.server_bind() File "/usr/lib64/python2.7/BaseHTTPServer.py", line 108, in server_bind SocketServer.TCPServer.server_bind(self) File "/usr/lib64/python2.7/SocketServer.py", line 430, in server_bind self.socket.bind(self.server_address) File "/usr/lib64/python2.7/socket.py", line 224, in meth return getattr(self._sock,name)(*args)
2017-10-20 07:59:12,485::cmd.py::do_run::365::root::ERROR::Error occured, aborting Traceback (most recent call last): File "/usr/lib/python2.7/site-packages/ovirtlago/cmd.py", line 362, in do_run self.cli_plugins[args.ovirtverb].do_run(args) File "/usr/lib/python2.7/site-packages/lago/plugins/cli.py", line 184, in do_run self._do_run(**vars(args)) File "/usr/lib/python2.7/site-packages/lago/utils.py", line 501, in wrapper return func(*args, **kwargs) File "/usr/lib/python2.7/site-packages/lago/utils.py", line 512, in wrapper return func(*args, prefix=prefix, **kwargs) File "/usr/lib/python2.7/site-packages/ovirtlago/cmd.py", line 166, in do_deploy prefix.deploy() File "/usr/lib/python2.7/site-packages/lago/log_utils.py", line 636, in wrapper return func(*args, **kwargs) File "/usr/lib/python2.7/site-packages/ovirtlago/reposetup.py", line 111, in wrapper with utils.repo_server_context(args[0]): File "/usr/lib64/python2.7/contextlib.py", line 17, in __enter__ return self.gen.next() File "/usr/lib/python2.7/site-packages/ovirtlago/utils.py", line 100, in repo_server_context root_dir=prefix.paths.internal_repo(), File "/usr/lib/python2.7/site-packages/ovirtlago/utils.py", line 76, in _create_http_server generate_request_handler(root_dir), File "/usr/lib64/python2.7/SocketServer.py", line 419, in __init__ self.server_bind() File "/usr/lib64/python2.7/BaseHTTPServer.py", line 108, in server_bind SocketServer.TCPServer.server_bind(self) File "/usr/lib64/python2.7/SocketServer.py", line 430, in server_bind self.socket.bind(self.server_address) File "/usr/lib64/python2.7/socket.py", line 224, in meth return getattr(self._sock,name)(*args) error: [Errno 98] Address already in use _______________________________________________ Infra mailing list Infra@ovirt.org http://lists.ovirt.org/mailman/listinfo/infra
--
Eyal edri
MANAGER
RHV DevOps
EMEA VIRTUALIZATION R&D
Red Hat EMEA <https://www.redhat.com/> <https://red.ht/sig> TRIED. TESTED. TRUSTED. <https://redhat.com/trusted> phone: +972-9-7692018 <+972%209-769-2018> irc: eedri (on #tlv #rhev-dev #rhev-integ)
_______________________________________________ Infra mailing list Infra@ovirt.org http://lists.ovirt.org/mailman/listinfo/infra
-- Barak Korren RHV DevOps team , RHCE, RHCi Red Hat EMEA redhat.com | TRIED. TESTED. TRUSTED. | redhat.com/trusted

I agree with Barak - I checked the slave that was failing and there was a process still listening on port 8585. The slave was put offline, but attempting to run the job on a different one caused the exact same error. As more slaves are affected, this may be a lago bug. No changes were made on slaves this week. On Fri, Oct 20, 2017 at 10:46 AM, Barak Korren <bkorren@redhat.com> wrote:
Looks like there might be a lago localrepo process left running on the slave from a previous run.
On 20 October 2017 at 11:26, Eyal Edri <eedri@redhat.com> wrote:
Evgheni, Was there any change recently to Lago slaves?
On Fri, Oct 20, 2017 at 11:05 AM, Piotr Kliczewski < piotr.kliczewski@gmail.com> wrote:
I attempted to run manual OST twice and both failed with below issue. Can someone take a look?
Thanks, Piotr
2017-10-20 07:59:12,485::log_utils.py::__exit__::607::ovirtlago.prefix: :DEBUG:: File "/usr/lib/python2.7/site-packages/lago/log_utils.py", line 636, in wrapper return func(*args, **kwargs) File "/usr/lib/python2.7/site-packages/ovirtlago/reposetup.py", line 111, in wrapper with utils.repo_server_context(args[0]): File "/usr/lib64/python2.7/contextlib.py", line 17, in __enter__ return self.gen.next() File "/usr/lib/python2.7/site-packages/ovirtlago/utils.py", line 100, in repo_server_context root_dir=prefix.paths.internal_repo(), File "/usr/lib/python2.7/site-packages/ovirtlago/utils.py", line 76, in _create_http_server generate_request_handler(root_dir), File "/usr/lib64/python2.7/SocketServer.py", line 419, in __init__ self.server_bind() File "/usr/lib64/python2.7/BaseHTTPServer.py", line 108, in server_bind SocketServer.TCPServer.server_bind(self) File "/usr/lib64/python2.7/SocketServer.py", line 430, in server_bind self.socket.bind(self.server_address) File "/usr/lib64/python2.7/socket.py", line 224, in meth return getattr(self._sock,name)(*args)
2017-10-20 07:59:12,485::cmd.py::do_run::365::root::ERROR::Error occured, aborting Traceback (most recent call last): File "/usr/lib/python2.7/site-packages/ovirtlago/cmd.py", line 362, in do_run self.cli_plugins[args.ovirtverb].do_run(args) File "/usr/lib/python2.7/site-packages/lago/plugins/cli.py", line 184, in do_run self._do_run(**vars(args)) File "/usr/lib/python2.7/site-packages/lago/utils.py", line 501, in wrapper return func(*args, **kwargs) File "/usr/lib/python2.7/site-packages/lago/utils.py", line 512, in wrapper return func(*args, prefix=prefix, **kwargs) File "/usr/lib/python2.7/site-packages/ovirtlago/cmd.py", line 166, in do_deploy prefix.deploy() File "/usr/lib/python2.7/site-packages/lago/log_utils.py", line 636, in wrapper return func(*args, **kwargs) File "/usr/lib/python2.7/site-packages/ovirtlago/reposetup.py", line 111, in wrapper with utils.repo_server_context(args[0]): File "/usr/lib64/python2.7/contextlib.py", line 17, in __enter__ return self.gen.next() File "/usr/lib/python2.7/site-packages/ovirtlago/utils.py", line 100, in repo_server_context root_dir=prefix.paths.internal_repo(), File "/usr/lib/python2.7/site-packages/ovirtlago/utils.py", line 76, in _create_http_server generate_request_handler(root_dir), File "/usr/lib64/python2.7/SocketServer.py", line 419, in __init__ self.server_bind() File "/usr/lib64/python2.7/BaseHTTPServer.py", line 108, in server_bind SocketServer.TCPServer.server_bind(self) File "/usr/lib64/python2.7/SocketServer.py", line 430, in server_bind self.socket.bind(self.server_address) File "/usr/lib64/python2.7/socket.py", line 224, in meth return getattr(self._sock,name)(*args) error: [Errno 98] Address already in use _______________________________________________ Infra mailing list Infra@ovirt.org http://lists.ovirt.org/mailman/listinfo/infra
--
Eyal edri
MANAGER
RHV DevOps
EMEA VIRTUALIZATION R&D
Red Hat EMEA <https://www.redhat.com/> <https://red.ht/sig> TRIED. TESTED. TRUSTED. <https://redhat.com/trusted> phone: +972-9-7692018 <+972%209-769-2018> irc: eedri (on #tlv #rhev-dev #rhev-integ)
_______________________________________________ Infra mailing list Infra@ovirt.org http://lists.ovirt.org/mailman/listinfo/infra
-- Barak Korren RHV DevOps team , RHCE, RHCi Red Hat EMEA redhat.com | TRIED. TESTED. TRUSTED. | redhat.com/trusted
-- Regards, Evgheni Dereveanchin

This issue is caused by calling "lago ovirt serve" (which starts the repo server) as a subprocess and not making sure to kill it when it is no longer needed (or on failure). In the past, VDSM's check-patch was coded like this, but we fixed it. It could be that the same bug exists in another suite. Evgheni, can you specify a slave that had this issue? On Fri, Oct 20, 2017 at 3:44 PM, Evgheni Dereveanchin <ederevea@redhat.com> wrote:
I agree with Barak - I checked the slave that was failing and there was a process still listening on port 8585. The slave was put offline, but attempting to run the job on a different one caused the exact same error. As more slaves are affected, this may be a lago bug. No changes were made on slaves this week.
On Fri, Oct 20, 2017 at 10:46 AM, Barak Korren <bkorren@redhat.com> wrote:
Looks like there might be a lago localrepo process left running on the slave from a previous run.
On 20 October 2017 at 11:26, Eyal Edri <eedri@redhat.com> wrote:
Evgheni, Was there any change recently to Lago slaves?
On Fri, Oct 20, 2017 at 11:05 AM, Piotr Kliczewski < piotr.kliczewski@gmail.com> wrote:
I attempted to run manual OST twice and both failed with below issue. Can someone take a look?
Thanks, Piotr
2017-10-20 07:59:12,485::log_utils.py::__exit__::607::ovirtlago.prefix: :DEBUG:: File "/usr/lib/python2.7/site-packages/lago/log_utils.py", line 636, in wrapper return func(*args, **kwargs) File "/usr/lib/python2.7/site-packages/ovirtlago/reposetup.py", line 111, in wrapper with utils.repo_server_context(args[0]): File "/usr/lib64/python2.7/contextlib.py", line 17, in __enter__ return self.gen.next() File "/usr/lib/python2.7/site-packages/ovirtlago/utils.py", line 100, in repo_server_context root_dir=prefix.paths.internal_repo(), File "/usr/lib/python2.7/site-packages/ovirtlago/utils.py", line 76, in _create_http_server generate_request_handler(root_dir), File "/usr/lib64/python2.7/SocketServer.py", line 419, in __init__ self.server_bind() File "/usr/lib64/python2.7/BaseHTTPServer.py", line 108, in server_bind SocketServer.TCPServer.server_bind(self) File "/usr/lib64/python2.7/SocketServer.py", line 430, in server_bind self.socket.bind(self.server_address) File "/usr/lib64/python2.7/socket.py", line 224, in meth return getattr(self._sock,name)(*args)
2017-10-20 07:59:12,485::cmd.py::do_run::365::root::ERROR::Error occured, aborting Traceback (most recent call last): File "/usr/lib/python2.7/site-packages/ovirtlago/cmd.py", line 362, in do_run self.cli_plugins[args.ovirtverb].do_run(args) File "/usr/lib/python2.7/site-packages/lago/plugins/cli.py", line 184, in do_run self._do_run(**vars(args)) File "/usr/lib/python2.7/site-packages/lago/utils.py", line 501, in wrapper return func(*args, **kwargs) File "/usr/lib/python2.7/site-packages/lago/utils.py", line 512, in wrapper return func(*args, prefix=prefix, **kwargs) File "/usr/lib/python2.7/site-packages/ovirtlago/cmd.py", line 166, in do_deploy prefix.deploy() File "/usr/lib/python2.7/site-packages/lago/log_utils.py", line 636, in wrapper return func(*args, **kwargs) File "/usr/lib/python2.7/site-packages/ovirtlago/reposetup.py", line 111, in wrapper with utils.repo_server_context(args[0]): File "/usr/lib64/python2.7/contextlib.py", line 17, in __enter__ return self.gen.next() File "/usr/lib/python2.7/site-packages/ovirtlago/utils.py", line 100, in repo_server_context root_dir=prefix.paths.internal_repo(), File "/usr/lib/python2.7/site-packages/ovirtlago/utils.py", line 76, in _create_http_server generate_request_handler(root_dir), File "/usr/lib64/python2.7/SocketServer.py", line 419, in __init__ self.server_bind() File "/usr/lib64/python2.7/BaseHTTPServer.py", line 108, in server_bind SocketServer.TCPServer.server_bind(self) File "/usr/lib64/python2.7/SocketServer.py", line 430, in server_bind self.socket.bind(self.server_address) File "/usr/lib64/python2.7/socket.py", line 224, in meth return getattr(self._sock,name)(*args) error: [Errno 98] Address already in use _______________________________________________ Infra mailing list Infra@ovirt.org http://lists.ovirt.org/mailman/listinfo/infra
--
Eyal edri
MANAGER
RHV DevOps
EMEA VIRTUALIZATION R&D
Red Hat EMEA <https://www.redhat.com/> <https://red.ht/sig> TRIED. TESTED. TRUSTED. <https://redhat.com/trusted> phone: +972-9-7692018 <+972%209-769-2018> irc: eedri (on #tlv #rhev-dev #rhev-integ)
_______________________________________________ Infra mailing list Infra@ovirt.org http://lists.ovirt.org/mailman/listinfo/infra
-- Barak Korren RHV DevOps team , RHCE, RHCi Red Hat EMEA redhat.com | TRIED. TESTED. TRUSTED. | redhat.com/trusted
-- Regards, Evgheni Dereveanchin
-- *GAL BEN HAIM* RHV DEVOPS
participants (5)
-
Barak Korren
-
Evgheni Dereveanchin
-
Eyal Edri
-
Gal Ben Haim
-
Piotr Kliczewski