refs #2509 use setsid, send signals to the pgrp #49

Merged
nserrano merged 1 commit from setsid into main 2025-07-29 14:13:57 +02:00
4 changed files with 20 additions and 8 deletions

View File

@ -6,6 +6,12 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+## [7.2.1] - 2025-07-29
+### Changed
+- Run process in a new POSIX session and group, send termination signals to the whole process group
## [7.2.0] - 2025-07-28
### Added

View File

@ -1,3 +1,9 @@
+ogagent (7.2.1-1) stable; urgency=medium
+* Run process in a new POSIX session and group, send termination signals to the whole process group
+-- OpenGnsys developers <info@opengnsys.es> Tue, 29 Jul 2025 12:52:08 +0200
ogagent (7.2.0-1) stable; urgency=medium
* Log whether we are in ogLive or not

View File

@ -1 +1 @@
-7.2.0
+7.2.1

View File

@ -158,10 +158,10 @@ class ogLiveWorker(ServerWorker):
if 'thread' not in self.thread_list[job_id]: return { 'res': 2, 'der': 'Job is not running' }
t = self.thread_list[job_id]['thread']
pid = self.thread_list[job_id]['child_pid']
-logger.debug (f'pid ({pid})')
+logger.debug (f'pid/pgid/sid ({pid})')
try_times = 8
sig = signal.SIGTERM
-msg = f'could not kill pid ({pid}) after ({try_times}) tries'
+msg = f'could not killpg pid ({pid}) after ({try_times}) tries'
success = 2 ## mimic cmd['res'] in respuestaEjecucionComando(): "1" means success, "2" means failed
while True:
t.join (0.05)
@ -175,10 +175,10 @@ class ogLiveWorker(ServerWorker):
## this is fine in the first iteration of the loop, before we send any signals. In the rest of iterations, after some signals were sent, msg should be 'job terminated' instead.
if pid:
if os.path.exists (f'/proc/{pid}'):
-logger.debug (f'sending signal ({sig}) to pid ({pid})')
+logger.debug (f'sending signal ({sig}) to process group ({pid})')
## if the process finishes just here, nothing happens: the signal is sent to the void
-os.kill (pid, sig)
-#subprocess.run (['kill', '--signal', str(sig), str(pid)])
+os.killpg (pid, sig)
+#subprocess.run (['kill', '--signal', str(sig), f'-{pid}']) ## negative PID is used for sending signals to the process group
else:
msg = f'pid ({pid}) is gone, nothing to kill'
success = 1
@ -283,9 +283,9 @@ class ogLiveWorker(ServerWorker):
proc = ['bash', '-c', '{} {}'.format (devel_bash_prefix, exe)]
logger.debug ('subprocess.run ("{}")'.format (' '.join (proc)))
-p = subprocess.Popen (proc, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+p = subprocess.Popen (proc, stdout=subprocess.PIPE, stderr=subprocess.PIPE, start_new_session=True)
if self.pid_q:
-self.pid_q.put (p.pid)
+self.pid_q.put (p.pid) ## p.pid is also a session ID and a process group ID--we'll use it later to send signals to the whole group
else:
## esto sucede por ejemplo cuando arranca el agente, que estamos en interfaceAdmin() en el mismo hilo, sin _long_running_job ni hilo separado
#logger.debug ('no queue--not writing any PID to it')