added a mechanism to prevent starbench from hanging when the executed command fails
I had a case where on_exit() was never called because proc had no value, so the attempt to pass proc.pid to on_exit raised an exception before on_exit was called. As a result, the master thread waited forever for its child threads, as these children never signaled that they had finished. Work related to [https://bugzilla.ipr.univ-rennes.fr/show_bug.cgi?id=3372]
This commit is contained in:
parent
d71bf3f67f
commit
f2ceeb2cdb
|
@ -160,22 +160,32 @@ class CommandPerfEstimator(): # (false positive) pylint: disable=function-redef
|
|||
def run_in_thread(popen_args: List[str], on_exit: Callable[[ProcessId, ReturnCode, RunId], None]):
    '''runs popen_args as a subprocess, waits for its completion, then reports the outcome through on_exit

    stdout_filepath, stderr_filepath, cwd, run_id and self are read from the enclosing scope.

    on_exit is called exactly once, whatever happens (output files that can't be opened,
    command that can't be started, ...), so that the code waiting for this run is always
    notified. In case the process could not be started, on_exit receives -1 as both
    the process id and the return code.

    :param popen_args: the command to execute, in the form expected by subprocess.Popen
    :param on_exit: the function to call with (pid, returncode, run_id) once the run is over
    '''
    stdout = None
    stderr = None
    # values reported to on_exit when the process could not be started
    returncode = -1
    pid = -1
    try:
        try:
            if stdout_filepath is not None:
                stdout = open(stdout_filepath, 'w', encoding='utf8')
            if stderr_filepath is not None:
                stderr = open(stderr_filepath, 'w', encoding='utf8')
        except OSError:
            print(f'failed to open {stdout_filepath} or {stderr_filepath} in write mode')
            # the finally clause below still closes what was opened and calls on_exit
            return
        try:
            env = os.environ.copy()
            # restrict the number of threads used by openmp
            env['OMP_NUM_THREADS'] = f'{self.num_cores_per_run}'
            # restrict the number of threads used by intel math kernel library
            env['MKL_NUM_THREADS'] = f'{self.num_cores_per_run}'
            proc = subprocess.Popen(popen_args, cwd=cwd, stdout=stdout, stderr=stderr, env=env)
            pid = proc.pid
            proc.wait()
            returncode = proc.returncode
        except Exception:  # pylint: disable=broad-except
            # the failure is reported through the values passed to on_exit
            print(f'command failed: {popen_args}')
    finally:
        # close the output files even if the command failed, to avoid leaking file handles
        for stream in (stderr, stdout):
            if stream is not None:
                stream.close()
        # always signal the end of the run, otherwise the caller would wait forever
        on_exit(pid, returncode, run_id)
|
||||
thread = threading.Thread(target=run_in_thread, args=(popen_args, on_exit))
|
||||
thread.start()
|
||||
|
@ -255,8 +265,8 @@ class CommandPerfEstimator(): # (false positive) pylint: disable=function-redef
|
|||
with self._runs_lock:
|
||||
run = Run(self._next_run_id, worker_id)
|
||||
self._next_run_id += 1
|
||||
_run_thread = self.popen_and_call(popen_args=run_command, on_exit=self.on_exit, run_id=run.id, cwd=run_command_cwd, stdout_filepath=stdout_filepath, stderr_filepath=stderr_filepath) # noqa:F841
|
||||
self._runs[run.id] = run
|
||||
_run_thread = self.popen_and_call(popen_args=run_command, on_exit=self.on_exit, run_id=run.id, cwd=run_command_cwd, stdout_filepath=stdout_filepath, stderr_filepath=stderr_filepath) # noqa:F841
|
||||
|
||||
def run(self) -> DurationInSeconds:
|
||||
'''performs the runs of the command and returns the runs' average duration'''
|
||||
|
|
|
@ -77,7 +77,8 @@ def starbench_cmake_app(source_code_provider: IFileTreeProvider, tmp_dir: Path,
|
|||
max_num_cores=num_cores,
|
||||
stop_condition=StopAfterSingleRun(),
|
||||
run_command_cwd=Path('/tmp'),
|
||||
stdout_filepath=None)
|
||||
stdout_filepath=worker_dir / 'createdir_stdout.txt',
|
||||
stderr_filepath=worker_dir / 'createdir_stderr.txt')
|
||||
_create_build_dir_duration = create_build_dir.run() # noqa: F841
|
||||
# build_dir.mkdir(exist_ok=True)
|
||||
|
||||
|
|
Loading…
Reference in New Issue