xref: /llvm-project/lldb/packages/Python/lldbsuite/test/test_runner/process_control.py (revision 2238dcc39358353cac21df75c3c3286ab20b8f53)
1"""
2Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
3See https://llvm.org/LICENSE.txt for license information.
4SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
5
6Provides classes used by the test results reporting infrastructure
7within the LLDB test suite.
8
9
10This module provides process-management support for the LLDB test
11running infrastructure.
12"""
13
14# System imports
15import os
16import re
17import signal
18import subprocess
19import sys
20import threading
21
22
class CommunicatorThread(threading.Thread):
    """Provides a thread class that communicates with a subprocess.

    The thread calls communicate() on the given process, stores the
    result in self.output, and sets the supplied event when it is done,
    whether communicate() returned normally or raised.
    """

    def __init__(self, process, event, output_file):
        """Initializes the thread.  The thread is not started here.

        @param process the subprocess.Popen-like object to communicate
        with.  Its pid attribute is read immediately.

        @param event the threading.Event-like object that is set when
        run() finishes.

        @param output_file where a communicate() failure is reported.
        May be a file-like object with a write() method, a callable that
        accepts the message string, or None if no reporting is desired.
        """
        super(CommunicatorThread, self).__init__()
        # Don't let this thread prevent shutdown.
        self.daemon = True
        self.process = process
        self.pid = process.pid
        self.event = event
        self.output_file = output_file
        self.output = None

    def _report(self, message):
        """Sends message to output_file, if one was provided."""
        if not self.output_file:
            return
        # Accept either a file-like object or a plain write callable, so
        # that reporting a failure can never itself fail with an
        # AttributeError on a callable that has no write attribute.
        writer = getattr(self.output_file, "write", self.output_file)
        writer(message)

    def run(self):
        """Pumps the child's I/O until it exits, then signals the event."""
        try:
            # Communicate with the child process.
            # This will not complete until the child process terminates.
            self.output = self.process.communicate()
        except Exception as exception:  # pylint: disable=broad-except
            # Best effort: report which child failed and why, but never
            # let the failure keep us from signaling completion below.
            self._report(
                "exception while using communicate() for pid {}: {}\n".format(
                    self.pid, exception
                )
            )
        finally:
            # Signal that the thread's run is complete.
            self.event.set()
51
52
# Provides a regular expression for matching gtimeout-based durations:
# a non-negative decimal number (e.g. "10", "1.5", ".5") followed by an
# optional unit suffix.  Group 1 is the number, group 2 the suffix.
TIMEOUT_REGEX = re.compile(r"^(\d+(?:\.\d*)?|\.\d+)([smhd])?$")

# Number of seconds represented by each supported unit suffix.  A missing
# suffix (None) means the value is already in seconds.
_TIMEOUT_UNIT_SECONDS = {
    None: 1.0,
    "s": 1.0,
    "m": 60.0,
    "h": 60.0 * 60.0,
    "d": 24 * (60.0 * 60.0),
}


def timeout_to_seconds(timeout):
    """Converts timeout/gtimeout timeout values into seconds.

    @param timeout a timeout string consisting of a non-negative number
    (fractions such as "1.5" are accepted) optionally followed by one of
    the unit suffixes s (seconds), m (minutes), h (hours) or d (days);
    e.g. "xm" represents x minutes.  With no suffix the value is taken
    to be in seconds.  May be None.

    @return None if timeout is None, or the number of seconds as a float
    if a valid timeout format was specified.

    @raise ValueError if timeout is a string that is not a valid
    timeout specification.
    """
    if timeout is None:
        return None

    match = TIMEOUT_REGEX.match(timeout)
    if not match:
        raise ValueError("could not parse TIMEOUT spec '{}'".format(timeout))

    # The pattern only admits suffixes that are keys of the table, so
    # this lookup cannot fail.
    return _TIMEOUT_UNIT_SECONDS[match.group(2)] * float(match.group(1))
91
92
class ProcessHelper(object):
    """Provides an interface for accessing process-related functionality.

    This class provides a factory method that gives the caller a
    platform-specific implementation instance of the class.

    Clients of the class should stick to the methods provided in this
    base class.

    @see ProcessHelper.process_helper()
    """

    def __init__(self):
        super(ProcessHelper, self).__init__()

    @classmethod
    def process_helper(cls):
        """Returns a platform-specific ProcessHelper instance.
        @return a ProcessHelper instance that does the right thing for
        the current platform.
        """

        # If you add a new platform, create an instance here and
        # return it.
        if os.name == "nt":
            return WindowsProcessHelper()
        else:
            # For all POSIX-like systems.
            return UnixProcessHelper()

    def create_piped_process(self, command, new_process_group=True):
        # pylint: disable=no-self-use,unused-argument
        # As expected.  We want derived classes to implement this.
        """Creates a subprocess.Popen-based class with I/O piped to the parent.

        @param command the command line list as would be passed to
        subprocess.Popen().  Use the list form rather than the string form.

        @param new_process_group indicates if the caller wants the
        process to be created in its own process group.  Each OS handles
        this concept differently.  It provides a level of isolation and
        can simplify or enable terminating the process tree properly.

        @return a subprocess.Popen-like object.

        @raise NotImplementedError always in this base class; derived
        classes must override this method.
        """
        raise NotImplementedError("derived class must implement")

    def supports_soft_terminate(self):
        # pylint: disable=no-self-use
        # As expected.  We want derived classes to implement this.
        """Indicates if the platform supports soft termination.

        Soft termination is the concept of a terminate mechanism that
        allows the target process to shut down nicely, but with the
        catch that the process might choose to ignore it.

        Platform supporter note: only mark soft terminate as supported
        if the target process has some way to evade the soft terminate
        request; otherwise, just support the hard terminate method.

        @return True if the platform supports a soft terminate mechanism.
        """
        # By default, we do not support a soft terminate mechanism.
        return False

    def soft_terminate(self, popen_process, log_file=None, want_core=True):
        # pylint: disable=no-self-use,unused-argument
        # As expected.  We want derived classes to implement this.
        """Attempts to terminate the process in a polite way.

        This terminate method is intended to give the child process a
        chance to clean up and exit on its own, possibly with a request
        to drop a core file or equivalent (i.e. [mini-]crashdump, crashlog,
        etc.)  If new_process_group was set in the process creation method
        and the platform supports it, this terminate call will attempt to
        kill the whole process tree rooted in this child process.

        The base implementation simply calls terminate() on the given
        process object and ignores log_file and want_core.

        @param popen_process the subprocess.Popen-like object returned
        by one of the process-creation methods of this class.

        @param log_file file-like object used to emit error-related
        logging info.  May be None if no error-related info is desired.

        @param want_core True if the caller would like to get a core
        dump (or the analogous crash report) from the terminated process.
        """
        popen_process.terminate()

    def hard_terminate(self, popen_process, log_file=None):
        # pylint: disable=no-self-use,unused-argument
        # As expected.  We want derived classes to implement this.
        """Attempts to terminate the process immediately.

        This terminate method is intended to kill child process in
        a manner in which the child process has no ability to block,
        and also has no ability to clean up properly.  If new_process_group
        was specified when creating the process, and if the platform
        implementation supports it, this will attempt to kill the
        whole process tree rooted in the child process.

        The base implementation simply calls kill() on the given
        process object and ignores log_file.

        @param popen_process the subprocess.Popen-like object returned
        by one of the process-creation methods of this class.

        @param log_file file-like object used to emit error-related
        logging info.  May be None if no error-related info is desired.
        """
        popen_process.kill()

    def was_soft_terminate(self, returncode, with_core):
        # pylint: disable=no-self-use,unused-argument
        # As expected.  We want derived classes to implement this.
        """Returns if Popen-like object returncode matches soft terminate.

        @param returncode the returncode from the Popen-like object that
        terminated with a given return code.

        @param with_core indicates whether the returncode should match
        a core-generating return signal.

        @return True when the returncode represents what the system would
        issue when a soft_terminate() with the given with_core arg occurred;
        False otherwise.

        @raise NotImplementedError if the platform claims soft terminate
        support but did not override this method.
        """
        if not self.supports_soft_terminate():
            # If we don't support soft termination on this platform,
            # then this should always be False.
            return False
        # Once a platform claims to support soft terminate, it
        # needs to be able to identify it by overriding this method.
        raise NotImplementedError("platform needs to implement")

    def was_hard_terminate(self, returncode):
        # pylint: disable=no-self-use,unused-argument
        # As expected.  We want derived classes to implement this.
        """Returns if Popen-like object returncode matches that of a hard
        terminate attempt.

        @param returncode the returncode from the Popen-like object that
        terminated with a given return code.

        @return True when the returncode represents what the system would
        issue when a hard_terminate() occurred; False
        otherwise.

        @raise NotImplementedError always in this base class; derived
        classes must override this method.
        """
        raise NotImplementedError("platform needs to implement")

    def soft_terminate_signals(self):
        # pylint: disable=no-self-use
        """Retrieve signal numbers that can be sent to soft terminate.
        @return a list of signal numbers that can be sent to soft terminate
        a process, or None if not applicable.
        """
        return None

    def is_exceptional_exit(self, popen_status):
        # pylint: disable=no-self-use,unused-argument
        """Returns whether the program exit status is exceptional.

        Returns whether the return code from a Popen process is exceptional
        (e.g. signals on POSIX systems).

        Derived classes should override this if they can detect exceptional
        program exit.

        @param popen_status the return code from a Popen-like object.

        @return True if the given popen_status represents an exceptional
        program exit; False otherwise.
        """
        return False

    def exceptional_exit_details(self, popen_status):
        # pylint: disable=no-self-use,unused-argument
        """Returns the normalized exceptional exit code and a description.

        Given an exceptional exit code, returns the integral value of the
        exception (e.g. signal number for POSIX) and a description (e.g.
        signal name on POSIX) for the result.

        Derived classes should override this if they can detect exceptional
        program exit.

        It is fine to not implement this so long as is_exceptional_exit()
        always returns False.

        @param popen_status the return code from a Popen-like object.

        @return (normalized exception code, symbolic exception description)

        @raise NotImplementedError always in this base class.
        """
        raise NotImplementedError(
            "exceptional_exit_details() called on unsupported class"
        )
278
279
class UnixProcessHelper(ProcessHelper):
    """Provides a ProcessHelper for Unix-like operating systems.

    This implementation supports anything that looks Posix-y
    (e.g. Darwin, Linux, *BSD, etc.)
    """

    def __init__(self):
        super(UnixProcessHelper, self).__init__()

    @classmethod
    def _create_new_process_group(cls):
        """Creates a new process group for the calling process."""
        os.setpgid(os.getpid(), os.getpid())

    def create_piped_process(self, command, new_process_group=True):
        """Creates a child process with stdin/stdout/stderr piped.

        @param command the command line list to run.

        @param new_process_group if True, the child puts itself into a
        new process group (whose id equals its pid) before the exec.

        @return the subprocess.Popen object, with an extra
        using_process_groups attribute recording new_process_group.
        """
        # Determine what to run after the fork but before the exec.
        preexec_func = self._create_new_process_group if new_process_group else None

        # Create the process.
        process = subprocess.Popen(
            command,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            universal_newlines=True,  # Elicits automatic byte -> string decoding in Py3
            close_fds=True,
            preexec_fn=preexec_func,
        )

        # Remember whether we're using process groups for this
        # process.
        process.using_process_groups = new_process_group
        return process

    def supports_soft_terminate(self):
        """Returns True: POSIX offers catchable termination signals."""
        # POSIX does support a soft terminate via:
        # * SIGTERM (no core requested)
        # * SIGQUIT (core requested if enabled, see ulimit -c)
        return True

    @classmethod
    def _validate_pre_terminate(cls, popen_process, log_file):
        """Checks whether it makes sense to signal the given process.

        @param popen_process the Popen object created by
        create_piped_process().

        @param log_file file-like object for diagnostics, or None.

        @return True if a signal should be sent; False if the request
        should be skipped (no valid pid, or the process already exited
        and no process group is in use).

        @raise ValueError if popen_process is None.
        """
        # Validate args.
        if popen_process is None:
            raise ValueError("popen_process is None")

        # Ensure we have something that looks like a valid process.
        if popen_process.pid < 1:
            if log_file:
                # This check guards both soft and hard termination, so
                # the message does not name either one.
                log_file.write("skipping termination: no process id\n")
            return False

        # We only do the process liveness check if we're not using
        # process groups.  With process groups, checking if the main
        # inferior process is dead and short circuiting here is no
        # good - children of it in the process group could still be
        # alive, and they should be killed during a timeout.
        if not popen_process.using_process_groups:
            # Don't kill if it's already dead.
            popen_process.poll()
            if popen_process.returncode is not None:
                # It has a returncode.  It has already stopped.
                if log_file:
                    log_file.write(
                        "requested to terminate pid {} but it has already "
                        "terminated, returncode {}\n".format(
                            popen_process.pid, popen_process.returncode
                        )
                    )
                # Move along...
                return False

        # Good to go.
        return True

    def _kill_with_signal(self, popen_process, log_file, signum):
        """Sends signum to the process, or to its process group.

        A process (group) that no longer exists is silently ignored; any
        other OSError is re-raised.

        @param popen_process the Popen object created by
        create_piped_process().

        @param log_file file-like object for diagnostics, or None.

        @param signum the signal number to send.
        """
        # Validate we're ready to terminate this.
        if not self._validate_pre_terminate(popen_process, log_file):
            return

        # Choose kill mechanism based on whether we're targeting
        # a process group or just a process.
        try:
            if popen_process.using_process_groups:
                # The group id equals the child's pid; see
                # _create_new_process_group().
                os.killpg(popen_process.pid, signum)
            else:
                os.kill(popen_process.pid, signum)
        except OSError as error:
            import errno

            if error.errno != errno.ESRCH:
                raise
            # ESRCH is okay - failed to find the process.  It may be
            # that the timeout pre-kill hook eliminated the process.
            # We'll ignore.

    def soft_terminate(self, popen_process, log_file=None, want_core=True):
        """Sends SIGQUIT (want_core) or SIGTERM to the process (group).

        @param popen_process the Popen object created by
        create_piped_process().

        @param log_file file-like object for diagnostics, or None.

        @param want_core True to request a core dump from the process.
        """
        # Choose signal based on desire for core file.
        if want_core:
            # SIGQUIT will generate core by default.  Can be caught.
            signum = signal.SIGQUIT
        else:
            # SIGTERM is the traditional nice way to kill a process.
            # Can be caught, doesn't generate a core.
            signum = signal.SIGTERM

        self._kill_with_signal(popen_process, log_file, signum)

    def hard_terminate(self, popen_process, log_file=None):
        """Sends SIGKILL to the process (group).

        @param popen_process the Popen object created by
        create_piped_process().

        @param log_file file-like object for diagnostics, or None.
        """
        self._kill_with_signal(popen_process, log_file, signal.SIGKILL)

    def was_soft_terminate(self, returncode, with_core):
        """Returns True if returncode is the negated soft-terminate signal.

        @param returncode the Popen returncode to examine.

        @param with_core True to compare against SIGQUIT, False to
        compare against SIGTERM.
        """
        expected_signal = signal.SIGQUIT if with_core else signal.SIGTERM
        return returncode == -expected_signal

    def was_hard_terminate(self, returncode):
        """Returns True if returncode is the negated SIGKILL number."""
        return returncode == -signal.SIGKILL

    def soft_terminate_signals(self):
        """Returns the signals soft_terminate() may send."""
        return [signal.SIGQUIT, signal.SIGTERM]

    def is_exceptional_exit(self, popen_status):
        """Returns True if popen_status is negative (ended by a signal)."""
        return popen_status < 0

    @classmethod
    def _signal_names_by_number(cls):
        """Returns a dict mapping signal numbers to SIG* names.

        Names are taken from the signal module, skipping the SIG_*
        constants.  When several names share a number, the
        alphabetically first name is the one kept.
        """
        # Visit names in reverse alphabetical order so that, for aliased
        # numbers, the alphabetically first name is stored last and wins.
        return {
            number: name
            for name, number in sorted(
                vars(signal).items(), key=lambda item: item[0], reverse=True
            )
            if name.startswith("SIG") and not name.startswith("SIG_")
        }

    def exceptional_exit_details(self, popen_status):
        """Returns (signal number, signal name) for a signal exit status.

        @param popen_status the negative Popen returncode of a process
        that was terminated by a signal.

        @return a tuple of the signal number and its SIG* name; the name
        is the empty string if the number is not known.
        """
        signo = -popen_status
        signal_name = self._signal_names_by_number().get(signo, "")
        return (signo, signal_name)
433
434
class WindowsProcessHelper(ProcessHelper):
    """Provides a Windows implementation of the ProcessHelper class."""

    def __init__(self):
        super(WindowsProcessHelper, self).__init__()

    def create_piped_process(self, command, new_process_group=True):
        """Creates a child process with stdin/stdout/stderr piped.

        @param command the command line list to run.

        @param new_process_group if True, the child is created with the
        CREATE_NEW_PROCESS_GROUP creation flag.

        @return the subprocess.Popen object.
        """
        # CREATE_NEW_PROCESS_GROUP is needed if we want os.kill() to work
        # on the subprocess; otherwise no special creation flags are used.
        flags = subprocess.CREATE_NEW_PROCESS_GROUP if new_process_group else 0

        popen_options = dict(
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            universal_newlines=True,  # Elicits automatic byte -> string decoding in Py3
            creationflags=flags,
        )
        return subprocess.Popen(command, **popen_options)

    def was_hard_terminate(self, returncode):
        """Returns True for any returncode other than 0."""
        return returncode != 0
459
460
class ProcessDriver(object):
    """Drives a child process, notifies on important events, and can timeout.

    Clients are expected to derive from this class and override the
    on_process_started and on_process_exited methods if they want to
    hook either of those.

    This class supports timing out the child process in a platform-agnostic
    way.  The on_process_exited method is informed if the exit was natural
    or if it was due to a timeout.

    Instances also act as the log sink for the termination helpers: they
    are handed around as a file-like object whose write() method is the
    one defined below.
    """

    def __init__(self, soft_terminate_timeout=10.0):
        """Initializes the driver without starting any process.

        @param soft_terminate_timeout number of seconds to wait for a
        soft terminate to take effect before escalating.
        """
        super(ProcessDriver, self).__init__()
        self.process_helper = ProcessHelper.process_helper()
        self.pid = None
        # Create the synchronization event for notifying when the
        # inferior dotest process is complete.
        self.done_event = threading.Event()
        self.io_thread = None
        self.process = None
        # Number of seconds to wait for the soft terminate to
        # wrap up, before moving to more drastic measures.
        # Might want this longer if core dumps are generated and
        # take a long time to write out.
        self.soft_terminate_timeout = soft_terminate_timeout
        # Number of seconds to wait for the hard terminate to
        # wrap up, before giving up on the io thread.  This should
        # be fast.
        self.hard_terminate_timeout = 5.0
        self.returncode = None

    # =============================================
    # Methods for subclasses to override if desired.
    # =============================================

    def on_process_started(self):
        """Called after the child is created, before its io thread starts.

        self.process and self.pid are already set.  Does nothing by
        default.
        """

    def on_process_exited(self, command, output, was_timeout, exit_status):
        """Called once the driver is done with the child process.

        Does nothing by default.

        @param command the command that was run.

        @param output the io thread's output attribute: the result of
        communicate() on the child, or None if that did not complete.

        @param was_timeout True if the child did not finish within the
        timeout; always False for run_command().

        @param exit_status the value of self.returncode.
        """

    def on_timeout_pre_kill(self):
        """Called after the timeout interval elapses but before killing it.

        This method is added to enable derived classes the ability to do
        something to the process prior to it being killed.  For example,
        this would be a good spot to run a program that samples the process
        to see what it was doing (or not doing).

        Do not attempt to reap the process (i.e. use wait()) in this method.
        That will interfere with the kill mechanism and return code processing.
        """

    def write(self, content):
        # pylint: disable=no-self-use
        # Intended - we want derived classes to be able to override
        # this and use any self state they may contain.
        """Writes diagnostic content; by default to sys.stdout."""
        sys.stdout.write(content)

    # ==============================================================
    # Operations used to drive processes.  Clients will want to call
    # one of these.
    # ==============================================================

    def run_command(self, command):
        """Runs command to completion, with no timeout.

        Calls on_process_exited() with was_timeout False when done.

        @param command the command line list to run.
        """
        # Start up the child process and the thread that does the
        # communication pump.
        self._start_process_and_io_thread(command)

        # Wait indefinitely for the child process to finish
        # communicating.  This indicates it has closed stdout/stderr
        # pipes and is done.
        self.io_thread.join()
        self.returncode = self.process.wait()
        if self.returncode is None:
            raise Exception(
                "no exit status available for pid {} after the "
                " inferior dotest.py should have completed".format(self.process.pid)
            )

        # Notify of non-timeout exit.
        self.on_process_exited(command, self.io_thread.output, False, self.returncode)

    def run_command_with_timeout(self, command, timeout, want_core):
        """Runs command, terminating it if it exceeds the timeout.

        @param command the command line list to run.

        @param timeout a timeout spec as accepted by timeout_to_seconds(),
        or None.

        @param want_core True if a soft terminate should request a core
        dump from the timed-out process.
        """
        # Figure out how many seconds our timeout description is requesting.
        timeout_seconds = timeout_to_seconds(timeout)

        # Start up the child process and the thread that does the
        # communication pump.
        self._start_process_and_io_thread(command)

        self._wait_with_timeout(timeout_seconds, command, want_core)

    # ================
    # Internal details.
    # ================

    def _start_process_and_io_thread(self, command):
        """Creates the child process and starts its communication thread."""
        # Create the process.
        self.process = self.process_helper.create_piped_process(command)
        self.pid = self.process.pid
        self.on_process_started()

        # Ensure the event is cleared that is used for signaling
        # from the communication() thread when communication is
        # complete (i.e. the inferior process has finished).
        self.done_event.clear()

        # Hand the thread this driver as its output file: the thread
        # reports failures by calling write() on that object, so it must
        # be given the object that has write(), not the bound method.
        self.io_thread = CommunicatorThread(self.process, self.done_event, self)
        self.io_thread.start()

    def _attempt_soft_kill(self, want_core):
        """Soft terminates the child and waits for its io thread.

        @param want_core True to request a core dump from the child.

        @return (terminated, done_trying): (True, None) if the io thread
        finished within soft_terminate_timeout, else (False, False).
        """
        # The inferior dotest timed out.  Attempt to clean it
        # with a non-drastic method (so it can clean up properly
        # and/or generate a core dump).  Often the OS can't guarantee
        # that the process will really terminate after this.
        self.process_helper.soft_terminate(
            self.process, want_core=want_core, log_file=self
        )

        # Now wait up to a certain timeout period for the io thread
        # to say that the communication ended.  If that wraps up
        # within our soft terminate timeout, we're all done here.
        self.io_thread.join(self.soft_terminate_timeout)
        if self.io_thread.is_alive():
            self.write(
                "soft kill attempt of process {} timed out "
                "after {} seconds\n".format(
                    self.process.pid, self.soft_terminate_timeout
                )
            )
            return False, False

        # stdout/stderr were closed on the child process side. We
        # should be able to wait and reap the child process here.
        self.returncode = self.process.wait()
        # We terminated, and the done_trying result is n/a
        return True, None

    def _attempt_hard_kill(self):
        """Hard terminates the child, reaps it, and waits for the io thread.

        @return (terminated, done_trying): terminated is True if a
        return code was obtained; done_trying is always True.
        """
        # Instruct the process to terminate and really force it to
        # happen.  Don't give the process a chance to ignore.
        self.process_helper.hard_terminate(self.process, log_file=self)

        # Reap the child process.  This should not hang as the
        # hard_kill() mechanism is supposed to really kill it.
        # Improvement option:
        # If this does ever hang, convert to a self.process.poll()
        # loop checking on self.process.returncode until it is not
        # None or the timeout occurs.
        self.returncode = self.process.wait()

        # Wait a few moments for the io thread to finish...
        self.io_thread.join(self.hard_terminate_timeout)
        if self.io_thread.is_alive():
            # ... but this is not critical if it doesn't end for some
            # reason.
            self.write(
                "hard kill of process {} timed out after {} seconds waiting "
                "for the io thread (ignoring)\n".format(
                    self.process.pid, self.hard_terminate_timeout
                )
            )

        # Set if it terminated.  (Set up for optional improvement above).
        terminated = self.returncode is not None
        # Nothing else to try.
        return terminated, True

    def _attempt_termination(self, attempt_count, want_core):
        """Performs the attempt_count-th (1-based) termination attempt.

        With soft terminate support, attempt 1 is a soft kill and
        attempt 2 a hard kill; without it, attempt 1 is a hard kill.
        Any further attempt only reports the current state.

        @param attempt_count the 1-based number of this attempt.

        @param want_core True to request a core dump on a soft kill.

        @return (terminated, done_trying).
        """
        if self.process_helper.supports_soft_terminate():
            # When soft termination is supported, we first try to stop
            # the process with a soft terminate.  Failing that, we try
            # the hard terminate option.
            if attempt_count == 1:
                return self._attempt_soft_kill(want_core)
            if attempt_count == 2:
                return self._attempt_hard_kill()
        elif attempt_count == 1:
            # We only try the hard terminate option when there
            # is no soft terminate available.
            return self._attempt_hard_kill()

        # We don't have anything else to try.
        return self.returncode is not None, True

    def _wait_with_timeout(self, timeout_seconds, command, want_core):
        """Waits for the child to finish, terminating it on timeout.

        Always ends by calling on_process_exited().

        @param timeout_seconds number of seconds to wait for the io
        thread's completion event; passed straight to the event's
        wait().

        @param command the command that was run (forwarded to
        on_process_exited()).

        @param want_core True to request a core dump on a soft kill.
        """
        # Allow up to timeout seconds for the io thread to wrap up.
        # If that completes, the child process should be done.
        completed_normally = self.done_event.wait(timeout_seconds)
        if completed_normally:
            # Reap the child process here.
            self.returncode = self.process.wait()
        else:
            # Allow derived classes to do some work after we detected
            # a timeout but before we touch the timed-out process.
            self.on_timeout_pre_kill()

            # Try as many attempts as we support for trying to shut down
            # the child process if it's not already shut down.
            process_terminated = False
            terminate_attempt_count = 0
            while not process_terminated:
                terminate_attempt_count += 1
                # Attempt to terminate.
                process_terminated, done_trying = self._attempt_termination(
                    terminate_attempt_count, want_core
                )
                # Check if there's nothing more to try.
                if done_trying:
                    # Break out of our termination attempt loop.
                    break

        # At this point, we're calling it good.  The process
        # finished gracefully, was shut down after one or more
        # attempts, or we failed but gave it our best effort.
        self.on_process_exited(
            command, self.io_thread.output, not completed_normally, self.returncode
        )
696
697
def patched_init(self, *args, **kwargs):
    """Replacement for Popen.__init__ that also sets up wait_condition.

    Runs the saved original initializer with the same arguments, then
    attaches the condition variable used by patched_wait() and
    patched_poll().
    """
    self.original_init(*args, **kwargs)
    # Initialize our condition variable that protects wait()/poll().
    condition = threading.Condition()
    self.wait_condition = condition
702
703
def patched_wait(self, *args, **kwargs):
    """Replacement for Popen.wait that runs while holding wait_condition.

    Forwards all arguments to the saved original wait(), notifies all
    waiters on the condition once it returns, and hands back its result.
    The condition is released even if the original wait() raises.
    """
    with self.wait_condition:
        status = self.original_wait(*args, **kwargs)
        # The process finished.  Signal the condition.
        self.wait_condition.notify_all()
    return status
713
714
def patched_poll(self, *args, **kwargs):
    """Replacement for Popen.poll that runs while holding wait_condition.

    Forwards all arguments to the saved original poll() and returns its
    result.  If the process has a return code afterwards, all waiters on
    the condition are notified.  The condition is released even if the
    original poll() raises.
    """
    with self.wait_condition:
        status = self.original_poll(*args, **kwargs)
        finished = self.returncode is not None
        if finished:
            # We did complete, and we have the return value.
            # Signal the event to indicate we're done.
            self.wait_condition.notify_all()
    return status
726
727
def patch_up_subprocess_popen():
    """Installs the patched __init__/wait/poll methods on subprocess.Popen.

    Each original method is saved on the class as original_init,
    original_wait or original_poll before being replaced.

    The patch is applied at most once per method: if the saved attribute
    already exists, that method is left alone.  Without this guard, a
    second call (for example, when this module gets loaded twice) would
    save the already-patched function as the "original", and the patched
    method would then call itself without end.
    """
    popen_class = subprocess.Popen
    patches = (
        ("__init__", "original_init", patched_init),
        ("wait", "original_wait", patched_wait),
        ("poll", "original_poll", patched_poll),
    )
    for method_name, saved_name, replacement in patches:
        if saved_name in vars(popen_class):
            # Already patched; keep the true original that was saved.
            continue
        setattr(popen_class, saved_name, getattr(popen_class, method_name))
        setattr(popen_class, method_name, replacement)


# Replace key subprocess.Popen() threading-unprotected methods with
# threading-protected versions.
patch_up_subprocess_popen()
742