xref: /llvm-project/lldb/packages/Python/lldbsuite/test/test_runner/process_control.py (revision ca5793ea5c49842bc234b45a88467fd74667573b)
1"""
2The LLVM Compiler Infrastructure
3
4This file is distributed under the University of Illinois Open Source
5License. See LICENSE.TXT for details.
6
7Provides classes used by the test results reporting infrastructure
8within the LLDB test suite.
9
10
11This module provides process-management support for the LLDB test
running infrastructure.
13"""
14
15# System imports
16import os
17import re
18import signal
19import subprocess
20import sys
21import threading
22
23
class CommunicatorThread(threading.Thread):
    """Provides a thread class that communicates with a subprocess.

    The thread calls communicate() on the given process, stores the
    result in self.output, and sets the given event when run() is
    finished, whether or not communicate() raised.
    """
    def __init__(self, process, event, output_file):
        """Initializes the communicator thread.

        @param process the subprocess.Popen-like object to communicate
        with.  Its pid is captured at construction time.

        @param event an event object whose set() method is called when
        run() completes.

        @param output_file sink used to report a communicate() failure.
        May be a file-like object exposing write(), a plain callable
        that accepts the text to log, or None to suppress reporting.
        """
        super(CommunicatorThread, self).__init__()
        # Don't let this thread prevent shutdown.
        self.daemon = True
        self.process = process
        self.pid = process.pid
        self.event = event
        self.output_file = output_file
        self.output = None

    def _log_failure(self, message):
        """Writes message to the configured sink, if there is one."""
        sink = self.output_file
        if not sink:
            return
        # Accept both a file-like object and a bare write callable:
        # use the sink's write() when it has one, otherwise call the
        # sink itself.
        writer = getattr(sink, "write", sink)
        writer(message)

    def run(self):
        """Pumps the child's I/O until it terminates, then signals the event."""
        try:
            # Communicate with the child process.
            # This will not complete until the child process terminates.
            self.output = self.process.communicate()
        except Exception as exception:  # pylint: disable=broad-except
            # Report both the pid and the failure; the thread must not
            # die silently, and the event below still has to be set.
            self._log_failure(
                "exception while using communicate() for pid {}: "
                "{}\n".format(self.pid, exception))
        finally:
            # Signal that the thread's run is complete.
            self.event.set()
49
50
# Provides a regular expression for matching gtimeout-based durations:
# a whole number optionally followed by one unit suffix.
TIMEOUT_REGEX = re.compile(r"(^\d+)([smhd])?$")

# Number of seconds in one of each supported unit.  A missing suffix
# (None) means the value is already expressed in seconds.
_TIMEOUT_UNIT_SECONDS = {
    None: 1.0,
    's': 1.0,
    'm': 60.0,
    'h': 60.0 * 60.0,
    'd': 24 * (60.0 * 60.0),
}


def timeout_to_seconds(timeout):
    """Converts timeout/gtimeout timeout values into seconds.

    @param timeout None, or a string holding a whole number optionally
    followed by one of the suffixes 's' (seconds), 'm' (minutes),
    'h' (hours) or 'd' (days); e.g. "90", "90s", "5m", "2h", "1d".
    A value without a suffix is taken to be in seconds.

    @return None if timeout is None, or the number of seconds as a float
    if a valid timeout format was specified.

    @raise ValueError if timeout does not match the accepted format.
    """
    if timeout is None:
        return None

    match = TIMEOUT_REGEX.match(timeout)
    if match is None:
        raise ValueError("could not parse TIMEOUT spec '{}'".format(
            timeout))

    # The regex only admits the suffixes present in the table, so the
    # lookup cannot miss.
    value, units = match.groups()
    return _TIMEOUT_UNIT_SECONDS[units] * float(value)
90
91
class ProcessHelper(object):
    """Provides an interface for accessing process-related functionality.

    This class provides a factory method that gives the caller a
    platform-specific implementation instance of the class.

    Clients of the class should stick to the methods provided in this
    base class.

    Methods that a platform must supply raise NotImplementedError here.

    @see ProcessHelper.process_helper()
    """
    def __init__(self):
        super(ProcessHelper, self).__init__()

    @classmethod
    def process_helper(cls):
        """Returns a platform-specific ProcessHelper instance.
        @return a ProcessHelper instance that does the right thing for
        the current platform.
        """

        # If you add a new platform, create an instance here and
        # return it.
        if os.name == "nt":
            return WindowsProcessHelper()

        # For all POSIX-like systems.
        return UnixProcessHelper()

    def create_piped_process(self, command, new_process_group=True):
        """Creates a subprocess.Popen-based class with I/O piped to the parent.

        @param command the command line list as would be passed to
        subprocess.Popen().  Use the list form rather than the string form.

        @param new_process_group indicates if the caller wants the
        process to be created in its own process group.  Each OS handles
        this concept differently.  It provides a level of isolation and
        can simplify or enable terminating the process tree properly.

        @return a subprocess.Popen-like object.

        @raise NotImplementedError always; derived classes must override.
        """
        # pylint: disable=no-self-use,unused-argument
        # As expected.  We want derived classes to implement this.
        raise NotImplementedError("derived class must implement")

    def supports_soft_terminate(self):
        """Indicates if the platform supports soft termination.

        Soft termination is the concept of a terminate mechanism that
        allows the target process to shut down nicely, but with the
        catch that the process might choose to ignore it.

        Platform supporter note: only mark soft terminate as supported
        if the target process has some way to evade the soft terminate
        request; otherwise, just support the hard terminate method.

        @return True if the platform supports a soft terminate mechanism.
        """
        # pylint: disable=no-self-use
        # By default, we do not support a soft terminate mechanism.
        return False

    def soft_terminate(self, popen_process, log_file=None, want_core=True):
        """Attempts to terminate the process in a polite way.

        This terminate method is intended to give the child process a
        chance to clean up and exit on its own, possibly with a request
        to drop a core file or equivalent (i.e. [mini-]crashdump, crashlog,
        etc.)  If new_process_group was set in the process creation method
        and the platform supports it, this terminate call will attempt to
        kill the whole process tree rooted in this child process.

        The base implementation simply calls terminate() on the
        process object and ignores log_file and want_core.

        @param popen_process the subprocess.Popen-like object returned
        by one of the process-creation methods of this class.

        @param log_file file-like object used to emit error-related
        logging info.  May be None if no error-related info is desired.

        @param want_core True if the caller would like to get a core
        dump (or the analogous crash report) from the terminated process.
        """
        # pylint: disable=no-self-use,unused-argument
        popen_process.terminate()

    def hard_terminate(self, popen_process, log_file=None):
        """Attempts to terminate the process immediately.

        This terminate method is intended to kill child process in
        a manner in which the child process has no ability to block,
        and also has no ability to clean up properly.  If new_process_group
        was specified when creating the process, and if the platform
        implementation supports it, this will attempt to kill the
        whole process tree rooted in the child process.

        The base implementation simply calls kill() on the process
        object and ignores log_file.

        @param popen_process the subprocess.Popen-like object returned
        by one of the process-creation methods of this class.

        @param log_file file-like object used to emit error-related
        logging info.  May be None if no error-related info is desired.
        """
        # pylint: disable=no-self-use,unused-argument
        popen_process.kill()

    def was_soft_terminate(self, returncode, with_core):
        """Returns if Popen-like object returncode matches soft terminate.

        @param returncode the returncode from the Popen-like object that
        terminated with a given return code.

        @param with_core indicates whether the returncode should match
        a core-generating return signal.

        @return True when the returncode represents what the system would
        issue when a soft_terminate() with the given with_core arg occurred;
        False otherwise.

        @raise NotImplementedError if the platform claims soft terminate
        support but has not overridden this method.
        """
        # pylint: disable=no-self-use,unused-argument
        if not self.supports_soft_terminate():
            # If we don't support soft termination on this platform,
            # then this should always be False.
            return False

        # Once a platform claims to support soft terminate, it
        # needs to be able to identify it by overriding this method.
        raise NotImplementedError("platform needs to implement")

    def was_hard_terminate(self, returncode):
        """Returns if Popen-like object returncode matches that of a hard
        terminate attempt.

        @param returncode the returncode from the Popen-like object that
        terminated with a given return code.

        @return True when the returncode represents what the system would
        issue when a hard_terminate() occurred; False
        otherwise.

        @raise NotImplementedError always; derived classes must override.
        """
        # pylint: disable=no-self-use,unused-argument
        # As expected.  We want derived classes to implement this.
        raise NotImplementedError("platform needs to implement")

    def soft_terminate_signals(self):
        """Retrieve signal numbers that can be sent to soft terminate.
        @return a list of signal numbers that can be sent to soft terminate
        a process, or None if not applicable.
        """
        # pylint: disable=no-self-use
        return None

    def is_exceptional_exit(self, popen_status):
        """Returns whether the program exit status is exceptional.

        Returns whether the return code from a Popen process is exceptional
        (e.g. signals on POSIX systems).

        Derived classes should override this if they can detect exceptional
        program exit.

        @param popen_status the return code of a Popen-like process.

        @return True if the given popen_status represents an exceptional
        program exit; False otherwise.
        """
        # pylint: disable=no-self-use,unused-argument
        return False

    def exceptional_exit_details(self, popen_status):
        """Returns the normalized exceptional exit code and a description.

        Given an exceptional exit code, returns the integral value of the
        exception (e.g. signal number for POSIX) and a description (e.g.
        signal name on POSIX) for the result.

        Derived classes should override this if they can detect exceptional
        program exit.

        It is fine to not implement this so long as is_exceptional_exit()
        always returns False.

        @param popen_status the return code of a Popen-like process.

        @return (normalized exception code, symbolic exception description)

        @raise NotImplementedError always in this base class.
        """
        # pylint: disable=no-self-use,unused-argument
        raise NotImplementedError(
            "exceptional_exit_details() called on unsupported class")
276
277
class UnixProcessHelper(ProcessHelper):
    """Provides a ProcessHelper for Unix-like operating systems.

    This implementation supports anything that looks Posix-y
    (e.g. Darwin, Linux, *BSD, etc.)
    """
    def __init__(self):
        super(UnixProcessHelper, self).__init__()

    @classmethod
    def _create_new_process_group(cls):
        """Creates a new process group for the calling process."""
        os.setpgid(os.getpid(), os.getpid())

    def create_piped_process(self, command, new_process_group=True):
        """Creates a child process with stdin/stdout/stderr piped.

        @param command the command line list handed to subprocess.Popen().

        @param new_process_group when True, the child puts itself into a
        new process group before exec'ing the command.

        @return the subprocess.Popen object, with an extra
        using_process_groups attribute recording new_process_group.
        """
        # Determine what to run after the fork but before the exec.
        if new_process_group:
            preexec_func = self._create_new_process_group
        else:
            preexec_func = None

        # Create the process.
        process = subprocess.Popen(
            command,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            universal_newlines=True, # Elicits automatic byte -> string decoding in Py3
            close_fds=True,
            preexec_fn=preexec_func)

        # Remember whether we're using process groups for this
        # process.
        process.using_process_groups = new_process_group
        return process

    def supports_soft_terminate(self):
        """Returns True: soft terminate is done with signals."""
        # POSIX does support a soft terminate via:
        # * SIGTERM (no core requested)
        # * SIGQUIT (core requested if enabled, see ulimit -c)
        return True

    @classmethod
    def _validate_pre_terminate(cls, popen_process, log_file):
        """Decides whether a terminate signal should be sent at all.

        @param popen_process the process object to be terminated.

        @param log_file file-like object for diagnostics, or None.

        @return True if the signal should be sent; False if there is
        nothing sensible to signal.

        @raise ValueError if popen_process is None.
        """
        # Validate args.
        if popen_process is None:
            raise ValueError("popen_process is None")

        # Ensure we have something that looks like a valid process.
        if popen_process.pid < 1:
            if log_file:
                log_file.write("skipping soft_terminate(): no process id")
            return False

        # We only do the process liveness check if we're not using
        # process groups.  With process groups, checking if the main
        # inferior process is dead and short circuiting here is no
        # good - children of it in the process group could still be
        # alive, and they should be killed during a timeout.
        if not popen_process.using_process_groups:
            # Don't kill if it's already dead.
            popen_process.poll()
            if popen_process.returncode is not None:
                # It has a returncode.  It has already stopped.
                if log_file:
                    log_file.write(
                        "requested to terminate pid {} but it has already "
                        "terminated, returncode {}".format(
                            popen_process.pid, popen_process.returncode))
                # Move along...
                return False

        # Good to go.
        return True

    def _kill_with_signal(self, popen_process, log_file, signum):
        """Sends signum to the process, or to its process group.

        A target that no longer exists when the signal is delivered
        (ESRCH) is treated as already terminated rather than as an
        error; any other OSError propagates.

        @param popen_process the process object to signal.

        @param log_file file-like object for diagnostics, or None.

        @param signum the signal number to send.
        """
        # Imported here so this block does not depend on the module's
        # top-level import list.
        import errno

        # Validate we're ready to terminate this.
        if not self._validate_pre_terminate(popen_process, log_file):
            return

        # Choose kill mechanism based on whether we're targeting
        # a process group or just a process.
        if popen_process.using_process_groups:
            send_signal = os.killpg
        else:
            send_signal = os.kill

        try:
            send_signal(popen_process.pid, signum)
        except OSError as error:
            if error.errno != errno.ESRCH:
                raise
            # The target vanished between the decision to terminate it
            # and the delivery of the signal.  That is the outcome we
            # wanted, so note it and carry on.
            if log_file:
                log_file.write(
                    "pid {} was already gone when sending signal "
                    "{}\n".format(popen_process.pid, signum))

    def soft_terminate(self, popen_process, log_file=None, want_core=True):
        """Sends SIGQUIT (want_core) or SIGTERM to the process."""
        # Choose signal based on desire for core file.
        if want_core:
            # SIGQUIT will generate core by default.  Can be caught.
            signum = signal.SIGQUIT
        else:
            # SIGTERM is the traditional nice way to kill a process.
            # Can be caught, doesn't generate a core.
            signum = signal.SIGTERM

        self._kill_with_signal(popen_process, log_file, signum)

    def hard_terminate(self, popen_process, log_file=None):
        """Sends SIGKILL to the process."""
        self._kill_with_signal(popen_process, log_file, signal.SIGKILL)

    def was_soft_terminate(self, returncode, with_core):
        """Returns True if returncode is the negated soft-terminate signal."""
        if with_core:
            return returncode == -signal.SIGQUIT
        else:
            return returncode == -signal.SIGTERM

    def was_hard_terminate(self, returncode):
        """Returns True if returncode is the negated SIGKILL number."""
        return returncode == -signal.SIGKILL

    def soft_terminate_signals(self):
        """Returns the signals soft_terminate() may send."""
        return [signal.SIGQUIT, signal.SIGTERM]

    def is_exceptional_exit(self, popen_status):
        """Returns True for a negative status."""
        return popen_status < 0

    @classmethod
    def _signal_names_by_number(cls):
        """Builds a dict mapping signal numbers to SIG* names.

        Names starting with SIG_ are excluded.  Entries are inserted in
        reverse alphabetical order, so when several names share a
        number the alphabetically first name is the one kept.
        """
        return dict(
            (number, name)
            for name, number in reversed(sorted(signal.__dict__.items()))
            if name.startswith('SIG') and not name.startswith('SIG_'))

    def exceptional_exit_details(self, popen_status):
        """Returns (signal number, signal name) for a negative status.

        The name is an empty string when no SIG* name is known for the
        number.
        """
        signo = -popen_status
        signal_names_by_number = self._signal_names_by_number()
        signal_name = signal_names_by_number.get(signo, "")
        return (signo, signal_name)
414
class WindowsProcessHelper(ProcessHelper):
    """Provides a Windows implementation of the ProcessHelper class."""
    def __init__(self):
        super(WindowsProcessHelper, self).__init__()

    def create_piped_process(self, command, new_process_group=True):
        """Creates a child process with stdin/stdout/stderr piped.

        @param command the command line list handed to subprocess.Popen().

        @param new_process_group when True, the child is created with
        the CREATE_NEW_PROCESS_GROUP creation flag.

        @return the subprocess.Popen object.
        """
        # The new-process-group flag is needed if we want os.kill() to
        # work on the subprocess; without a group request no creation
        # flags are passed.
        creation_flags = (
            subprocess.CREATE_NEW_PROCESS_GROUP if new_process_group else 0)

        popen_options = dict(
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            # Elicits automatic byte -> string decoding in Py3
            universal_newlines=True,
            creationflags=creation_flags)
        return subprocess.Popen(command, **popen_options)

    def was_hard_terminate(self, returncode):
        """Returns True for any non-zero returncode."""
        return returncode != 0
437
438
class ProcessDriver(object):
    """Drives a child process, notifies on important events, and can timeout.

    Clients are expected to derive from this class and override the
    on_process_started and on_process_exited methods if they want to
    hook either of those.

    This class supports timing out the child process in a platform-agnostic
    way.  The on_process_exited method is informed if the exit was natural
    or if it was due to a timeout.

    The driver exposes write(), so it can itself be handed to helpers
    that expect a file-like log sink.
    """
    def __init__(self, soft_terminate_timeout=10.0):
        """Initializes the driver.

        @param soft_terminate_timeout number of seconds to wait for a
        soft terminate to wrap up before moving to more drastic measures.
        """
        super(ProcessDriver, self).__init__()
        self.process_helper = ProcessHelper.process_helper()
        self.pid = None
        # Create the synchronization event for notifying when the
        # inferior dotest process is complete.
        self.done_event = threading.Event()
        self.io_thread = None
        self.process = None
        # Number of seconds to wait for the soft terminate to
        # wrap up, before moving to more drastic measures.
        # Might want this longer if core dumps are generated and
        # take a long time to write out.
        self.soft_terminate_timeout = soft_terminate_timeout
        # Number of seconds to wait for the hard terminate to
        # wrap up, before giving up on the io thread.  This should
        # be fast.
        self.hard_terminate_timeout = 5.0
        self.returncode = None

    # =============================================
    # Methods for subclasses to override if desired.
    # =============================================

    def on_process_started(self):
        """Hook called right after the child process has been created."""
        pass

    def on_process_exited(self, command, output, was_timeout, exit_status):
        """Hook called once the driver is done with the child process.

        @param command the command that was run.

        @param output the io thread's output attribute (the result of
        communicate(), or None if that never completed).

        @param was_timeout True if the timeout expired before the child
        finished communicating; False otherwise.

        @param exit_status the driver's recorded returncode, which may
        be None if the process was never reaped.
        """
        pass

    def write(self, content):
        """Writes content to sys.stdout; subclasses may redirect it."""
        # pylint: disable=no-self-use
        # Intended - we want derived classes to be able to override
        # this and use any self state they may contain.
        sys.stdout.write(content)

    # ==============================================================
    # Operations used to drive processes.  Clients will want to call
    # one of these.
    # ==============================================================

    def run_command(self, command):
        """Runs command to completion, without any timeout.

        @param command the command line list to run.

        @raise Exception if no exit status is available after the
        child finished communicating.
        """
        # Start up the child process and the thread that does the
        # communication pump.
        self._start_process_and_io_thread(command)

        # Wait indefinitely for the child process to finish
        # communicating.  This indicates it has closed stdout/stderr
        # pipes and is done.
        self.io_thread.join()
        self.returncode = self.process.wait()
        if self.returncode is None:
            raise Exception(
                "no exit status available for pid {} after the "
                " inferior dotest.py should have completed".format(
                    self.process.pid))

        # Notify of non-timeout exit.
        self.on_process_exited(
            command,
            self.io_thread.output,
            False,
            self.returncode)

    def run_command_with_timeout(self, command, timeout, want_core):
        """Runs command, terminating it if it exceeds timeout.

        @param command the command line list to run.

        @param timeout a timeout spec as accepted by
        timeout_to_seconds(), or None for no timeout.

        @param want_core True if a soft terminate should request a
        core dump from the child.
        """
        # Figure out how many seconds our timeout description is requesting.
        timeout_seconds = timeout_to_seconds(timeout)

        # Start up the child process and the thread that does the
        # communication pump.
        self._start_process_and_io_thread(command)

        self._wait_with_timeout(timeout_seconds, command, want_core)

    # ================
    # Internal details.
    # ================

    def _start_process_and_io_thread(self, command):
        """Creates the child process and starts its communication thread."""
        # Create the process.
        self.process = self.process_helper.create_piped_process(command)
        self.pid = self.process.pid
        self.on_process_started()

        # Ensure the event is cleared that is used for signaling
        # from the communication() thread when communication is
        # complete (i.e. the inferior process has finished).
        self.done_event.clear()

        # Hand the thread this driver as its log sink.  The thread
        # reports failures through output_file.write(), which the
        # driver provides; the bound self.write method has no write
        # attribute of its own, so passing it would make that report
        # fail.
        self.io_thread = CommunicatorThread(
            self.process, self.done_event, self)
        self.io_thread.start()

    def _attempt_soft_kill(self, want_core):
        """Soft-terminates the child and waits for the io thread.

        @param want_core True if a core dump should be requested.

        @return (terminated, done_trying): (True, None) if the io thread
        finished within soft_terminate_timeout, else (False, False).
        """
        # The inferior dotest timed out.  Attempt to clean it
        # with a non-drastic method (so it can clean up properly
        # and/or generate a core dump).  Often the OS can't guarantee
        # that the process will really terminate after this.
        self.process_helper.soft_terminate(
            self.process,
            want_core=want_core,
            log_file=self)

        # Now wait up to a certain timeout period for the io thread
        # to say that the communication ended.  If that wraps up
        # within our soft terminate timeout, we're all done here.
        self.io_thread.join(self.soft_terminate_timeout)
        if not self.io_thread.is_alive():
            # stdout/stderr were closed on the child process side. We
            # should be able to wait and reap the child process here.
            self.returncode = self.process.wait()
            # We terminated, and the done_trying result is n/a
            terminated = True
            done_trying = None
        else:
            self.write("soft kill attempt of process {} timed out "
                       "after {} seconds\n".format(
                           self.process.pid, self.soft_terminate_timeout))
            terminated = False
            done_trying = False
        return terminated, done_trying

    def _attempt_hard_kill(self):
        """Hard-terminates the child and reaps it.

        @return (terminated, done_trying), where terminated tells
        whether a returncode was obtained and done_trying is always True.
        """
        # Instruct the process to terminate and really force it to
        # happen.  Don't give the process a chance to ignore.
        self.process_helper.hard_terminate(
            self.process,
            log_file=self)

        # Reap the child process.  This should not hang as the
        # hard_kill() mechanism is supposed to really kill it.
        # Improvement option:
        # If this does ever hang, convert to a self.process.poll()
        # loop checking on self.process.returncode until it is not
        # None or the timeout occurs.
        self.returncode = self.process.wait()

        # Wait a few moments for the io thread to finish...
        self.io_thread.join(self.hard_terminate_timeout)
        if self.io_thread.is_alive():
            # ... but this is not critical if it doesn't end for some
            # reason.
            self.write(
                "hard kill of process {} timed out after {} seconds waiting "
                "for the io thread (ignoring)\n".format(
                    self.process.pid, self.hard_terminate_timeout))

        # Set if it terminated.  (Set up for optional improvement above).
        terminated = self.returncode is not None
        # Nothing else to try.
        done_trying = True

        return terminated, done_trying

    def _attempt_termination(self, attempt_count, want_core):
        """Runs the termination step that belongs to attempt_count.

        When soft termination is supported, attempt 1 is the soft
        terminate and attempt 2 the hard terminate; otherwise attempt 1
        is the hard terminate.  Any later attempt has nothing left to
        try.

        @param attempt_count 1-based number of the current attempt.

        @param want_core True if a soft terminate should request a core.

        @return (terminated, done_trying) as returned by the step run,
        or (returncode is known, True) when no step is left.
        """
        soft_supported = self.process_helper.supports_soft_terminate()

        # When soft termination is supported, we first try to stop
        # the process with a soft terminate.
        if soft_supported and attempt_count == 1:
            return self._attempt_soft_kill(want_core)

        # The hard terminate is the fallback after a soft terminate,
        # or the only option when there is no soft terminate available.
        hard_attempt = 2 if soft_supported else 1
        if attempt_count == hard_attempt:
            return self._attempt_hard_kill()

        # We don't have anything else to try.
        terminated = self.returncode is not None
        done_trying = True
        return terminated, done_trying

    def _wait_with_timeout(self, timeout_seconds, command, want_core):
        """Waits for the child, terminating it when the timeout expires.

        @param timeout_seconds number of seconds to wait for the io
        thread to signal completion, or None to wait indefinitely.

        @param command the command that was run (passed on to
        on_process_exited()).

        @param want_core True if a soft terminate should request a core.
        """
        # Allow up to timeout seconds for the io thread to wrap up.
        # If that completes, the child process should be done.
        completed_normally = self.done_event.wait(timeout_seconds)
        if completed_normally:
            # Reap the child process here.
            self.returncode = self.process.wait()
        else:
            # Prepare to stop the process
            process_terminated = False
            terminate_attempt_count = 0

            # Try as many attempts as we support for trying to shut down
            # the child process if it's not already shut down.
            while not process_terminated:
                terminate_attempt_count += 1
                # Attempt to terminate.
                process_terminated, done_trying = self._attempt_termination(
                    terminate_attempt_count, want_core)
                # Check if there's nothing more to try.
                if done_trying:
                    # Break out of our termination attempt loop.
                    break

        # At this point, we're calling it good.  The process
        # finished gracefully, was shut down after one or more
        # attempts, or we failed but gave it our best effort.
        self.on_process_exited(
            command,
            self.io_thread.output,
            not completed_normally,
            self.returncode)
661
662
def patched_init(self, *args, **kwargs):
    """Runs the saved original initializer, then adds the wait condition.

    All positional and keyword arguments are forwarded unchanged to
    self.original_init.
    """
    run_original_init = self.original_init
    run_original_init(*args, **kwargs)
    # The condition variable that the patched wait()/poll() acquire.
    self.wait_condition = threading.Condition()
667
668
def patched_wait(self, *args, **kwargs):
    """Calls the saved original wait() while holding self.wait_condition.

    All arguments are forwarded to self.original_wait.  After it
    returns, every waiter on the condition is notified.  The condition
    is released even if the original wait() raises.

    @return whatever self.original_wait returned.
    """
    with self.wait_condition:
        exit_status = self.original_wait(*args, **kwargs)
        # The process finished.  Signal the condition.
        self.wait_condition.notify_all()
    return exit_status
678
679
def patched_poll(self, *args, **kwargs):
    """Calls the saved original poll() while holding self.wait_condition.

    All arguments are forwarded to self.original_poll.  If
    self.returncode is set afterwards, every waiter on the condition is
    notified.  The condition is released even if the original poll()
    raises.

    @return whatever self.original_poll returned.
    """
    with self.wait_condition:
        poll_result = self.original_poll(*args, **kwargs)
        has_finished = self.returncode is not None
        if has_finished:
            # We did complete, and we have the return value.
            # Signal the event to indicate we're done.
            self.wait_condition.notify_all()
    return poll_result
691
692
def patch_up_subprocess_popen():
    """Installs the patched __init__/wait/poll on subprocess.Popen.

    The original methods are saved on the class as original_init,
    original_wait and original_poll, which is where the patched
    versions look them up.

    The patch is applied at most once per interpreter.  If the saved
    originals are already present, the function returns without
    touching anything; patching again would save the patched methods
    as the "originals" and make them call themselves forever.
    """
    popen_class = subprocess.Popen
    if hasattr(popen_class, "original_init"):
        # Already patched (for example by an earlier import of this
        # module under a different name).
        return

    popen_class.original_init = popen_class.__init__
    popen_class.__init__ = patched_init

    popen_class.original_wait = popen_class.wait
    popen_class.wait = patched_wait

    popen_class.original_poll = popen_class.poll
    popen_class.poll = patched_poll

# Replace key subprocess.Popen() threading-unprotected methods with
# threading-protected versions.
patch_up_subprocess_popen()
706