xref: /llvm-project/lldb/packages/Python/lldbsuite/test/test_runner/process_control.py (revision b9c1b51e45b845debb76d8658edabca70ca56079)
1"""
2The LLVM Compiler Infrastructure
3
4This file is distributed under the University of Illinois Open Source
5License. See LICENSE.TXT for details.
6
7Provides classes used by the test results reporting infrastructure
8within the LLDB test suite.
9
10
11This module provides process-management support for the LLDB test
running infrastructure.
13"""
14
15# System imports
16import os
17import re
18import signal
19import subprocess
20import sys
21import threading
22
23
class CommunicatorThread(threading.Thread):
    """Provides a thread class that communicates with a subprocess.

    The thread calls process.communicate(), stores whatever it returns
    in self.output, and sets the supplied event when it is finished
    (whether communicate() returned or raised).
    """

    def __init__(self, process, event, output_file):
        """Initializes the communicator thread.

        @param process the subprocess.Popen-like object to communicate
        with.  Its pid attribute is captured at construction time.

        @param event an event object whose set() method is called once
        the communication attempt is over.

        @param output_file destination for error messages.  May be a
        file-like object with a write() method, a bare callable that
        accepts a single string, or a false value (e.g. None) to
        suppress error reporting.
        """
        super(CommunicatorThread, self).__init__()
        # Don't let this thread prevent shutdown.
        self.daemon = True
        self.process = process
        self.pid = process.pid
        self.event = event
        self.output_file = output_file
        # Holds the value returned by communicate().  Stays None when
        # communicate() raises.
        self.output = None

    def _write_error(self, message):
        """Sends message to output_file, if one was supplied."""
        if not self.output_file:
            return
        # Accept either a file-like object or a plain write callable.
        writer = getattr(self.output_file, "write", self.output_file)
        writer(message)

    def run(self):
        """Communicates with the child process, then signals the event."""
        try:
            # Communicate with the child process.
            # This will not complete until the child process terminates.
            self.output = self.process.communicate()
        except Exception as exception:  # pylint: disable=broad-except
            # Report the pid and the failure; do not let the exception
            # kill the thread before the event below is signaled.
            self._write_error(
                "exception while using communicate() for pid {}: {}\n".format(
                    self.pid, exception))
        finally:
            # Signal that the thread's run is complete.
            self.event.set()
50
51
# Provides a regular expression for matching timeout/gtimeout-style
# durations: a non-negative decimal number (integer or fractional),
# optionally followed by a single unit suffix.  Group 1 is the number,
# group 2 is the unit suffix (or None when absent).
TIMEOUT_REGEX = re.compile(r"(^\d+(?:\.\d+)?)([smhd])?$")

# Number of seconds represented by each supported unit suffix.
_SECONDS_PER_TIMEOUT_UNIT = {
    's': 1.0,
    'm': 60.0,
    'h': 60.0 * 60.0,
    'd': 24 * (60.0 * 60.0),
}


def timeout_to_seconds(timeout):
    """Converts timeout/gtimeout timeout values into seconds.

    @param timeout a timeout string made of a decimal number followed
    by an optional unit suffix: 's' (seconds), 'm' (minutes),
    'h' (hours) or 'd' (days).  A bare number is taken as seconds.
    For example, "4m" is 4 minutes and "1.5h" is 90 minutes.
    May be None.

    @return None if timeout is None, or the number of seconds as a float
    if a valid timeout format was specified.

    @raise ValueError if timeout is not None and does not match the
    expected format.
    """
    if timeout is None:
        return None

    match = TIMEOUT_REGEX.match(timeout)
    if not match:
        raise ValueError("could not parse TIMEOUT spec '{}'".format(
            timeout))

    value = float(match.group(1))
    # No suffix means seconds.  The regex only admits the suffixes that
    # are present in the conversion table.
    units = match.group(2) or 's'
    return _SECONDS_PER_TIMEOUT_UNIT[units] * value
91
92
class ProcessHelper(object):
    """Provides an interface for accessing process-related functionality.

    This class provides a factory method that gives the caller a
    platform-specific implementation instance of the class.

    Clients of the class should stick to the methods provided in this
    base class.

    @see ProcessHelper.process_helper()
    """

    def __init__(self):
        super(ProcessHelper, self).__init__()

    @classmethod
    def process_helper(cls):
        """Returns a platform-specific ProcessHelper instance.

        @return a ProcessHelper instance that does the right thing for
        the current platform.
        """
        # If you add a new platform, create an instance here and
        # return it.
        if os.name == "nt":
            return WindowsProcessHelper()
        # For all POSIX-like systems.
        return UnixProcessHelper()

    def create_piped_process(self, command, new_process_group=True):
        # pylint: disable=no-self-use,unused-argument
        # As expected.  We want derived classes to implement this.
        """Creates a subprocess.Popen-based class with I/O piped to the parent.

        @param command the command line list as would be passed to
        subprocess.Popen().  Use the list form rather than the string form.

        @param new_process_group indicates if the caller wants the
        process to be created in its own process group.  Each OS handles
        this concept differently.  It provides a level of isolation and
        can simplify or enable terminating the process tree properly.

        @return a subprocess.Popen-like object.

        @raise NotImplementedError always in this base class; derived
        classes must override this method.
        """
        raise NotImplementedError("derived class must implement")

    def supports_soft_terminate(self):
        # pylint: disable=no-self-use
        # As expected.  We want derived classes to implement this.
        """Indicates if the platform supports soft termination.

        Soft termination is the concept of a terminate mechanism that
        allows the target process to shut down nicely, but with the
        catch that the process might choose to ignore it.

        Platform supporter note: only mark soft terminate as supported
        if the target process has some way to evade the soft terminate
        request; otherwise, just support the hard terminate method.

        @return True if the platform supports a soft terminate mechanism.
        """
        # By default, we do not support a soft terminate mechanism.
        return False

    def soft_terminate(self, popen_process, log_file=None, want_core=True):
        # pylint: disable=no-self-use,unused-argument
        # As expected.  We want derived classes to implement this.
        """Attempts to terminate the process in a polite way.

        This terminate method is intended to give the child process a
        chance to clean up and exit on its own, possibly with a request
        to drop a core file or equivalent (i.e. [mini-]crashdump, crashlog,
        etc.)  If new_process_group was set in the process creation method
        and the platform supports it, this terminate call will attempt to
        kill the whole process tree rooted in this child process.

        The base implementation simply calls popen_process.terminate()
        and ignores log_file and want_core.

        @param popen_process the subprocess.Popen-like object returned
        by one of the process-creation methods of this class.

        @param log_file file-like object used to emit error-related
        logging info.  May be None if no error-related info is desired.

        @param want_core True if the caller would like to get a core
        dump (or the analogous crash report) from the terminated process.
        """
        popen_process.terminate()

    def hard_terminate(self, popen_process, log_file=None):
        # pylint: disable=no-self-use,unused-argument
        # As expected.  We want derived classes to implement this.
        """Attempts to terminate the process immediately.

        This terminate method is intended to kill child process in
        a manner in which the child process has no ability to block,
        and also has no ability to clean up properly.  If new_process_group
        was specified when creating the process, and if the platform
        implementation supports it, this will attempt to kill the
        whole process tree rooted in the child process.

        The base implementation simply calls popen_process.kill() and
        ignores log_file.

        @param popen_process the subprocess.Popen-like object returned
        by one of the process-creation methods of this class.

        @param log_file file-like object used to emit error-related
        logging info.  May be None if no error-related info is desired.
        """
        popen_process.kill()

    def was_soft_terminate(self, returncode, with_core):
        # pylint: disable=no-self-use,unused-argument
        # As expected.  We want derived classes to implement this.
        """Returns if Popen-like object returncode matches soft terminate.

        @param returncode the returncode from the Popen-like object that
        terminated with a given return code.

        @param with_core indicates whether the returncode should match
        a core-generating return signal.

        @return True when the returncode represents what the system would
        issue when a soft_terminate() with the given with_core arg occurred;
        False otherwise.

        @raise NotImplementedError if the platform claims soft terminate
        support but did not override this method.
        """
        if not self.supports_soft_terminate():
            # If we don't support soft termination on this platform,
            # then this should always be False.
            return False
        # Once a platform claims to support soft terminate, it
        # needs to be able to identify it by overriding this method.
        raise NotImplementedError("platform needs to implement")

    def was_hard_terminate(self, returncode):
        # pylint: disable=no-self-use,unused-argument
        # As expected.  We want derived classes to implement this.
        """Returns if Popen-like object returncode matches that of a hard
        terminate attempt.

        @param returncode the returncode from the Popen-like object that
        terminated with a given return code.

        @return True when the returncode represents what the system would
        issue when a hard_terminate() occurred; False
        otherwise.

        @raise NotImplementedError always in this base class; derived
        classes must override this method.
        """
        raise NotImplementedError("platform needs to implement")

    def soft_terminate_signals(self):
        # pylint: disable=no-self-use
        """Retrieve signal numbers that can be sent to soft terminate.

        @return a list of signal numbers that can be sent to soft terminate
        a process, or None if not applicable.
        """
        return None

    def is_exceptional_exit(self, popen_status):
        # pylint: disable=no-self-use,unused-argument
        """Returns whether the program exit status is exceptional.

        Returns whether the return code from a Popen process is exceptional
        (e.g. signals on POSIX systems).

        Derived classes should override this if they can detect exceptional
        program exit.

        @param popen_status the return code from a Popen-like object.

        @return True if the given popen_status represents an exceptional
        program exit; False otherwise.  The base class always returns
        False.
        """
        return False

    def exceptional_exit_details(self, popen_status):
        # pylint: disable=no-self-use,unused-argument
        """Returns the normalized exceptional exit code and a description.

        Given an exceptional exit code, returns the integral value of the
        exception (e.g. signal number for POSIX) and a description (e.g.
        signal name on POSIX) for the result.

        Derived classes should override this if they can detect exceptional
        program exit.

        It is fine to not implement this so long as is_exceptional_exit()
        always returns False.

        @param popen_status the return code from a Popen-like object.

        @return (normalized exception code, symbolic exception description)

        @raise NotImplementedError always in this base class.
        """
        raise NotImplementedError(
            "exceptional_exit_details() called on unsupported class")
278
279
class UnixProcessHelper(ProcessHelper):
    """Provides a ProcessHelper for Unix-like operating systems.

    This implementation supports anything that looks Posix-y
    (e.g. Darwin, Linux, *BSD, etc.)
    """

    def __init__(self):
        super(UnixProcessHelper, self).__init__()

    @classmethod
    def _create_new_process_group(cls):
        """Creates a new process group for the calling process."""
        os.setpgid(os.getpid(), os.getpid())

    def create_piped_process(self, command, new_process_group=True):
        """Creates a subprocess.Popen with stdin/stdout/stderr piped.

        @param command the command line list to pass to
        subprocess.Popen().

        @param new_process_group when True, the child is placed in its
        own process group (via a preexec function) before the exec.

        @return the subprocess.Popen object, with an extra
        using_process_groups attribute recording new_process_group.
        """
        # Determine what to run after the fork but before the exec.
        if new_process_group:
            preexec_func = self._create_new_process_group
        else:
            preexec_func = None

        # Create the process.
        process = subprocess.Popen(
            command,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            universal_newlines=True,  # Elicits automatic byte -> string decoding in Py3
            close_fds=True,
            preexec_fn=preexec_func)

        # Remember whether we're using process groups for this
        # process.  The terminate methods consult this attribute.
        process.using_process_groups = new_process_group
        return process

    def supports_soft_terminate(self):
        """Returns True: POSIX supports a soft terminate."""
        # POSIX does support a soft terminate via:
        # * SIGTERM (no core requested)
        # * SIGQUIT (core requested if enabled, see ulimit -c)
        return True

    @classmethod
    def _validate_pre_terminate(cls, popen_process, log_file):
        """Checks whether a signal should be sent to popen_process.

        @param popen_process the Popen-like object about to be signaled.

        @param log_file file-like object for diagnostics, or None.

        @return True if the caller should go ahead and send the signal;
        False if there is nothing to signal.

        @raise ValueError if popen_process is None.
        """
        # Validate args.
        if popen_process is None:
            raise ValueError("popen_process is None")

        # Ensure we have something that looks like a valid process.
        if popen_process.pid < 1:
            if log_file:
                log_file.write("skipping soft_terminate(): no process id\n")
            return False

        # We only do the process liveness check if we're not using
        # process groups.  With process groups, checking if the main
        # inferior process is dead and short circuiting here is no
        # good - children of it in the process group could still be
        # alive, and they should be killed during a timeout.
        if not popen_process.using_process_groups:
            # Don't kill if it's already dead.
            popen_process.poll()
            if popen_process.returncode is not None:
                # It has a returncode.  It has already stopped.
                if log_file:
                    log_file.write(
                        "requested to terminate pid {} but it has already "
                        "terminated, returncode {}\n".format(
                            popen_process.pid, popen_process.returncode))
                # Move along...
                return False

        # Good to go.
        return True

    def _kill_with_signal(self, popen_process, log_file, signum):
        """Sends signum to the process, or to its process group.

        A target that no longer exists (ESRCH) is logged and otherwise
        ignored, since there is nothing left to terminate.  Any other
        OSError is propagated.

        @param popen_process the Popen-like object created by
        create_piped_process().

        @param log_file file-like object for diagnostics, or None.

        @param signum the signal number to send.
        """
        import errno

        # Validate we're ready to terminate this.
        if not self._validate_pre_terminate(popen_process, log_file):
            return

        # Choose kill mechanism based on whether we're targeting
        # a process group or just a process.
        if popen_process.using_process_groups:
            send_signal = os.killpg
        else:
            send_signal = os.kill

        try:
            send_signal(popen_process.pid, signum)
        except OSError as error:
            if error.errno != errno.ESRCH:
                raise
            # The target went away on its own before we could signal it.
            if log_file:
                log_file.write(
                    "signal {} not sent to pid {}: no such process\n".format(
                        signum, popen_process.pid))

    def soft_terminate(self, popen_process, log_file=None, want_core=True):
        """Sends SIGQUIT (want_core) or SIGTERM to the process.

        @param popen_process the Popen-like object created by
        create_piped_process().

        @param log_file file-like object for diagnostics, or None.

        @param want_core True to request a core dump from the process.
        """
        # Choose signal based on desire for core file.
        if want_core:
            # SIGQUIT will generate core by default.  Can be caught.
            signum = signal.SIGQUIT
        else:
            # SIGTERM is the traditional nice way to kill a process.
            # Can be caught, doesn't generate a core.
            signum = signal.SIGTERM

        self._kill_with_signal(popen_process, log_file, signum)

    def hard_terminate(self, popen_process, log_file=None):
        """Sends SIGKILL to the process (or its process group)."""
        self._kill_with_signal(popen_process, log_file, signal.SIGKILL)

    def was_soft_terminate(self, returncode, with_core):
        """Returns True if returncode is the negated soft-terminate signal.

        @param returncode the Popen returncode to examine.

        @param with_core True to compare against SIGQUIT, False to
        compare against SIGTERM.
        """
        if with_core:
            return returncode == -signal.SIGQUIT
        else:
            return returncode == -signal.SIGTERM

    def was_hard_terminate(self, returncode):
        """Returns True if returncode is the negated SIGKILL number."""
        return returncode == -signal.SIGKILL

    def soft_terminate_signals(self):
        """Returns the signals used by soft_terminate()."""
        return [signal.SIGQUIT, signal.SIGTERM]

    def is_exceptional_exit(self, popen_status):
        """Returns True when popen_status is negative."""
        return popen_status < 0

    @classmethod
    def _signal_names_by_number(cls):
        """Builds a mapping from signal number to signal name.

        Only names in the signal module that start with 'SIG' but not
        'SIG_' are considered.  Names are visited in reverse
        alphabetical order, so when several names share a number the
        alphabetically first name is the one kept.
        """
        return dict(
            (number, name)
            for name, number in reversed(sorted(signal.__dict__.items()))
            if name.startswith('SIG') and not name.startswith('SIG_'))

    def exceptional_exit_details(self, popen_status):
        """Returns (signal number, signal name) for a negative status.

        @param popen_status the Popen returncode; it is negated to get
        the signal number.

        @return a tuple of the signal number and its name, where the
        name is "" if the number is not a known signal.
        """
        signo = -popen_status
        signal_names_by_number = self._signal_names_by_number()
        signal_name = signal_names_by_number.get(signo, "")
        return (signo, signal_name)
417
418
class WindowsProcessHelper(ProcessHelper):
    """ProcessHelper implementation used on Windows hosts."""

    def __init__(self):
        super(WindowsProcessHelper, self).__init__()

    def create_piped_process(self, command, new_process_group=True):
        """Starts command with stdin, stdout and stderr piped to the parent.

        @param command the command line list handed to subprocess.Popen().

        @param new_process_group when True, the child is created with
        the CREATE_NEW_PROCESS_GROUP creation flag.

        @return the subprocess.Popen object for the new child.
        """
        # The new-process-group flag is required if we want os.kill()
        # to work on the subprocess; otherwise no flags are passed.
        flags = (
            subprocess.CREATE_NEW_PROCESS_GROUP if new_process_group else 0)

        popen_options = dict(
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            # Elicits automatic byte -> string decoding in Py3.
            universal_newlines=True,
            creationflags=flags)
        return subprocess.Popen(command, **popen_options)

    def was_hard_terminate(self, returncode):
        """Treats any non-zero returncode as a hard terminate.

        @param returncode the returncode reported by the Popen object.

        @return True when returncode is anything other than 0.
        """
        return returncode != 0
442
443
class ProcessDriver(object):
    """Drives a child process, notifies on important events, and can timeout.

    Clients are expected to derive from this class and override the
    on_process_started and on_process_exited methods if they want to
    hook either of those.

    This class supports timing out the child process in a platform-agnostic
    way.  The on_process_exited method is informed if the exit was natural
    or if it was due to a timeout.
    """

    def __init__(self, soft_terminate_timeout=10.0):
        """Initializes the driver.

        @param soft_terminate_timeout number of seconds to wait for a
        soft terminate to take effect before moving on to a hard
        terminate.
        """
        super(ProcessDriver, self).__init__()
        self.process_helper = ProcessHelper.process_helper()
        self.pid = None
        # Create the synchronization event for notifying when the
        # inferior dotest process is complete.
        self.done_event = threading.Event()
        self.io_thread = None
        self.process = None
        # Number of seconds to wait for the soft terminate to
        # wrap up, before moving to more drastic measures.
        # Might want this longer if core dumps are generated and
        # take a long time to write out.
        self.soft_terminate_timeout = soft_terminate_timeout
        # Number of seconds to wait for the hard terminate to
        # wrap up, before giving up on the io thread.  This should
        # be fast.
        self.hard_terminate_timeout = 5.0
        self.returncode = None

    # =============================================
    # Methods for subclasses to override if desired.
    # =============================================

    def on_process_started(self):
        """Called right after the child process has been created."""
        pass

    def on_process_exited(self, command, output, was_timeout, exit_status):
        """Called once the driver is done with the child process.

        @param command the command that was run.

        @param output the io thread's output attribute, i.e. whatever
        communicate() returned, or None if it did not return.

        @param was_timeout True if the timeout elapsed before the child
        finished; False otherwise.

        @param exit_status the driver's recorded returncode, which may
        be None if the child could not be reaped.
        """
        pass

    def write(self, content):
        # pylint: disable=no-self-use
        # Intended - we want derived classes to be able to override
        # this and use any self state they may contain.
        """Emits diagnostic content; the default writes to sys.stdout."""
        sys.stdout.write(content)

    # ==============================================================
    # Operations used to drive processes.  Clients will want to call
    # one of these.
    # ==============================================================

    def run_command(self, command):
        """Runs command to completion with no timeout.

        Calls on_process_exited() with was_timeout set to False once
        the child has been reaped.

        @param command the command line list to run.

        @raise Exception if no exit status is available after waiting
        on the child.
        """
        # Start up the child process and the thread that does the
        # communication pump.
        self._start_process_and_io_thread(command)

        # Wait indefinitely for the child process to finish
        # communicating.  This indicates it has closed stdout/stderr
        # pipes and is done.
        self.io_thread.join()
        self.returncode = self.process.wait()
        if self.returncode is None:
            raise Exception(
                "no exit status available for pid {} after the "
                " inferior dotest.py should have completed".format(
                    self.process.pid))

        # Notify of non-timeout exit.
        self.on_process_exited(
            command,
            self.io_thread.output,
            False,
            self.returncode)

    def run_command_with_timeout(self, command, timeout, want_core):
        """Runs command, terminating it if it exceeds timeout.

        @param command the command line list to run.

        @param timeout a timeout spec as accepted by
        timeout_to_seconds(), or None for no timeout.

        @param want_core True if a soft terminate should request a
        core dump from the child.
        """
        # Figure out how many seconds our timeout description is requesting.
        timeout_seconds = timeout_to_seconds(timeout)

        # Start up the child process and the thread that does the
        # communication pump.
        self._start_process_and_io_thread(command)

        self._wait_with_timeout(timeout_seconds, command, want_core)

    # ================
    # Internal details.
    # ================

    def _start_process_and_io_thread(self, command):
        """Creates the child process and starts its communicator thread."""
        # Create the process.
        self.process = self.process_helper.create_piped_process(command)
        self.pid = self.process.pid
        self.on_process_started()

        # Ensure the event is cleared that is used for signaling
        # from the communication() thread when communication is
        # complete (i.e. the inferior process has finished).
        self.done_event.clear()

        # Hand the io thread this driver itself as its output file:
        # the thread reports errors through output_file.write(), which
        # resolves to self.write().  (A bound self.write has no write
        # attribute of its own.)
        self.io_thread = CommunicatorThread(
            self.process, self.done_event, self)
        self.io_thread.start()

    def _attempt_soft_kill(self, want_core):
        """Soft-terminates the child and waits for the io thread.

        @param want_core True to request a core dump from the child.

        @return a (terminated, done_trying) tuple: (True, None) if the
        io thread finished within soft_terminate_timeout, otherwise
        (False, False).
        """
        # The inferior dotest timed out.  Attempt to clean it
        # with a non-drastic method (so it can clean up properly
        # and/or generate a core dump).  Often the OS can't guarantee
        # that the process will really terminate after this.
        self.process_helper.soft_terminate(
            self.process,
            want_core=want_core,
            log_file=self)

        # Now wait up to a certain timeout period for the io thread
        # to say that the communication ended.  If that wraps up
        # within our soft terminate timeout, we're all done here.
        self.io_thread.join(self.soft_terminate_timeout)
        if not self.io_thread.is_alive():
            # stdout/stderr were closed on the child process side. We
            # should be able to wait and reap the child process here.
            self.returncode = self.process.wait()
            # We terminated, and the done_trying result is n/a
            terminated = True
            done_trying = None
        else:
            self.write("soft kill attempt of process {} timed out "
                       "after {} seconds\n".format(
                           self.process.pid, self.soft_terminate_timeout))
            terminated = False
            done_trying = False
        return terminated, done_trying

    def _attempt_hard_kill(self):
        """Hard-terminates the child, reaps it and waits for the io thread.

        @return a (terminated, done_trying) tuple; done_trying is
        always True since nothing stronger is available.
        """
        # Instruct the process to terminate and really force it to
        # happen.  Don't give the process a chance to ignore.
        self.process_helper.hard_terminate(
            self.process,
            log_file=self)

        # Reap the child process.  This should not hang as the
        # hard_kill() mechanism is supposed to really kill it.
        # Improvement option:
        # If this does ever hang, convert to a self.process.poll()
        # loop checking on self.process.returncode until it is not
        # None or the timeout occurs.
        self.returncode = self.process.wait()

        # Wait a few moments for the io thread to finish...
        self.io_thread.join(self.hard_terminate_timeout)
        if self.io_thread.is_alive():
            # ... but this is not critical if it doesn't end for some
            # reason.
            self.write(
                "hard kill of process {} timed out after {} seconds waiting "
                "for the io thread (ignoring)\n".format(
                    self.process.pid, self.hard_terminate_timeout))

        # Set if it terminated.  (Set up for optional improvement above).
        terminated = self.returncode is not None
        # Nothing else to try.
        done_trying = True

        return terminated, done_trying

    def _attempt_termination(self, attempt_count, want_core):
        """Runs the termination step that corresponds to attempt_count.

        @param attempt_count the 1-based number of this attempt.

        @param want_core True to request a core dump on a soft kill.

        @return a (terminated, done_trying) tuple.
        """
        if self.process_helper.supports_soft_terminate():
            # When soft termination is supported, we first try to stop
            # the process with a soft terminate.  Failing that, we try
            # the hard terminate option.
            if attempt_count == 1:
                return self._attempt_soft_kill(want_core)
            elif attempt_count == 2:
                return self._attempt_hard_kill()
            else:
                # We don't have anything else to try.
                terminated = self.returncode is not None
                done_trying = True
                return terminated, done_trying
        else:
            # We only try the hard terminate option when there
            # is no soft terminate available.
            if attempt_count == 1:
                return self._attempt_hard_kill()
            else:
                # We don't have anything else to try.
                terminated = self.returncode is not None
                done_trying = True
                return terminated, done_trying

    def _wait_with_timeout(self, timeout_seconds, command, want_core):
        """Waits for the child, terminating it if the timeout elapses.

        Always finishes by calling on_process_exited(), passing whether
        the timeout elapsed.

        @param timeout_seconds the number of seconds to wait for the io
        thread's done event, or None to wait without a limit.

        @param command the command that was run; forwarded to
        on_process_exited().

        @param want_core True to request a core dump on a soft kill.
        """
        # Allow up to timeout seconds for the io thread to wrap up.
        # If that completes, the child process should be done.
        completed_normally = self.done_event.wait(timeout_seconds)
        if completed_normally:
            # Reap the child process here.
            self.returncode = self.process.wait()
        else:
            # Prepare to stop the process
            process_terminated = completed_normally
            terminate_attempt_count = 0

            # Try as many attempts as we support for trying to shut down
            # the child process if it's not already shut down.
            while not process_terminated:
                terminate_attempt_count += 1
                # Attempt to terminate.
                process_terminated, done_trying = self._attempt_termination(
                    terminate_attempt_count, want_core)
                # Check if there's nothing more to try.
                if done_trying:
                    # Break out of our termination attempt loop.
                    break

        # At this point, we're calling it good.  The process
        # finished gracefully, was shut down after one or more
        # attempts, or we failed but gave it our best effort.
        self.on_process_exited(
            command,
            self.io_thread.output,
            not completed_normally,
            self.returncode)
667
668
def patched_init(self, *args, **kwargs):
    """Replacement for subprocess.Popen.__init__.

    Forwards all arguments to the saved original initializer, then
    attaches a threading.Condition as self.wait_condition.
    """
    self.original_init(*args, **kwargs)
    # The condition variable guards the patched wait() and poll().
    self.wait_condition = threading.Condition()
673
674
def patched_wait(self, *args, **kwargs):
    """Replacement for subprocess.Popen.wait.

    Runs the saved original wait() while holding self.wait_condition,
    notifies every waiter on that condition, and returns the original
    result.  The condition is released even if the original raises.
    """
    with self.wait_condition:
        exit_status = self.original_wait(*args, **kwargs)
        # The process finished.  Wake up anyone waiting on the condition.
        self.wait_condition.notify_all()
        return exit_status
684
685
def patched_poll(self, *args, **kwargs):
    """Replacement for subprocess.Popen.poll.

    Runs the saved original poll() while holding self.wait_condition.
    If self.returncode is set afterwards, every waiter on the condition
    is notified.  Returns the original poll() result.
    """
    with self.wait_condition:
        status = self.original_poll(*args, **kwargs)
        has_exited = self.returncode is not None
        if has_exited:
            # The return value is available, so announce completion.
            self.wait_condition.notify_all()
        return status
697
698
def patch_up_subprocess_popen():
    """Installs the condition-protected methods on subprocess.Popen.

    The current __init__, wait and poll are saved on the class as
    original_init, original_wait and original_poll, and replaced by
    patched_init, patched_wait and patched_poll.

    Calling this more than once is harmless: once the originals have
    been saved, later calls do nothing.
    """
    if hasattr(subprocess.Popen, "original_init"):
        # Already patched.  Patching again would save the patched
        # methods as the "originals", and the patched methods would
        # then call themselves without end.
        return

    subprocess.Popen.original_init = subprocess.Popen.__init__
    subprocess.Popen.__init__ = patched_init

    subprocess.Popen.original_wait = subprocess.Popen.wait
    subprocess.Popen.wait = patched_wait

    subprocess.Popen.original_poll = subprocess.Popen.poll
    subprocess.Popen.poll = patched_poll
708
# Replace key subprocess.Popen() threading-unprotected methods with
# threading-protected versions.  This call sits at module level, so the
# replacement happens as a side effect of importing this module.
patch_up_subprocess_popen()
712