"""
Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
See https://llvm.org/LICENSE.txt for license information.
SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

Provides classes used by the test results reporting infrastructure
within the LLDB test suite.


This module provides process-management support for the LLDB test
running infrastructure.
"""

# System imports
import errno
import os
import re
import signal
import subprocess
import sys
import threading


class CommunicatorThread(threading.Thread):
    """Provides a thread class that communicates with a subprocess.

    The thread calls communicate() on the given process, stores the
    result in self.output, and sets the given event when it is done
    (whether communicate() succeeded or raised).
    """

    def __init__(self, process, event, output_file):
        """Initializes the communicator thread.

        @param process the subprocess.Popen-like object to communicate with.

        @param event a threading.Event-like object; its set() method is
        called when run() finishes.

        @param output_file an object with a write(str) method used to
        report a failure of communicate(). May be None.
        """
        super(CommunicatorThread, self).__init__()
        # Don't let this thread prevent shutdown.
        self.daemon = True
        self.process = process
        self.pid = process.pid
        self.event = event
        self.output_file = output_file
        self.output = None

    def run(self):
        try:
            # Communicate with the child process.
            # This will not complete until the child process terminates.
            self.output = self.process.communicate()
        except Exception as exception:  # pylint: disable=broad-except
            if self.output_file:
                # Report both the pid and the failure; the message used
                # to put the exception text where the pid belongs.
                self.output_file.write(
                    "exception while using communicate() for pid {}: "
                    "{}\n".format(self.pid, exception))
        finally:
            # Signal that the thread's run is complete.
            self.event.set()


# Provides a regular expression for matching gtimeout-based durations.
TIMEOUT_REGEX = re.compile(r"(^\d+)([smhd])?$")

# Maps a gtimeout unit suffix to the number of seconds it represents.
# A missing suffix (None) means seconds.
_SECONDS_PER_TIMEOUT_UNIT = {
    None: 1.0,
    's': 1.0,
    'm': 60.0,
    'h': 60.0 * 60.0,
    'd': 24 * (60.0 * 60.0),
}


def timeout_to_seconds(timeout):
    """Converts timeout/gtimeout timeout values into seconds.

    @param timeout a timeout string made of an integer optionally
    followed by one of the unit suffixes s, m, h or d (e.g. "5m"
    represents 5 minutes). No suffix means seconds. May be None.

    @return None if timeout is None, or the number of seconds as a float
    if a valid timeout format was specified.

    @raise ValueError if the timeout string cannot be parsed.
    """
    if timeout is None:
        return None

    match = TIMEOUT_REGEX.match(timeout)
    if not match:
        raise ValueError("could not parse TIMEOUT spec '{}'".format(
            timeout))

    units = match.group(2)
    if units not in _SECONDS_PER_TIMEOUT_UNIT:
        # The regex only admits known suffixes; this guards against the
        # regex and the table drifting apart.
        raise ValueError("unexpected units value '{}'".format(units))
    return _SECONDS_PER_TIMEOUT_UNIT[units] * float(match.group(1))


class ProcessHelper(object):
    """Provides an interface for accessing process-related functionality.

    This class provides a factory method that gives the caller a
    platform-specific implementation instance of the class.

    Clients of the class should stick to the methods provided in this
    base class.

    @see ProcessHelper.process_helper()
    """

    def __init__(self):
        super(ProcessHelper, self).__init__()

    @classmethod
    def process_helper(cls):
        """Returns a platform-specific ProcessHelper instance.
        @return a ProcessHelper instance that does the right thing for
        the current platform.
        """

        # If you add a new platform, create an instance here and
        # return it.
        if os.name == "nt":
            return WindowsProcessHelper()
        # For all POSIX-like systems.
        return UnixProcessHelper()

    def create_piped_process(self, command, new_process_group=True):
        # pylint: disable=no-self-use,unused-argument
        # As expected.  We want derived classes to implement this.
        """Creates a subprocess.Popen-based class with I/O piped to the parent.

        @param command the command line list as would be passed to
        subprocess.Popen().  Use the list form rather than the string form.

        @param new_process_group indicates if the caller wants the
        process to be created in its own process group.  Each OS handles
        this concept differently.  It provides a level of isolation and
        can simplify or enable terminating the process tree properly.

        @return a subprocess.Popen-like object.
        """
        raise NotImplementedError("derived class must implement")

    def supports_soft_terminate(self):
        # pylint: disable=no-self-use
        # As expected.  We want derived classes to implement this.
        """Indicates if the platform supports soft termination.

        Soft termination is the concept of a terminate mechanism that
        allows the target process to shut down nicely, but with the
        catch that the process might choose to ignore it.

        Platform supporter note: only mark soft terminate as supported
        if the target process has some way to evade the soft terminate
        request; otherwise, just support the hard terminate method.

        @return True if the platform supports a soft terminate mechanism.
        """
        # By default, we do not support a soft terminate mechanism.
        return False

    def soft_terminate(self, popen_process, log_file=None, want_core=True):
        # pylint: disable=no-self-use,unused-argument
        # As expected.  We want derived classes to implement this.
        """Attempts to terminate the process in a polite way.

        This terminate method is intended to give the child process a
        chance to clean up and exit on its own, possibly with a request
        to drop a core file or equivalent (i.e. [mini-]crashdump, crashlog,
        etc.)  If new_process_group was set in the process creation method
        and the platform supports it, this terminate call will attempt to
        kill the whole process tree rooted in this child process.

        @param popen_process the subprocess.Popen-like object returned
        by one of the process-creation methods of this class.

        @param log_file file-like object used to emit error-related
        logging info.  May be None if no error-related info is desired.

        @param want_core True if the caller would like to get a core
        dump (or the analogous crash report) from the terminated process.
        """
        popen_process.terminate()

    def hard_terminate(self, popen_process, log_file=None):
        # pylint: disable=no-self-use,unused-argument
        # As expected.  We want derived classes to implement this.
        """Attempts to terminate the process immediately.

        This terminate method is intended to kill child process in
        a manner in which the child process has no ability to block,
        and also has no ability to clean up properly.  If new_process_group
        was specified when creating the process, and if the platform
        implementation supports it, this will attempt to kill the
        whole process tree rooted in the child process.

        @param popen_process the subprocess.Popen-like object returned
        by one of the process-creation methods of this class.

        @param log_file file-like object used to emit error-related
        logging info.  May be None if no error-related info is desired.
        """
        popen_process.kill()

    def was_soft_terminate(self, returncode, with_core):
        # pylint: disable=no-self-use,unused-argument
        # As expected.  We want derived classes to implement this.
        """Returns if Popen-like object returncode matches soft terminate.

        @param returncode the returncode from the Popen-like object that
        terminated with a given return code.

        @param with_core indicates whether the returncode should match
        a core-generating return signal.

        @return True when the returncode represents what the system would
        issue when a soft_terminate() with the given with_core arg occurred;
        False otherwise.

        @raise NotImplementedError if the platform claims soft terminate
        support but did not override this method.
        """
        if not self.supports_soft_terminate():
            # If we don't support soft termination on this platform,
            # then this should always be False.
            return False
        # Once a platform claims to support soft terminate, it
        # needs to be able to identify it by overriding this method.
        raise NotImplementedError("platform needs to implement")

    def was_hard_terminate(self, returncode):
        # pylint: disable=no-self-use,unused-argument
        # As expected.  We want derived classes to implement this.
        """Returns if Popen-like object returncode matches that of a hard
        terminate attempt.

        @param returncode the returncode from the Popen-like object that
        terminated with a given return code.

        @return True when the returncode represents what the system would
        issue when a hard_terminate() occurred; False
        otherwise.
        """
        raise NotImplementedError("platform needs to implement")

    def soft_terminate_signals(self):
        # pylint: disable=no-self-use
        """Retrieve signal numbers that can be sent to soft terminate.
        @return a list of signal numbers that can be sent to soft terminate
        a process, or None if not applicable.
        """
        return None

    def is_exceptional_exit(self, popen_status):
        """Returns whether the program exit status is exceptional.

        Returns whether the return code from a Popen process is exceptional
        (e.g. signals on POSIX systems).

        Derived classes should override this if they can detect exceptional
        program exit.

        @return True if the given popen_status represents an exceptional
        program exit; False otherwise.
        """
        return False

    def exceptional_exit_details(self, popen_status):
        """Returns the normalized exceptional exit code and a description.

        Given an exceptional exit code, returns the integral value of the
        exception (e.g. signal number for POSIX) and a description (e.g.
        signal name on POSIX) for the result.

        Derived classes should override this if they can detect exceptional
        program exit.

        It is fine to not implement this so long as is_exceptional_exit()
        always returns False.

        @return (normalized exception code, symbolic exception description)
        """
        raise NotImplementedError(
            "exceptional_exit_details() called on unsupported class")


class UnixProcessHelper(ProcessHelper):
    """Provides a ProcessHelper for Unix-like operating systems.

    This implementation supports anything that looks Posix-y
    (e.g. Darwin, Linux, *BSD, etc.)
    """

    def __init__(self):
        super(UnixProcessHelper, self).__init__()

    @classmethod
    def _create_new_process_group(cls):
        """Creates a new process group for the calling process."""
        os.setpgid(os.getpid(), os.getpid())

    def create_piped_process(self, command, new_process_group=True):
        """Creates a Popen child with stdin/stdout/stderr piped.

        @param command the command line list passed to subprocess.Popen().

        @param new_process_group when True, the child puts itself in a
        new process group (via os.setpgid) before exec.

        @return the subprocess.Popen object, with an extra
        using_process_groups attribute recording new_process_group.
        """
        # Determine what to run after the fork but before the exec.
        preexec_func = (
            self._create_new_process_group if new_process_group else None)

        # Create the process.
        process = subprocess.Popen(
            command,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            # Elicits automatic byte -> string decoding in Py3
            universal_newlines=True,
            close_fds=True,
            preexec_fn=preexec_func)

        # Remember whether we're using process groups for this
        # process.
        process.using_process_groups = new_process_group
        return process

    def supports_soft_terminate(self):
        # POSIX does support a soft terminate via:
        # * SIGTERM (no core requested)
        # * SIGQUIT (core requested if enabled, see ulimit -c)
        return True

    @classmethod
    def _uses_process_groups(cls, popen_process):
        """Returns whether the process was created in its own group.

        Processes not created through create_piped_process() carry no
        using_process_groups attribute; they are treated as plain
        (non-group) processes instead of failing with AttributeError.
        """
        return bool(getattr(popen_process, "using_process_groups", False))

    @classmethod
    def _validate_pre_terminate(cls, popen_process, log_file):
        """Checks whether a terminate request should proceed.

        @param popen_process the Popen-like object to be terminated.

        @param log_file object with a write() method used for logging;
        may be None.

        @return True if a signal should be sent; False if the request
        should be skipped (no valid pid, or already exited when not
        using process groups).

        @raise ValueError if popen_process is None.
        """
        # Validate args.
        if popen_process is None:
            raise ValueError("popen_process is None")

        # Ensure we have something that looks like a valid process.
        if popen_process.pid < 1:
            if log_file:
                log_file.write("skipping soft_terminate(): no process id")
            return False

        # We only do the process liveness check if we're not using
        # process groups.  With process groups, checking if the main
        # inferior process is dead and short circuiting here is no
        # good - children of it in the process group could still be
        # alive, and they should be killed during a timeout.
        if cls._uses_process_groups(popen_process):
            return True

        # Don't kill if it's already dead.
        popen_process.poll()
        if popen_process.returncode is None:
            # Still running.  Good to go.
            return True

        # It has a returncode.  It has already stopped.
        if log_file:
            log_file.write(
                "requested to terminate pid {} but it has already "
                "terminated, returncode {}".format(
                    popen_process.pid, popen_process.returncode))
        # Move along...
        return False

    def _kill_with_signal(self, popen_process, log_file, signum):
        """Sends signum to the process, or to its group if it has one.

        A "no such process" (ESRCH) failure is ignored; any other
        OSError propagates.
        """
        # Validate we're ready to terminate this.
        if not self._validate_pre_terminate(popen_process, log_file):
            return

        # Choose kill mechanism based on whether we're targeting
        # a process group or just a process.
        try:
            if self._uses_process_groups(popen_process):
                os.killpg(popen_process.pid, signum)
            else:
                os.kill(popen_process.pid, signum)
        except OSError as error:
            # ESRCH is okay - failed to find the process.  It may be
            # that the timeout pre-kill hook eliminated the process.
            if error.errno != errno.ESRCH:
                raise

    def soft_terminate(self, popen_process, log_file=None, want_core=True):
        # Choose signal based on desire for core file.
        if want_core:
            # SIGQUIT will generate core by default.  Can be caught.
            signum = signal.SIGQUIT
        else:
            # SIGTERM is the traditional nice way to kill a process.
            # Can be caught, doesn't generate a core.
            signum = signal.SIGTERM

        self._kill_with_signal(popen_process, log_file, signum)

    def hard_terminate(self, popen_process, log_file=None):
        self._kill_with_signal(popen_process, log_file, signal.SIGKILL)

    def was_soft_terminate(self, returncode, with_core):
        expected_signal = signal.SIGQUIT if with_core else signal.SIGTERM
        return returncode == -expected_signal

    def was_hard_terminate(self, returncode):
        return returncode == -signal.SIGKILL

    def soft_terminate_signals(self):
        return [signal.SIGQUIT, signal.SIGTERM]

    def is_exceptional_exit(self, popen_status):
        # Popen reports death-by-signal as a negative return code.
        return popen_status < 0

    @classmethod
    def _signal_names_by_number(cls):
        """Returns a dict mapping signal numbers to SIG* names.

        Names are inserted in reverse alphabetical order, so when several
        names share a number the alphabetically first name wins.
        """
        names = sorted(
            (name for name in signal.__dict__
             if name.startswith('SIG') and not name.startswith('SIG_')),
            reverse=True)
        return dict((signal.__dict__[name], name) for name in names)

    def exceptional_exit_details(self, popen_status):
        """Returns (signal number, signal name) for a negative status.

        The name is "" when the number has no known SIG* name.
        """
        signo = -popen_status
        signal_name = self._signal_names_by_number().get(signo, "")
        return (signo, signal_name)


class WindowsProcessHelper(ProcessHelper):
    """Provides a Windows implementation of the ProcessHelper class."""

    def __init__(self):
        super(WindowsProcessHelper, self).__init__()

    def create_piped_process(self, command, new_process_group=True):
        """Creates a Popen child with stdin/stdout/stderr piped.

        @param command the command line list passed to subprocess.Popen().

        @param new_process_group when True, the child is created with
        the CREATE_NEW_PROCESS_GROUP creation flag.

        @return the subprocess.Popen object, with an extra
        using_process_groups attribute recording new_process_group.
        """
        if new_process_group:
            # We need this flag if we want os.kill() to work on the
            # subprocess.
            creation_flags = subprocess.CREATE_NEW_PROCESS_GROUP
        else:
            creation_flags = 0

        process = subprocess.Popen(
            command,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            # Elicits automatic byte -> string decoding in Py3
            universal_newlines=True,
            creationflags=creation_flags)

        # Record the grouping choice the same way the Unix helper does.
        process.using_process_groups = new_process_group
        return process

    def was_hard_terminate(self, returncode):
        return returncode != 0


class ProcessDriver(object):
    """Drives a child process, notifies on important events, and can timeout.

    Clients are expected to derive from this class and override the
    on_process_started and on_process_exited methods if they want to
    hook either of those.

    This class supports timing out the child process in a platform-agnostic
    way.  The on_process_exited method is informed if the exit was natural
    or if it was due to a timeout.
    """

    def __init__(self, soft_terminate_timeout=10.0):
        """Initializes the driver.

        @param soft_terminate_timeout number of seconds to wait for a
        soft terminate to finish before trying a hard terminate.
        """
        super(ProcessDriver, self).__init__()
        self.process_helper = ProcessHelper.process_helper()
        self.pid = None
        # Create the synchronization event for notifying when the
        # inferior dotest process is complete.
        self.done_event = threading.Event()
        self.io_thread = None
        self.process = None
        # Number of seconds to wait for the soft terminate to
        # wrap up, before moving to more drastic measures.
        # Might want this longer if core dumps are generated and
        # take a long time to write out.
        self.soft_terminate_timeout = soft_terminate_timeout
        # Number of seconds to wait for the hard terminate to
        # wrap up, before giving up on the io thread.  This should
        # be fast.
        self.hard_terminate_timeout = 5.0
        self.returncode = None

    # =============================================
    # Methods for subclasses to override if desired.
    # =============================================

    def on_process_started(self):
        """Called right after the child process has been created."""
        pass

    def on_process_exited(self, command, output, was_timeout, exit_status):
        """Called once the driver is done with the child process.

        @param command the command that was run.

        @param output the value produced by the io thread's
        communicate() call, or None if that call did not complete.

        @param was_timeout True if the process did not finish within
        the requested timeout.

        @param exit_status the return code collected for the process;
        may be None if it could not be collected.
        """
        pass

    def on_timeout_pre_kill(self):
        """Called after the timeout interval elapses but before killing it.

        This method is added to enable derived classes the ability to do
        something to the process prior to it being killed.  For example,
        this would be a good spot to run a program that samples the process
        to see what it was doing (or not doing).

        Do not attempt to reap the process (i.e. use wait()) in this method.
        That will interfere with the kill mechanism and return code processing.
        """
        pass

    def write(self, content):
        # pylint: disable=no-self-use
        # Intended - we want derived classes to be able to override
        # this and use any self state they may contain.
        sys.stdout.write(content)

    # ==============================================================
    # Operations used to drive processes.  Clients will want to call
    # one of these.
    # ==============================================================

    def run_command(self, command):
        """Runs command to completion with no timeout.

        Calls on_process_exited() with was_timeout set to False.
        """
        # Start up the child process and the thread that does the
        # communication pump.
        self._start_process_and_io_thread(command)

        # Wait indefinitely for the child process to finish
        # communicating.  This indicates it has closed stdout/stderr
        # pipes and is done.
        self.io_thread.join()
        self.returncode = self.process.wait()
        if self.returncode is None:
            raise Exception(
                "no exit status available for pid {} after the "
                " inferior dotest.py should have completed".format(
                    self.process.pid))

        # Notify of non-timeout exit.
        self.on_process_exited(
            command,
            self.io_thread.output,
            False,
            self.returncode)

    def run_command_with_timeout(self, command, timeout, want_core):
        """Runs command, terminating it if it outlives timeout.

        @param command the command line list to run.

        @param timeout a timeout spec accepted by timeout_to_seconds(),
        or None to wait without a time limit.

        @param want_core passed to soft_terminate() when the process
        has to be terminated.
        """
        # Figure out how many seconds our timeout description is requesting.
        timeout_seconds = timeout_to_seconds(timeout)

        # Start up the child process and the thread that does the
        # communication pump.
        self._start_process_and_io_thread(command)

        self._wait_with_timeout(timeout_seconds, command, want_core)

    # ================
    # Internal details.
    # ================

    def _start_process_and_io_thread(self, command):
        # Create the process.
        self.process = self.process_helper.create_piped_process(command)
        self.pid = self.process.pid
        self.on_process_started()

        # Ensure the event is cleared that is used for signaling
        # from the communication() thread when communication is
        # complete (i.e. the inferior process has finished).
        self.done_event.clear()

        # The thread reports errors through output_file.write(), so it
        # needs the driver itself (which provides write()), not the
        # bound write method.
        self.io_thread = CommunicatorThread(
            self.process, self.done_event, self)
        self.io_thread.start()

    def _attempt_soft_kill(self, want_core):
        """Soft terminates the process and waits for the io thread.

        @return (terminated, done_trying): (True, None) if the io thread
        finished within soft_terminate_timeout, else (False, False).
        """
        # The inferior dotest timed out.  Attempt to clean it
        # with a non-drastic method (so it can clean up properly
        # and/or generate a core dump).  Often the OS can't guarantee
        # that the process will really terminate after this.
        self.process_helper.soft_terminate(
            self.process,
            want_core=want_core,
            log_file=self)

        # Now wait up to a certain timeout period for the io thread
        # to say that the communication ended.  If that wraps up
        # within our soft terminate timeout, we're all done here.
        self.io_thread.join(self.soft_terminate_timeout)
        if self.io_thread.is_alive():
            self.write("soft kill attempt of process {} timed out "
                       "after {} seconds\n".format(
                           self.process.pid, self.soft_terminate_timeout))
            return False, False

        # stdout/stderr were closed on the child process side.  We
        # should be able to wait and reap the child process here.
        self.returncode = self.process.wait()
        # We terminated, and the done_trying result is n/a
        return True, None

    def _attempt_hard_kill(self):
        """Hard terminates the process and reaps it.

        @return (terminated, done_trying); done_trying is always True.
        """
        # Instruct the process to terminate and really force it to
        # happen.  Don't give the process a chance to ignore.
        self.process_helper.hard_terminate(
            self.process,
            log_file=self)

        # Reap the child process.  This should not hang as the
        # hard_kill() mechanism is supposed to really kill it.
        # Improvement option:
        # If this does ever hang, convert to a self.process.poll()
        # loop checking on self.process.returncode until it is not
        # None or the timeout occurs.
        self.returncode = self.process.wait()

        # Wait a few moments for the io thread to finish...
        self.io_thread.join(self.hard_terminate_timeout)
        if self.io_thread.is_alive():
            # ... but this is not critical if it doesn't end for some
            # reason.
            self.write(
                "hard kill of process {} timed out after {} seconds waiting "
                "for the io thread (ignoring)\n".format(
                    self.process.pid, self.hard_terminate_timeout))

        # Set if it terminated.  (Set up for optional improvement above).
        terminated = self.returncode is not None
        # Nothing else to try.
        return terminated, True

    def _attempt_termination(self, attempt_count, want_core):
        """Runs the termination strategy for the given attempt number.

        With soft terminate support, attempt 1 is a soft kill and
        attempt 2 a hard kill; without it, attempt 1 is a hard kill.
        Any later attempt only reports the current state.

        @return (terminated, done_trying)
        """
        if self.process_helper.supports_soft_terminate():
            # When soft termination is supported, we first try to stop
            # the process with a soft terminate.  Failing that, we try
            # the hard terminate option.
            if attempt_count == 1:
                return self._attempt_soft_kill(want_core)
            if attempt_count == 2:
                return self._attempt_hard_kill()
        elif attempt_count == 1:
            # We only try the hard terminate option when there
            # is no soft terminate available.
            return self._attempt_hard_kill()

        # We don't have anything else to try.
        return self.returncode is not None, True

    def _wait_with_timeout(self, timeout_seconds, command, want_core):
        """Waits for the process, terminating it after timeout_seconds.

        Always finishes by calling on_process_exited().
        """
        # Allow up to timeout seconds for the io thread to wrap up.
        # If that completes, the child process should be done.
        completed_normally = self.done_event.wait(timeout_seconds)
        if completed_normally:
            # Reap the child process here.
            self.returncode = self.process.wait()
        else:
            # Allow derived classes to do some work after we detected
            # a timeout but before we touch the timed-out process.
            self.on_timeout_pre_kill()

            # Prepare to stop the process
            process_terminated = completed_normally
            terminate_attempt_count = 0

            # Try as many attempts as we support for trying to shut down
            # the child process if it's not already shut down.
            while not process_terminated:
                terminate_attempt_count += 1
                # Attempt to terminate.
                process_terminated, done_trying = self._attempt_termination(
                    terminate_attempt_count, want_core)
                # Check if there's nothing more to try.
                if done_trying:
                    # Break out of our termination attempt loop.
                    break

        # At this point, we're calling it good.  The process
        # finished gracefully, was shut down after one or more
        # attempts, or we failed but gave it our best effort.
        self.on_process_exited(
            command,
            self.io_thread.output,
            not completed_normally,
            self.returncode)


def patched_init(self, *args, **kwargs):
    """Replacement for Popen.__init__ that adds self.wait_condition."""
    self.original_init(*args, **kwargs)
    # Initialize our condition variable that protects wait()/poll().
    self.wait_condition = threading.Condition()


def patched_wait(self, *args, **kwargs):
    """Replacement for Popen.wait that runs under self.wait_condition."""
    with self.wait_condition:
        result = self.original_wait(*args, **kwargs)
        # The process finished.  Signal the condition.
        self.wait_condition.notify_all()
        return result


def patched_poll(self, *args, **kwargs):
    """Replacement for Popen.poll that runs under self.wait_condition."""
    with self.wait_condition:
        result = self.original_poll(*args, **kwargs)
        if self.returncode is not None:
            # We did complete, and we have the return value.
            # Signal the event to indicate we're done.
            self.wait_condition.notify_all()
        return result


def patch_up_subprocess_popen():
    """Installs the patched __init__/wait/poll on subprocess.Popen.

    Safe to call more than once: if the originals were already saved,
    nothing is done.  Patching again would save the patched methods as
    the "originals" and make them call themselves forever.
    """
    if hasattr(subprocess.Popen, "original_init"):
        return

    subprocess.Popen.original_init = subprocess.Popen.__init__
    subprocess.Popen.__init__ = patched_init

    subprocess.Popen.original_wait = subprocess.Popen.wait
    subprocess.Popen.wait = patched_wait

    subprocess.Popen.original_poll = subprocess.Popen.poll
    subprocess.Popen.poll = patched_poll


# Replace key subprocess.Popen() threading-unprotected methods with
# threading-protected versions.
patch_up_subprocess_popen()