"""
Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
See https://llvm.org/LICENSE.txt for license information.
SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

Provides classes used by the test results reporting infrastructure
within the LLDB test suite.


This module provides process-management support for the LLDB test
running infrastructure.

Note: importing this module replaces subprocess.Popen.__init__, wait() and
poll() with condition-protected wrappers (see patch_up_subprocess_popen()).
"""

# System imports
import errno
import os
import re
import signal
import subprocess
import sys
import threading


class CommunicatorThread(threading.Thread):
    """Provides a thread class that communicates with a subprocess.

    The thread calls communicate() on the given process, stores the result
    in self.output, and sets the given event when it is finished (whether
    communicate() succeeded or raised).
    """

    def __init__(self, process, event, output_file):
        """Initializes the thread.

        @param process the subprocess.Popen-like object to communicate with.

        @param event a threading.Event-like object that is set when run()
        finishes.

        @param output_file where to report errors. May be a file-like object
        (anything with a write() method), a callable taking the text to
        write, or None if no error reporting is desired.
        """
        super(CommunicatorThread, self).__init__()
        # Don't let this thread prevent shutdown.
        self.daemon = True
        self.process = process
        self.pid = process.pid
        self.event = event
        self.output_file = output_file
        # Holds the return value of process.communicate() once available.
        self.output = None

    def _report_error(self, content):
        """Writes content to output_file, if one was provided."""
        if self.output_file is None:
            return
        # Accept both file-like objects and bare callables, so that callers
        # handing over a bound write method keep working.
        writer = getattr(self.output_file, "write", self.output_file)
        writer(content)

    def run(self):
        try:
            # Communicate with the child process.
            # This will not complete until the child process terminates.
            self.output = self.process.communicate()
        except Exception as exception:  # pylint: disable=broad-except
            self._report_error(
                "exception while using communicate() for pid {}: {}\n".format(
                    self.pid, exception
                )
            )
        finally:
            # Signal that the thread's run is complete.
            self.event.set()


# Provides a regular expression for matching gtimeout-based durations.
TIMEOUT_REGEX = re.compile(r"(^\d+)([smhd])?$")

# Maps the optional unit suffix captured by TIMEOUT_REGEX to the number of
# seconds per unit. A missing suffix (None) means seconds.
_SECONDS_PER_UNIT = {
    None: 1.0,
    "s": 1.0,
    "m": 60.0,
    "h": 60.0 * 60.0,
    "d": 24 * (60.0 * 60.0),
}


def timeout_to_seconds(timeout):
    """Converts timeout/gtimeout timeout values into seconds.

    @param timeout a timeout string such as "90", "90s", "5m", "2h" or
    "1d" (seconds, seconds, minutes, hours, days), or None.

    @return None if timeout is None, or the number of seconds as a float
    if a valid timeout format was specified.

    @raise ValueError if the timeout string cannot be parsed.
    """
    if timeout is None:
        return None

    match = TIMEOUT_REGEX.match(timeout)
    if not match:
        raise ValueError("could not parse TIMEOUT spec '{}'".format(timeout))

    # The regex only admits the suffixes present in the table (or none).
    return float(match.group(1)) * _SECONDS_PER_UNIT[match.group(2)]


class ProcessHelper(object):
    """Provides an interface for accessing process-related functionality.

    This class provides a factory method that gives the caller a
    platform-specific implementation instance of the class.

    Clients of the class should stick to the methods provided in this
    base class.

    @see ProcessHelper.process_helper()
    """

    def __init__(self):
        super(ProcessHelper, self).__init__()

    @classmethod
    def process_helper(cls):
        """Returns a platform-specific ProcessHelper instance.
        @return a ProcessHelper instance that does the right thing for
        the current platform.
        """

        # If you add a new platform, create an instance here and
        # return it.
        if os.name == "nt":
            return WindowsProcessHelper()
        # For all POSIX-like systems.
        return UnixProcessHelper()

    def create_piped_process(self, command, new_process_group=True):
        # pylint: disable=no-self-use,unused-argument
        # As expected.  We want derived classes to implement this.
        """Creates a subprocess.Popen-based class with I/O piped to the parent.

        @param command the command line list as would be passed to
        subprocess.Popen().  Use the list form rather than the string form.

        @param new_process_group indicates if the caller wants the
        process to be created in its own process group.  Each OS handles
        this concept differently.  It provides a level of isolation and
        can simplify or enable terminating the process tree properly.

        @return a subprocess.Popen-like object.

        @raise NotImplementedError always; derived classes must override.
        """
        raise NotImplementedError("derived class must implement")

    def supports_soft_terminate(self):
        # pylint: disable=no-self-use
        # As expected.  We want derived classes to implement this.
        """Indicates if the platform supports soft termination.

        Soft termination is the concept of a terminate mechanism that
        allows the target process to shut down nicely, but with the
        catch that the process might choose to ignore it.

        Platform supporter note: only mark soft terminate as supported
        if the target process has some way to evade the soft terminate
        request; otherwise, just support the hard terminate method.

        @return True if the platform supports a soft terminate mechanism.
        """
        # By default, we do not support a soft terminate mechanism.
        return False

    def soft_terminate(self, popen_process, log_file=None, want_core=True):
        # pylint: disable=no-self-use,unused-argument
        # As expected.  We want derived classes to implement this.
        """Attempts to terminate the process in a polite way.

        This terminate method is intended to give the child process a
        chance to clean up and exit on its own, possibly with a request
        to drop a core file or equivalent (i.e. [mini-]crashdump, crashlog,
        etc.)  If new_process_group was set in the process creation method
        and the platform supports it, this terminate call will attempt to
        kill the whole process tree rooted in this child process.

        The base implementation simply calls popen_process.terminate().

        @param popen_process the subprocess.Popen-like object returned
        by one of the process-creation methods of this class.

        @param log_file file-like object used to emit error-related
        logging info.  May be None if no error-related info is desired.

        @param want_core True if the caller would like to get a core
        dump (or the analogous crash report) from the terminated process.
        """
        popen_process.terminate()

    def hard_terminate(self, popen_process, log_file=None):
        # pylint: disable=no-self-use,unused-argument
        # As expected.  We want derived classes to implement this.
        """Attempts to terminate the process immediately.

        This terminate method is intended to kill child process in
        a manner in which the child process has no ability to block,
        and also has no ability to clean up properly.  If new_process_group
        was specified when creating the process, and if the platform
        implementation supports it, this will attempt to kill the
        whole process tree rooted in the child process.

        The base implementation simply calls popen_process.kill().

        @param popen_process the subprocess.Popen-like object returned
        by one of the process-creation methods of this class.

        @param log_file file-like object used to emit error-related
        logging info.  May be None if no error-related info is desired.
        """
        popen_process.kill()

    def was_soft_terminate(self, returncode, with_core):
        # pylint: disable=no-self-use,unused-argument
        # As expected.  We want derived classes to implement this.
        """Returns if Popen-like object returncode matches soft terminate.

        @param returncode the returncode from the Popen-like object that
        terminated with a given return code.

        @param with_core indicates whether the returncode should match
        a core-generating return signal.

        @return True when the returncode represents what the system would
        issue when a soft_terminate() with the given with_core arg occurred;
        False otherwise.

        @raise NotImplementedError if the platform claims soft terminate
        support but did not override this method.
        """
        if not self.supports_soft_terminate():
            # If we don't support soft termination on this platform,
            # then this should always be False.
            return False
        # Once a platform claims to support soft terminate, it
        # needs to be able to identify it by overriding this method.
        raise NotImplementedError("platform needs to implement")

    def was_hard_terminate(self, returncode):
        # pylint: disable=no-self-use,unused-argument
        # As expected.  We want derived classes to implement this.
        """Returns if Popen-like object returncode matches that of a hard
        terminate attempt.

        @param returncode the returncode from the Popen-like object that
        terminated with a given return code.

        @return True when the returncode represents what the system would
        issue when a hard_terminate() occurred; False
        otherwise.

        @raise NotImplementedError always; derived classes must override.
        """
        raise NotImplementedError("platform needs to implement")

    def soft_terminate_signals(self):
        # pylint: disable=no-self-use
        """Retrieve signal numbers that can be sent to soft terminate.
        @return a list of signal numbers that can be sent to soft terminate
        a process, or None if not applicable.
        """
        return None

    def is_exceptional_exit(self, popen_status):
        """Returns whether the program exit status is exceptional.

        Returns whether the return code from a Popen process is exceptional
        (e.g. signals on POSIX systems).

        Derived classes should override this if they can detect exceptional
        program exit.

        @return True if the given popen_status represents an exceptional
        program exit; False otherwise.
        """
        return False

    def exceptional_exit_details(self, popen_status):
        """Returns the normalized exceptional exit code and a description.

        Given an exceptional exit code, returns the integral value of the
        exception (e.g. signal number for POSIX) and a description (e.g.
        signal name on POSIX) for the result.

        Derived classes should override this if they can detect exceptional
        program exit.

        It is fine to not implement this so long as is_exceptional_exit()
        always returns False.

        @return (normalized exception code, symbolic exception description)

        @raise NotImplementedError always in this base implementation.
        """
        raise NotImplementedError(
            "exceptional_exit_details() called on unsupported class"
        )


class UnixProcessHelper(ProcessHelper):
    """Provides a ProcessHelper for Unix-like operating systems.

    This implementation supports anything that looks Posix-y
    (e.g. Darwin, Linux, *BSD, etc.)
    """

    def __init__(self):
        super(UnixProcessHelper, self).__init__()

    @classmethod
    def _create_new_process_group(cls):
        """Creates a new process group for the calling process."""
        os.setpgid(os.getpid(), os.getpid())

    def create_piped_process(self, command, new_process_group=True):
        """Creates a Popen child with stdin/stdout/stderr piped to the parent.

        @param command the command line list passed to subprocess.Popen().

        @param new_process_group if True, the child puts itself into a new
        process group (via a preexec function) before exec.

        @return the subprocess.Popen object, with an extra
        using_process_groups attribute recording new_process_group.
        """
        # Determine what to run after the fork but before the exec.
        preexec_func = self._create_new_process_group if new_process_group else None

        # Create the process.
        process = subprocess.Popen(
            command,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            universal_newlines=True,  # Elicits automatic byte -> string decoding in Py3
            close_fds=True,
            preexec_fn=preexec_func,
        )

        # Remember whether we're using process groups for this
        # process.
        process.using_process_groups = new_process_group
        return process

    def supports_soft_terminate(self):
        # POSIX does support a soft terminate via:
        # * SIGTERM (no core requested)
        # * SIGQUIT (core requested if enabled, see ulimit -c)
        return True

    @classmethod
    def _validate_pre_terminate(cls, popen_process, log_file):
        """Checks whether it makes sense to signal the given process.

        @param popen_process the Popen-like object created by
        create_piped_process() (its using_process_groups attribute is read).

        @param log_file file-like object for diagnostics, or None.

        @return True if a signal should be sent; False if it should be
        skipped.

        @raise ValueError if popen_process is None.
        """
        # Validate args.
        if popen_process is None:
            raise ValueError("popen_process is None")

        # Ensure we have something that looks like a valid process.
        if popen_process.pid < 1:
            if log_file:
                log_file.write("skipping soft_terminate(): no process id")
            return False

        # We only do the process liveness check if we're not using
        # process groups.  With process groups, checking if the main
        # inferior process is dead and short circuiting here is no
        # good - children of it in the process group could still be
        # alive, and they should be killed during a timeout.
        if not popen_process.using_process_groups:
            # Don't kill if it's already dead.
            popen_process.poll()
            if popen_process.returncode is not None:
                # It has a returncode.  It has already stopped.
                if log_file:
                    log_file.write(
                        "requested to terminate pid {} but it has already "
                        "terminated, returncode {}".format(
                            popen_process.pid, popen_process.returncode
                        )
                    )
                # Move along...
                return False

        # Good to go.
        return True

    def _kill_with_signal(self, popen_process, log_file, signum):
        """Sends signum to the process, or to its process group if in use.

        A "no such process" (ESRCH) error is ignored; any other OSError
        propagates to the caller.
        """
        # Validate we're ready to terminate this.
        if not self._validate_pre_terminate(popen_process, log_file):
            return

        # Choose kill mechanism based on whether we're targeting
        # a process group or just a process.
        try:
            if popen_process.using_process_groups:
                os.killpg(popen_process.pid, signum)
            else:
                os.kill(popen_process.pid, signum)
        except OSError as error:
            # ESRCH is okay - failed to find the process.  It may be
            # that the timeout pre-kill hook eliminated the process.
            if error.errno != errno.ESRCH:
                raise

    def soft_terminate(self, popen_process, log_file=None, want_core=True):
        # Choose signal based on desire for core file.
        if want_core:
            # SIGQUIT will generate core by default.  Can be caught.
            signum = signal.SIGQUIT
        else:
            # SIGTERM is the traditional nice way to kill a process.
            # Can be caught, doesn't generate a core.
            signum = signal.SIGTERM

        self._kill_with_signal(popen_process, log_file, signum)

    def hard_terminate(self, popen_process, log_file=None):
        self._kill_with_signal(popen_process, log_file, signal.SIGKILL)

    def was_soft_terminate(self, returncode, with_core):
        # Compares against the negated signal number that soft_terminate()
        # would have sent for the same with_core value.
        if with_core:
            return returncode == -signal.SIGQUIT
        return returncode == -signal.SIGTERM

    def was_hard_terminate(self, returncode):
        return returncode == -signal.SIGKILL

    def soft_terminate_signals(self):
        return [signal.SIGQUIT, signal.SIGTERM]

    def is_exceptional_exit(self, popen_status):
        # Negative statuses are treated as death-by-signal.
        return popen_status < 0

    @classmethod
    def _signal_names_by_number(cls):
        """Returns a dict mapping signal values to their SIG* names.

        Names are visited in reverse alphabetical order, so when several
        names share one value the alphabetically first name is the one kept.
        """
        return {
            number: name
            for name, number in sorted(signal.__dict__.items(), reverse=True)
            if name.startswith("SIG") and not name.startswith("SIG_")
        }

    def exceptional_exit_details(self, popen_status):
        """Returns (signal number, signal name) for a negative exit status.

        The name is an empty string if the number is not a known signal.
        """
        signo = -popen_status
        signal_name = self._signal_names_by_number().get(signo, "")
        return (signo, signal_name)


class WindowsProcessHelper(ProcessHelper):
    """Provides a Windows implementation of the ProcessHelper class."""

    def __init__(self):
        super(WindowsProcessHelper, self).__init__()

    def create_piped_process(self, command, new_process_group=True):
        """Creates a Popen child with stdin/stdout/stderr piped to the parent.

        @param command the command line list passed to subprocess.Popen().

        @param new_process_group if True, the child is created with the
        CREATE_NEW_PROCESS_GROUP creation flag.

        @return the subprocess.Popen object.
        """
        if new_process_group:
            # We need this flag if we want os.kill() to work on the subprocess.
            creation_flags = subprocess.CREATE_NEW_PROCESS_GROUP
        else:
            creation_flags = 0

        return subprocess.Popen(
            command,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            universal_newlines=True,  # Elicits automatic byte -> string decoding in Py3
            creationflags=creation_flags,
        )

    def was_hard_terminate(self, returncode):
        # Any non-zero return code is reported as a hard terminate.
        return returncode != 0


class ProcessDriver(object):
    """Drives a child process, notifies on important events, and can timeout.

    Clients are expected to derive from this class and override the
    on_process_started and on_process_exited methods if they want to
    hook either of those.

    This class supports timing out the child process in a platform-agnostic
    way.  The on_process_exited method is informed if the exit was natural
    or if it was due to a timeout.
    """

    def __init__(self, soft_terminate_timeout=10.0):
        """Initializes the driver.

        @param soft_terminate_timeout number of seconds to wait for a soft
        terminate to take effect before trying a hard terminate.
        """
        super(ProcessDriver, self).__init__()
        self.process_helper = ProcessHelper.process_helper()
        self.pid = None
        # Create the synchronization event for notifying when the
        # inferior dotest process is complete.
        self.done_event = threading.Event()
        self.io_thread = None
        self.process = None
        # Number of seconds to wait for the soft terminate to
        # wrap up, before moving to more drastic measures.
        # Might want this longer if core dumps are generated and
        # take a long time to write out.
        self.soft_terminate_timeout = soft_terminate_timeout
        # Number of seconds to wait for the hard terminate to
        # wrap up, before giving up on the io thread.  This should
        # be fast.
        self.hard_terminate_timeout = 5.0
        self.returncode = None

    # =============================================
    # Methods for subclasses to override if desired.
    # =============================================

    def on_process_started(self):
        """Called right after the child process has been created."""

    def on_process_exited(self, command, output, was_timeout, exit_status):
        """Called once the driver is done with the child process.

        @param command the command that was run.

        @param output the result of communicate() on the child as collected
        by the io thread (None if it was never collected).

        @param was_timeout True if the process did not finish within the
        requested timeout.

        @param exit_status the return code of the child, or None if it
        could not be determined.
        """

    def on_timeout_pre_kill(self):
        """Called after the timeout interval elapses but before killing it.

        This method is added to enable derived classes the ability to do
        something to the process prior to it being killed.  For example,
        this would be a good spot to run a program that samples the process
        to see what it was doing (or not doing).

        Do not attempt to reap the process (i.e. use wait()) in this method.
        That will interfere with the kill mechanism and return code processing.
        """

    def write(self, content):
        # pylint: disable=no-self-use
        # Intended - we want derived classes to be able to override
        # this and use any self state they may contain.
        """Writes diagnostic content; by default to sys.stdout."""
        sys.stdout.write(content)

    # ==============================================================
    # Operations used to drive processes.  Clients will want to call
    # one of these.
    # ==============================================================

    def run_command(self, command):
        """Runs command to completion, without any timeout.

        @param command the command line list to run.

        @raise Exception if no exit status is available after the child
        finished communicating.
        """
        # Start up the child process and the thread that does the
        # communication pump.
        self._start_process_and_io_thread(command)

        # Wait indefinitely for the child process to finish
        # communicating.  This indicates it has closed stdout/stderr
        # pipes and is done.
        self.io_thread.join()
        self.returncode = self.process.wait()
        if self.returncode is None:
            raise Exception(
                "no exit status available for pid {} after the "
                " inferior dotest.py should have completed".format(self.process.pid)
            )

        # Notify of non-timeout exit.
        self.on_process_exited(command, self.io_thread.output, False, self.returncode)

    def run_command_with_timeout(self, command, timeout, want_core):
        """Runs command, terminating it if it exceeds the given timeout.

        @param command the command line list to run.

        @param timeout a timeout spec as accepted by timeout_to_seconds(),
        or None to wait without a time limit.

        @param want_core True if a core dump is desired when the process
        has to be soft terminated.
        """
        # Figure out how many seconds our timeout description is requesting.
        timeout_seconds = timeout_to_seconds(timeout)

        # Start up the child process and the thread that does the
        # communication pump.
        self._start_process_and_io_thread(command)

        self._wait_with_timeout(timeout_seconds, command, want_core)

    # ================
    # Internal details.
    # ================

    def _start_process_and_io_thread(self, command):
        """Creates the child process and starts its communication thread."""
        # Create the process.
        self.process = self.process_helper.create_piped_process(command)
        self.pid = self.process.pid
        self.on_process_started()

        # Ensure the event is cleared that is used for signaling
        # from the communication() thread when communication is
        # complete (i.e. the inferior process has finished).
        self.done_event.clear()

        # Hand over this driver as the log sink: the thread reports errors
        # through the sink's write() method, which this class provides.
        self.io_thread = CommunicatorThread(self.process, self.done_event, self)
        self.io_thread.start()

    def _attempt_soft_kill(self, want_core):
        """Tries a soft terminate and waits for the io thread to finish.

        @return a (terminated, done_trying) tuple.  done_trying is None
        when the process terminated, and False when further attempts
        should be made.
        """
        # The inferior dotest timed out.  Attempt to clean it
        # with a non-drastic method (so it can clean up properly
        # and/or generate a core dump).  Often the OS can't guarantee
        # that the process will really terminate after this.
        self.process_helper.soft_terminate(
            self.process, want_core=want_core, log_file=self
        )

        # Now wait up to a certain timeout period for the io thread
        # to say that the communication ended.  If that wraps up
        # within our soft terminate timeout, we're all done here.
        self.io_thread.join(self.soft_terminate_timeout)
        if not self.io_thread.is_alive():
            # stdout/stderr were closed on the child process side. We
            # should be able to wait and reap the child process here.
            self.returncode = self.process.wait()
            # We terminated, and the done_trying result is n/a
            terminated = True
            done_trying = None
        else:
            self.write(
                "soft kill attempt of process {} timed out "
                "after {} seconds\n".format(
                    self.process.pid, self.soft_terminate_timeout
                )
            )
            terminated = False
            done_trying = False
        return terminated, done_trying

    def _attempt_hard_kill(self):
        """Hard terminates the process and reaps it.

        @return a (terminated, done_trying) tuple; done_trying is always
        True since there is nothing stronger to try afterwards.
        """
        # Instruct the process to terminate and really force it to
        # happen.  Don't give the process a chance to ignore.
        self.process_helper.hard_terminate(self.process, log_file=self)

        # Reap the child process.  This should not hang as the
        # hard_kill() mechanism is supposed to really kill it.
        # Improvement option:
        # If this does ever hang, convert to a self.process.poll()
        # loop checking on self.process.returncode until it is not
        # None or the timeout occurs.
        self.returncode = self.process.wait()

        # Wait a few moments for the io thread to finish...
        self.io_thread.join(self.hard_terminate_timeout)
        if self.io_thread.is_alive():
            # ... but this is not critical if it doesn't end for some
            # reason.
            self.write(
                "hard kill of process {} timed out after {} seconds waiting "
                "for the io thread (ignoring)\n".format(
                    self.process.pid, self.hard_terminate_timeout
                )
            )

        # Set if it terminated.  (Set up for optional improvement above).
        terminated = self.returncode is not None
        # Nothing else to try.
        done_trying = True

        return terminated, done_trying

    def _attempt_termination(self, attempt_count, want_core):
        """Runs the termination strategy for the given 1-based attempt.

        With soft terminate support: attempt 1 is a soft kill and attempt 2
        a hard kill.  Without it: attempt 1 is a hard kill.  Any later
        attempt just reports the current state with done_trying set.

        @return a (terminated, done_trying) tuple.
        """
        if self.process_helper.supports_soft_terminate():
            # When soft termination is supported, we first try to stop
            # the process with a soft terminate.  Failing that, we try
            # the hard terminate option.
            if attempt_count == 1:
                return self._attempt_soft_kill(want_core)
            if attempt_count == 2:
                return self._attempt_hard_kill()
        elif attempt_count == 1:
            # We only try the hard terminate option when there
            # is no soft terminate available.
            return self._attempt_hard_kill()

        # We don't have anything else to try.
        terminated = self.returncode is not None
        done_trying = True
        return terminated, done_trying

    def _wait_with_timeout(self, timeout_seconds, command, want_core):
        """Waits for the child, terminating it if the timeout elapses.

        @param timeout_seconds seconds to wait for the io thread to signal
        completion, or None to wait without a time limit.

        @param command the command being run; forwarded to
        on_process_exited().

        @param want_core True if a core dump is desired on soft terminate.
        """
        # Allow up to timeout seconds for the io thread to wrap up.
        # If that completes, the child process should be done.
        completed_normally = self.done_event.wait(timeout_seconds)
        if completed_normally:
            # Reap the child process here.
            self.returncode = self.process.wait()
        else:
            # Allow derived classes to do some work after we detected
            # a timeout but before we touch the timed-out process.
            self.on_timeout_pre_kill()

            # Prepare to stop the process
            process_terminated = completed_normally
            terminate_attempt_count = 0

            # Try as many attempts as we support for trying to shut down
            # the child process if it's not already shut down.
            while not process_terminated:
                terminate_attempt_count += 1
                # Attempt to terminate.
                process_terminated, done_trying = self._attempt_termination(
                    terminate_attempt_count, want_core
                )
                # Check if there's nothing more to try.
                if done_trying:
                    # Break out of our termination attempt loop.
                    break

        # At this point, we're calling it good.  The process
        # finished gracefully, was shut down after one or more
        # attempts, or we failed but gave it our best effort.
        self.on_process_exited(
            command, self.io_thread.output, not completed_normally, self.returncode
        )


def patched_init(self, *args, **kwargs):
    """Replacement for Popen.__init__ that also creates wait_condition."""
    self.original_init(*args, **kwargs)
    # Initialize our condition variable that protects wait()/poll().
    self.wait_condition = threading.Condition()


def patched_wait(self, *args, **kwargs):
    """Replacement for Popen.wait() that runs under wait_condition.

    The condition is held for the whole duration of the original wait().
    """
    with self.wait_condition:
        result = self.original_wait(*args, **kwargs)
        # The process finished.  Signal the condition.
        self.wait_condition.notify_all()
        return result


def patched_poll(self, *args, **kwargs):
    """Replacement for Popen.poll() that runs under wait_condition."""
    with self.wait_condition:
        result = self.original_poll(*args, **kwargs)
        if self.returncode is not None:
            # We did complete, and we have the return value.
            # Signal the event to indicate we're done.
            self.wait_condition.notify_all()
        return result


def patch_up_subprocess_popen():
    """Installs the patched __init__/wait/poll on subprocess.Popen.

    The original methods are kept on the class as original_init,
    original_wait and original_poll.
    """
    subprocess.Popen.original_init = subprocess.Popen.__init__
    subprocess.Popen.__init__ = patched_init

    subprocess.Popen.original_wait = subprocess.Popen.wait
    subprocess.Popen.wait = patched_wait

    subprocess.Popen.original_poll = subprocess.Popen.poll
    subprocess.Popen.poll = patched_poll


# Replace key subprocess.Popen() threading-unprotected methods with
# threading-protected versions.
patch_up_subprocess_popen()