1from __future__ import absolute_import 2import errno 3import io 4import itertools 5import getopt 6import os, signal, subprocess, sys 7import re 8import stat 9import pathlib 10import platform 11import shlex 12import shutil 13import tempfile 14import threading 15import typing 16from typing import Optional, Tuple 17 18import io 19 20try: 21 from StringIO import StringIO 22except ImportError: 23 from io import StringIO 24 25from lit.ShCommands import GlobItem, Command 26import lit.ShUtil as ShUtil 27import lit.Test as Test 28import lit.util 29from lit.util import to_bytes, to_string, to_unicode 30from lit.BooleanExpression import BooleanExpression 31 32 33class InternalShellError(Exception): 34 def __init__(self, command, message): 35 self.command = command 36 self.message = message 37 38 39class ScriptFatal(Exception): 40 """ 41 A script had a fatal error such that there's no point in retrying. The 42 message has not been emitted on stdout or stderr but is instead included in 43 this exception. 44 """ 45 46 def __init__(self, message): 47 super().__init__(message) 48 49 50kIsWindows = platform.system() == "Windows" 51 52# Don't use close_fds on Windows. 53kUseCloseFDs = not kIsWindows 54 55# Use temporary files to replace /dev/null on Windows. 56kAvoidDevNull = kIsWindows 57kDevNull = "/dev/null" 58 59# A regex that matches %dbg(ARG), which lit inserts at the beginning of each 60# run command pipeline such that ARG specifies the pipeline's source line 61# number. lit later expands each %dbg(ARG) to a command that behaves as a null 62# command in the target shell so that the line number is seen in lit's verbose 63# mode. 64# 65# This regex captures ARG. ARG must not contain a right parenthesis, which 66# terminates %dbg. ARG must not contain quotes, in which ARG might be enclosed 67# during expansion. 68# 69# COMMAND that follows %dbg(ARG) is also captured. COMMAND can be 70# empty as a result of conditinal substitution. 71kPdbgRegex = "%dbg\\(([^)'\"]*)\\)((?:.|\\n)*)" 72 73 74def buildPdbgCommand(msg, cmd): 75 res = f"%dbg({msg}) {cmd}" 76 assert re.fullmatch( 77 kPdbgRegex, res 78 ), f"kPdbgRegex expected to match actual %dbg usage: {res}" 79 return res 80 81 82class ShellEnvironment(object): 83 84 """Mutable shell environment containing things like CWD and env vars. 85 86 Environment variables are not implemented, but cwd tracking is. In addition, 87 we maintain a dir stack for pushd/popd. 88 """ 89 90 def __init__(self, cwd, env): 91 self.cwd = cwd 92 self.env = dict(env) 93 self.dirStack = [] 94 95 def change_dir(self, newdir): 96 if os.path.isabs(newdir): 97 self.cwd = newdir 98 else: 99 self.cwd = lit.util.abs_path_preserve_drive(os.path.join(self.cwd, newdir)) 100 101 102class TimeoutHelper(object): 103 """ 104 Object used to helper manage enforcing a timeout in 105 _executeShCmd(). It is passed through recursive calls 106 to collect processes that have been executed so that when 107 the timeout happens they can be killed. 
108 """ 109 110 def __init__(self, timeout): 111 self.timeout = timeout 112 self._procs = [] 113 self._timeoutReached = False 114 self._doneKillPass = False 115 # This lock will be used to protect concurrent access 116 # to _procs and _doneKillPass 117 self._lock = None 118 self._timer = None 119 120 def cancel(self): 121 if not self.active(): 122 return 123 self._timer.cancel() 124 125 def active(self): 126 return self.timeout > 0 127 128 def addProcess(self, proc): 129 if not self.active(): 130 return 131 needToRunKill = False 132 with self._lock: 133 self._procs.append(proc) 134 # Avoid re-entering the lock by finding out if kill needs to be run 135 # again here but call it if necessary once we have left the lock. 136 # We could use a reentrant lock here instead but this code seems 137 # clearer to me. 138 needToRunKill = self._doneKillPass 139 140 # The initial call to _kill() from the timer thread already happened so 141 # we need to call it again from this thread, otherwise this process 142 # will be left to run even though the timeout was already hit 143 if needToRunKill: 144 assert self.timeoutReached() 145 self._kill() 146 147 def startTimer(self): 148 if not self.active(): 149 return 150 151 # Do some late initialisation that's only needed 152 # if there is a timeout set 153 self._lock = threading.Lock() 154 self._timer = threading.Timer(self.timeout, self._handleTimeoutReached) 155 self._timer.start() 156 157 def _handleTimeoutReached(self): 158 self._timeoutReached = True 159 self._kill() 160 161 def timeoutReached(self): 162 return self._timeoutReached 163 164 def _kill(self): 165 """ 166 This method may be called multiple times as we might get unlucky 167 and be in the middle of creating a new process in _executeShCmd() 168 which won't yet be in ``self._procs``. By locking here and in 169 addProcess() we should be able to kill processes launched after 170 the initial call to _kill() 171 """ 172 with self._lock: 173 for p in self._procs: 174 lit.util.killProcessAndChildren(p.pid) 175 # Empty the list and note that we've done a pass over the list 176 self._procs = [] # Python2 doesn't have list.clear() 177 self._doneKillPass = True 178 179 180class ShellCommandResult(object): 181 """Captures the result of an individual command.""" 182 183 def __init__( 184 self, command, stdout, stderr, exitCode, timeoutReached, outputFiles=[] 185 ): 186 self.command = command 187 self.stdout = stdout 188 self.stderr = stderr 189 self.exitCode = exitCode 190 self.timeoutReached = timeoutReached 191 self.outputFiles = list(outputFiles) 192 193 194def executeShCmd(cmd, shenv, results, timeout=0): 195 """ 196 Wrapper around _executeShCmd that handles 197 timeout 198 """ 199 # Use the helper even when no timeout is required to make 200 # other code simpler (i.e. 
avoid bunch of ``is not None`` checks) 201 timeoutHelper = TimeoutHelper(timeout) 202 if timeout > 0: 203 timeoutHelper.startTimer() 204 finalExitCode = _executeShCmd(cmd, shenv, results, timeoutHelper) 205 timeoutHelper.cancel() 206 timeoutInfo = None 207 if timeoutHelper.timeoutReached(): 208 timeoutInfo = "Reached timeout of {} seconds".format(timeout) 209 210 return (finalExitCode, timeoutInfo) 211 212 213def expand_glob(arg, cwd): 214 if isinstance(arg, GlobItem): 215 return sorted(arg.resolve(cwd)) 216 return [arg] 217 218 219def expand_glob_expressions(args, cwd): 220 result = [args[0]] 221 for arg in args[1:]: 222 result.extend(expand_glob(arg, cwd)) 223 return result 224 225 226def quote_windows_command(seq): 227 r""" 228 Reimplement Python's private subprocess.list2cmdline for MSys compatibility 229 230 Based on CPython implementation here: 231 https://hg.python.org/cpython/file/849826a900d2/Lib/subprocess.py#l422 232 233 Some core util distributions (MSys) don't tokenize command line arguments 234 the same way that MSVC CRT does. Lit rolls its own quoting logic similar to 235 the stock CPython logic to paper over these quoting and tokenization rule 236 differences. 237 238 We use the same algorithm from MSDN as CPython 239 (http://msdn.microsoft.com/en-us/library/17w5ykft.aspx), but we treat more 240 characters as needing quoting, such as double quotes themselves, and square 241 brackets. 242 243 For MSys based tools, this is very brittle though, because quoting an 244 argument makes the MSys based tool unescape backslashes where it shouldn't 245 (e.g. "a\b\\c\\\\d" becomes "a\b\c\\d" where it should stay as it was, 246 according to regular win32 command line parsing rules). 247 """ 248 result = [] 249 needquote = False 250 for arg in seq: 251 bs_buf = [] 252 253 # Add a space to separate this argument from the others 254 if result: 255 result.append(" ") 256 257 # This logic differs from upstream list2cmdline. 258 needquote = ( 259 (" " in arg) 260 or ("\t" in arg) 261 or ('"' in arg) 262 or ("[" in arg) 263 or (";" in arg) 264 or not arg 265 ) 266 if needquote: 267 result.append('"') 268 269 for c in arg: 270 if c == "\\": 271 # Don't know if we need to double yet. 272 bs_buf.append(c) 273 elif c == '"': 274 # Double backslashes. 275 result.append("\\" * len(bs_buf) * 2) 276 bs_buf = [] 277 result.append('\\"') 278 else: 279 # Normal char 280 if bs_buf: 281 result.extend(bs_buf) 282 bs_buf = [] 283 result.append(c) 284 285 # Add remaining backslashes, if any. 286 if bs_buf: 287 result.extend(bs_buf) 288 289 if needquote: 290 result.extend(bs_buf) 291 result.append('"') 292 293 return "".join(result) 294 295 296# args are from 'export' or 'env' command. 297# Skips the command, and parses its arguments. 298# Modifies env accordingly. 299# Returns copy of args without the command or its arguments. 300def updateEnv(env, args): 301 arg_idx_next = len(args) 302 unset_next_env_var = False 303 for arg_idx, arg in enumerate(args[1:]): 304 # Support for the -u flag (unsetting) for env command 305 # e.g., env -u FOO -u BAR will remove both FOO and BAR 306 # from the environment. 307 if arg == "-u": 308 unset_next_env_var = True 309 continue 310 if unset_next_env_var: 311 unset_next_env_var = False 312 if arg in env.env: 313 del env.env[arg] 314 continue 315 316 # Partition the string into KEY=VALUE. 317 key, eq, val = arg.partition("=") 318 # Stop if there was no equals. 
319 if eq == "": 320 arg_idx_next = arg_idx + 1 321 break 322 env.env[key] = val 323 return args[arg_idx_next:] 324 325 326def executeBuiltinCd(cmd, shenv): 327 """executeBuiltinCd - Change the current directory.""" 328 if len(cmd.args) != 2: 329 raise InternalShellError(cmd, "'cd' supports only one argument") 330 # Update the cwd in the parent environment. 331 shenv.change_dir(cmd.args[1]) 332 # The cd builtin always succeeds. If the directory does not exist, the 333 # following Popen calls will fail instead. 334 return ShellCommandResult(cmd, "", "", 0, False) 335 336 337def executeBuiltinPushd(cmd, shenv): 338 """executeBuiltinPushd - Change the current dir and save the old.""" 339 if len(cmd.args) != 2: 340 raise InternalShellError(cmd, "'pushd' supports only one argument") 341 shenv.dirStack.append(shenv.cwd) 342 shenv.change_dir(cmd.args[1]) 343 return ShellCommandResult(cmd, "", "", 0, False) 344 345 346def executeBuiltinPopd(cmd, shenv): 347 """executeBuiltinPopd - Restore a previously saved working directory.""" 348 if len(cmd.args) != 1: 349 raise InternalShellError(cmd, "'popd' does not support arguments") 350 if not shenv.dirStack: 351 raise InternalShellError(cmd, "popd: directory stack empty") 352 shenv.cwd = shenv.dirStack.pop() 353 return ShellCommandResult(cmd, "", "", 0, False) 354 355 356def executeBuiltinExport(cmd, shenv): 357 """executeBuiltinExport - Set an environment variable.""" 358 if len(cmd.args) != 2: 359 raise InternalShellError(cmd, "'export' supports only one argument") 360 updateEnv(shenv, cmd.args) 361 return ShellCommandResult(cmd, "", "", 0, False) 362 363 364def executeBuiltinEcho(cmd, shenv): 365 """Interpret a redirected echo or @echo command""" 366 opened_files = [] 367 stdin, stdout, stderr = processRedirects(cmd, subprocess.PIPE, shenv, opened_files) 368 if stdin != subprocess.PIPE or stderr != subprocess.PIPE: 369 raise InternalShellError( 370 cmd, f"stdin and stderr redirects not supported for {cmd.args[0]}" 371 ) 372 373 # Some tests have un-redirected echo commands to help debug test failures. 374 # Buffer our output and return it to the caller. 375 is_redirected = True 376 encode = lambda x: x 377 if stdout == subprocess.PIPE: 378 is_redirected = False 379 stdout = StringIO() 380 elif kIsWindows: 381 # Reopen stdout in binary mode to avoid CRLF translation. The versions 382 # of echo we are replacing on Windows all emit plain LF, and the LLVM 383 # tests now depend on this. 384 # When we open as binary, however, this also means that we have to write 385 # 'bytes' objects to stdout instead of 'str' objects. 386 encode = lit.util.to_bytes 387 stdout = open(stdout.name, stdout.mode + "b") 388 opened_files.append((None, None, stdout, None)) 389 390 # Implement echo flags. We only support -e and -n, and not yet in 391 # combination. We have to ignore unknown flags, because `echo "-D FOO"` 392 # prints the dash. 
393 args = cmd.args[1:] 394 interpret_escapes = False 395 write_newline = True 396 while len(args) >= 1 and args[0] in ("-e", "-n"): 397 flag = args[0] 398 args = args[1:] 399 if flag == "-e": 400 interpret_escapes = True 401 elif flag == "-n": 402 write_newline = False 403 404 def maybeUnescape(arg): 405 if not interpret_escapes: 406 return arg 407 408 arg = lit.util.to_bytes(arg) 409 return arg.decode("unicode_escape") 410 411 if args: 412 for arg in args[:-1]: 413 stdout.write(encode(maybeUnescape(arg))) 414 stdout.write(encode(" ")) 415 stdout.write(encode(maybeUnescape(args[-1]))) 416 if write_newline: 417 stdout.write(encode("\n")) 418 419 for (name, mode, f, path) in opened_files: 420 f.close() 421 422 output = "" if is_redirected else stdout.getvalue() 423 return ShellCommandResult(cmd, output, "", 0, False) 424 425 426def executeBuiltinMkdir(cmd, cmd_shenv): 427 """executeBuiltinMkdir - Create new directories.""" 428 args = expand_glob_expressions(cmd.args, cmd_shenv.cwd)[1:] 429 try: 430 opts, args = getopt.gnu_getopt(args, "p") 431 except getopt.GetoptError as err: 432 raise InternalShellError(cmd, "Unsupported: 'mkdir': %s" % str(err)) 433 434 parent = False 435 for o, a in opts: 436 if o == "-p": 437 parent = True 438 else: 439 assert False, "unhandled option" 440 441 if len(args) == 0: 442 raise InternalShellError(cmd, "Error: 'mkdir' is missing an operand") 443 444 stderr = StringIO() 445 exitCode = 0 446 for dir in args: 447 cwd = cmd_shenv.cwd 448 dir = to_unicode(dir) if kIsWindows else to_bytes(dir) 449 cwd = to_unicode(cwd) if kIsWindows else to_bytes(cwd) 450 if not os.path.isabs(dir): 451 dir = lit.util.abs_path_preserve_drive(os.path.join(cwd, dir)) 452 if parent: 453 lit.util.mkdir_p(dir) 454 else: 455 try: 456 lit.util.mkdir(dir) 457 except OSError as err: 458 stderr.write("Error: 'mkdir' command failed, %s\n" % str(err)) 459 exitCode = 1 460 return ShellCommandResult(cmd, "", stderr.getvalue(), exitCode, False) 461 462 463def executeBuiltinRm(cmd, cmd_shenv): 464 """executeBuiltinRm - Removes (deletes) files or directories.""" 465 args = expand_glob_expressions(cmd.args, cmd_shenv.cwd)[1:] 466 try: 467 opts, args = getopt.gnu_getopt(args, "frR", ["--recursive"]) 468 except getopt.GetoptError as err: 469 raise InternalShellError(cmd, "Unsupported: 'rm': %s" % str(err)) 470 471 force = False 472 recursive = False 473 for o, a in opts: 474 if o == "-f": 475 force = True 476 elif o in ("-r", "-R", "--recursive"): 477 recursive = True 478 else: 479 assert False, "unhandled option" 480 481 if len(args) == 0: 482 raise InternalShellError(cmd, "Error: 'rm' is missing an operand") 483 484 def on_rm_error(func, path, exc_info): 485 # path contains the path of the file that couldn't be removed 486 # let's just assume that it's read-only and remove it. 
487 os.chmod(path, stat.S_IMODE(os.stat(path).st_mode) | stat.S_IWRITE) 488 os.remove(path) 489 490 stderr = StringIO() 491 exitCode = 0 492 for path in args: 493 cwd = cmd_shenv.cwd 494 path = to_unicode(path) if kIsWindows else to_bytes(path) 495 cwd = to_unicode(cwd) if kIsWindows else to_bytes(cwd) 496 if not os.path.isabs(path): 497 path = lit.util.abs_path_preserve_drive(os.path.join(cwd, path)) 498 if force and not os.path.exists(path): 499 continue 500 try: 501 if os.path.isdir(path): 502 if not recursive: 503 stderr.write("Error: %s is a directory\n" % path) 504 exitCode = 1 505 if platform.system() == "Windows": 506 # NOTE: use ctypes to access `SHFileOperationsW` on Windows to 507 # use the NT style path to get access to long file paths which 508 # cannot be removed otherwise. 509 from ctypes.wintypes import BOOL, HWND, LPCWSTR, UINT, WORD 510 from ctypes import addressof, byref, c_void_p, create_unicode_buffer 511 from ctypes import Structure 512 from ctypes import windll, WinError, POINTER 513 514 class SHFILEOPSTRUCTW(Structure): 515 _fields_ = [ 516 ("hWnd", HWND), 517 ("wFunc", UINT), 518 ("pFrom", LPCWSTR), 519 ("pTo", LPCWSTR), 520 ("fFlags", WORD), 521 ("fAnyOperationsAborted", BOOL), 522 ("hNameMappings", c_void_p), 523 ("lpszProgressTitle", LPCWSTR), 524 ] 525 526 FO_MOVE, FO_COPY, FO_DELETE, FO_RENAME = range(1, 5) 527 528 FOF_SILENT = 4 529 FOF_NOCONFIRMATION = 16 530 FOF_NOCONFIRMMKDIR = 512 531 FOF_NOERRORUI = 1024 532 533 FOF_NO_UI = ( 534 FOF_SILENT 535 | FOF_NOCONFIRMATION 536 | FOF_NOERRORUI 537 | FOF_NOCONFIRMMKDIR 538 ) 539 540 SHFileOperationW = windll.shell32.SHFileOperationW 541 SHFileOperationW.argtypes = [POINTER(SHFILEOPSTRUCTW)] 542 543 path = os.path.abspath(path) 544 545 pFrom = create_unicode_buffer(path, len(path) + 2) 546 pFrom[len(path)] = pFrom[len(path) + 1] = "\0" 547 operation = SHFILEOPSTRUCTW( 548 wFunc=UINT(FO_DELETE), 549 pFrom=LPCWSTR(addressof(pFrom)), 550 fFlags=FOF_NO_UI, 551 ) 552 result = SHFileOperationW(byref(operation)) 553 if result: 554 raise WinError(result) 555 else: 556 shutil.rmtree(path, onerror=on_rm_error if force else None) 557 else: 558 if force and not os.access(path, os.W_OK): 559 os.chmod(path, stat.S_IMODE(os.stat(path).st_mode) | stat.S_IWRITE) 560 os.remove(path) 561 except OSError as err: 562 stderr.write("Error: 'rm' command failed, %s" % str(err)) 563 exitCode = 1 564 return ShellCommandResult(cmd, "", stderr.getvalue(), exitCode, False) 565 566 567def executeBuiltinColon(cmd, cmd_shenv): 568 """executeBuiltinColon - Discard arguments and exit with status 0.""" 569 return ShellCommandResult(cmd, "", "", 0, False) 570 571 572def processRedirects(cmd, stdin_source, cmd_shenv, opened_files): 573 """Return the standard fds for cmd after applying redirects 574 575 Returns the three standard file descriptors for the new child process. Each 576 fd may be an open, writable file object or a sentinel value from the 577 subprocess module. 578 """ 579 580 # Apply the redirections, we use (N,) as a sentinel to indicate stdin, 581 # stdout, stderr for N equal to 0, 1, or 2 respectively. Redirects to or 582 # from a file are represented with a list [file, mode, file-object] 583 # where file-object is initially None. 
584 redirects = [(0,), (1,), (2,)] 585 for (op, filename) in cmd.redirects: 586 if op == (">", 2): 587 redirects[2] = [filename, "w", None] 588 elif op == (">>", 2): 589 redirects[2] = [filename, "a", None] 590 elif op == (">&", 2) and filename in "012": 591 redirects[2] = redirects[int(filename)] 592 elif op == (">&",) or op == ("&>",): 593 redirects[1] = redirects[2] = [filename, "w", None] 594 elif op == (">",): 595 redirects[1] = [filename, "w", None] 596 elif op == (">>",): 597 redirects[1] = [filename, "a", None] 598 elif op == ("<",): 599 redirects[0] = [filename, "r", None] 600 else: 601 raise InternalShellError( 602 cmd, "Unsupported redirect: %r" % ((op, filename),) 603 ) 604 605 # Open file descriptors in a second pass. 606 std_fds = [None, None, None] 607 for (index, r) in enumerate(redirects): 608 # Handle the sentinel values for defaults up front. 609 if isinstance(r, tuple): 610 if r == (0,): 611 fd = stdin_source 612 elif r == (1,): 613 if index == 0: 614 raise InternalShellError(cmd, "Unsupported redirect for stdin") 615 elif index == 1: 616 fd = subprocess.PIPE 617 else: 618 fd = subprocess.STDOUT 619 elif r == (2,): 620 if index != 2: 621 raise InternalShellError(cmd, "Unsupported redirect on stdout") 622 fd = subprocess.PIPE 623 else: 624 raise InternalShellError(cmd, "Bad redirect") 625 std_fds[index] = fd 626 continue 627 628 (filename, mode, fd) = r 629 630 # Check if we already have an open fd. This can happen if stdout and 631 # stderr go to the same place. 632 if fd is not None: 633 std_fds[index] = fd 634 continue 635 636 redir_filename = None 637 name = expand_glob(filename, cmd_shenv.cwd) 638 if len(name) != 1: 639 raise InternalShellError( 640 cmd, "Unsupported: glob in " "redirect expanded to multiple files" 641 ) 642 name = name[0] 643 if kAvoidDevNull and name == kDevNull: 644 fd = tempfile.TemporaryFile(mode=mode) 645 elif kIsWindows and name == "/dev/tty": 646 # Simulate /dev/tty on Windows. 647 # "CON" is a special filename for the console. 648 fd = open("CON", mode) 649 else: 650 # Make sure relative paths are relative to the cwd. 651 redir_filename = os.path.join(cmd_shenv.cwd, name) 652 redir_filename = ( 653 to_unicode(redir_filename) if kIsWindows else to_bytes(redir_filename) 654 ) 655 fd = open(redir_filename, mode) 656 # Workaround a Win32 and/or subprocess bug when appending. 657 # 658 # FIXME: Actually, this is probably an instance of PR6753. 659 if mode == "a": 660 fd.seek(0, 2) 661 # Mutate the underlying redirect list so that we can redirect stdout 662 # and stderr to the same place without opening the file twice. 663 r[2] = fd 664 opened_files.append((filename, mode, fd) + (redir_filename,)) 665 std_fds[index] = fd 666 667 return std_fds 668 669 670def _executeShCmd(cmd, shenv, results, timeoutHelper): 671 if timeoutHelper.timeoutReached(): 672 # Prevent further recursion if the timeout has been hit 673 # as we should try avoid launching more processes. 
674 return None 675 676 if isinstance(cmd, ShUtil.Seq): 677 if cmd.op == ";": 678 res = _executeShCmd(cmd.lhs, shenv, results, timeoutHelper) 679 return _executeShCmd(cmd.rhs, shenv, results, timeoutHelper) 680 681 if cmd.op == "&": 682 raise InternalShellError(cmd, "unsupported shell operator: '&'") 683 684 if cmd.op == "||": 685 res = _executeShCmd(cmd.lhs, shenv, results, timeoutHelper) 686 if res != 0: 687 res = _executeShCmd(cmd.rhs, shenv, results, timeoutHelper) 688 return res 689 690 if cmd.op == "&&": 691 res = _executeShCmd(cmd.lhs, shenv, results, timeoutHelper) 692 if res is None: 693 return res 694 695 if res == 0: 696 res = _executeShCmd(cmd.rhs, shenv, results, timeoutHelper) 697 return res 698 699 raise ValueError("Unknown shell command: %r" % cmd.op) 700 assert isinstance(cmd, ShUtil.Pipeline) 701 702 procs = [] 703 proc_not_counts = [] 704 default_stdin = subprocess.PIPE 705 stderrTempFiles = [] 706 opened_files = [] 707 named_temp_files = [] 708 builtin_commands = set(["cat", "diff"]) 709 builtin_commands_dir = os.path.join( 710 os.path.dirname(os.path.abspath(__file__)), "builtin_commands" 711 ) 712 inproc_builtins = { 713 "cd": executeBuiltinCd, 714 "export": executeBuiltinExport, 715 "echo": executeBuiltinEcho, 716 "@echo": executeBuiltinEcho, 717 "mkdir": executeBuiltinMkdir, 718 "popd": executeBuiltinPopd, 719 "pushd": executeBuiltinPushd, 720 "rm": executeBuiltinRm, 721 ":": executeBuiltinColon, 722 } 723 # To avoid deadlock, we use a single stderr stream for piped 724 # output. This is null until we have seen some output using 725 # stderr. 726 for i, j in enumerate(cmd.commands): 727 # Reference the global environment by default. 728 cmd_shenv = shenv 729 args = list(j.args) 730 not_args = [] 731 not_count = 0 732 not_crash = False 733 while True: 734 if args[0] == "env": 735 # Create a copy of the global environment and modify it for 736 # this one command. There might be multiple envs in a pipeline, 737 # and there might be multiple envs in a command (usually when 738 # one comes from a substitution): 739 # env FOO=1 llc < %s | env BAR=2 llvm-mc | FileCheck %s 740 # env FOO=1 %{another_env_plus_cmd} | FileCheck %s 741 if cmd_shenv is shenv: 742 cmd_shenv = ShellEnvironment(shenv.cwd, shenv.env) 743 args = updateEnv(cmd_shenv, args) 744 if not args: 745 # Return the environment variables if no argument is provided. 746 env_str = "\n".join( 747 f"{key}={value}" for key, value in sorted(cmd_shenv.env.items()) 748 ) 749 results.append( 750 ShellCommandResult( 751 j, env_str, "", 0, timeoutHelper.timeoutReached(), [] 752 ) 753 ) 754 return 0 755 elif args[0] == "not": 756 not_args.append(args.pop(0)) 757 not_count += 1 758 if args and args[0] == "--crash": 759 not_args.append(args.pop(0)) 760 not_crash = True 761 if not args: 762 raise InternalShellError(j, "Error: 'not' requires a" " subcommand") 763 elif args[0] == "!": 764 not_args.append(args.pop(0)) 765 not_count += 1 766 if not args: 767 raise InternalShellError(j, "Error: '!' requires a" " subcommand") 768 else: 769 break 770 771 # Handle in-process builtins. 772 # 773 # Handle "echo" as a builtin if it is not part of a pipeline. This 774 # greatly speeds up tests that construct input files by repeatedly 775 # echo-appending to a file. 776 # FIXME: Standardize on the builtin echo implementation. We can use a 777 # temporary file to sidestep blocking pipe write issues. 778 779 # Ensure args[0] is hashable. 
780 args[0] = expand_glob(args[0], cmd_shenv.cwd)[0] 781 782 inproc_builtin = inproc_builtins.get(args[0], None) 783 if inproc_builtin and (args[0] != "echo" or len(cmd.commands) == 1): 784 # env calling an in-process builtin is useless, so we take the safe 785 # approach of complaining. 786 if not cmd_shenv is shenv: 787 raise InternalShellError( 788 j, "Error: 'env' cannot call '{}'".format(args[0]) 789 ) 790 if not_crash: 791 raise InternalShellError( 792 j, "Error: 'not --crash' cannot call" " '{}'".format(args[0]) 793 ) 794 if len(cmd.commands) != 1: 795 raise InternalShellError( 796 j, 797 "Unsupported: '{}' cannot be part" " of a pipeline".format(args[0]), 798 ) 799 result = inproc_builtin(Command(args, j.redirects), cmd_shenv) 800 if not_count % 2: 801 result.exitCode = int(not result.exitCode) 802 result.command.args = j.args 803 results.append(result) 804 return result.exitCode 805 806 # Resolve any out-of-process builtin command before adding back 'not' 807 # commands. 808 if args[0] in builtin_commands: 809 args.insert(0, sys.executable) 810 cmd_shenv.env["PYTHONPATH"] = os.path.dirname(os.path.abspath(__file__)) 811 args[1] = os.path.join(builtin_commands_dir, args[1] + ".py") 812 813 # We had to search through the 'not' commands to find all the 'env' 814 # commands and any other in-process builtin command. We don't want to 815 # reimplement 'not' and its '--crash' here, so just push all 'not' 816 # commands back to be called as external commands. Because this 817 # approach effectively moves all 'env' commands up front, it relies on 818 # the assumptions that (1) environment variables are not intended to be 819 # relevant to 'not' commands and (2) the 'env' command should always 820 # blindly pass along the status it receives from any command it calls. 821 822 # For plain negations, either 'not' without '--crash', or the shell 823 # operator '!', leave them out from the command to execute and 824 # invert the result code afterwards. 825 if not_crash: 826 args = not_args + args 827 not_count = 0 828 else: 829 not_args = [] 830 831 stdin, stdout, stderr = processRedirects( 832 j, default_stdin, cmd_shenv, opened_files 833 ) 834 835 # If stderr wants to come from stdout, but stdout isn't a pipe, then put 836 # stderr on a pipe and treat it as stdout. 837 if stderr == subprocess.STDOUT and stdout != subprocess.PIPE: 838 stderr = subprocess.PIPE 839 stderrIsStdout = True 840 else: 841 stderrIsStdout = False 842 843 # Don't allow stderr on a PIPE except for the last 844 # process, this could deadlock. 845 # 846 # FIXME: This is slow, but so is deadlock. 847 if stderr == subprocess.PIPE and j != cmd.commands[-1]: 848 stderr = tempfile.TemporaryFile(mode="w+b") 849 stderrTempFiles.append((i, stderr)) 850 851 # Resolve the executable path ourselves. 852 executable = None 853 # For paths relative to cwd, use the cwd of the shell environment. 854 if args[0].startswith("."): 855 exe_in_cwd = os.path.join(cmd_shenv.cwd, args[0]) 856 if os.path.isfile(exe_in_cwd): 857 executable = exe_in_cwd 858 if not executable: 859 executable = lit.util.which(args[0], cmd_shenv.env["PATH"]) 860 if not executable: 861 raise InternalShellError(j, "%r: command not found" % args[0]) 862 863 # Replace uses of /dev/null with temporary files. 
864 if kAvoidDevNull: 865 # In Python 2.x, basestring is the base class for all string (including unicode) 866 # In Python 3.x, basestring no longer exist and str is always unicode 867 try: 868 str_type = basestring 869 except NameError: 870 str_type = str 871 for i, arg in enumerate(args): 872 if isinstance(arg, str_type) and kDevNull in arg: 873 f = tempfile.NamedTemporaryFile(delete=False) 874 f.close() 875 named_temp_files.append(f.name) 876 args[i] = arg.replace(kDevNull, f.name) 877 878 # Expand all glob expressions 879 args = expand_glob_expressions(args, cmd_shenv.cwd) 880 881 # On Windows, do our own command line quoting for better compatibility 882 # with some core utility distributions. 883 if kIsWindows: 884 args = quote_windows_command(args) 885 886 try: 887 procs.append( 888 subprocess.Popen( 889 args, 890 cwd=cmd_shenv.cwd, 891 executable=executable, 892 stdin=stdin, 893 stdout=stdout, 894 stderr=stderr, 895 env=cmd_shenv.env, 896 close_fds=kUseCloseFDs, 897 universal_newlines=True, 898 errors="replace", 899 ) 900 ) 901 proc_not_counts.append(not_count) 902 # Let the helper know about this process 903 timeoutHelper.addProcess(procs[-1]) 904 except OSError as e: 905 raise InternalShellError( 906 j, "Could not create process ({}) due to {}".format(executable, e) 907 ) 908 909 # Immediately close stdin for any process taking stdin from us. 910 if stdin == subprocess.PIPE: 911 procs[-1].stdin.close() 912 procs[-1].stdin = None 913 914 # Update the current stdin source. 915 if stdout == subprocess.PIPE: 916 default_stdin = procs[-1].stdout 917 elif stderrIsStdout: 918 default_stdin = procs[-1].stderr 919 else: 920 default_stdin = subprocess.PIPE 921 922 # Explicitly close any redirected files. We need to do this now because we 923 # need to release any handles we may have on the temporary files (important 924 # on Win32, for example). Since we have already spawned the subprocess, our 925 # handles have already been transferred so we do not need them anymore. 926 for (name, mode, f, path) in opened_files: 927 f.close() 928 929 # FIXME: There is probably still deadlock potential here. Yawn. 930 procData = [None] * len(procs) 931 procData[-1] = procs[-1].communicate() 932 933 for i in range(len(procs) - 1): 934 if procs[i].stdout is not None: 935 out = procs[i].stdout.read() 936 else: 937 out = "" 938 if procs[i].stderr is not None: 939 err = procs[i].stderr.read() 940 else: 941 err = "" 942 procData[i] = (out, err) 943 944 # Read stderr out of the temp files. 945 for i, f in stderrTempFiles: 946 f.seek(0, 0) 947 procData[i] = (procData[i][0], f.read()) 948 f.close() 949 950 exitCode = None 951 for i, (out, err) in enumerate(procData): 952 res = procs[i].wait() 953 # Detect Ctrl-C in subprocess. 954 if res == -signal.SIGINT: 955 raise KeyboardInterrupt 956 if proc_not_counts[i] % 2: 957 res = 1 if res == 0 else 0 958 elif proc_not_counts[i] > 1: 959 res = 1 if res != 0 else 0 960 961 # Ensure the resulting output is always of string type. 962 try: 963 if out is None: 964 out = "" 965 else: 966 out = to_string(out.decode("utf-8", errors="replace")) 967 except: 968 out = str(out) 969 try: 970 if err is None: 971 err = "" 972 else: 973 err = to_string(err.decode("utf-8", errors="replace")) 974 except: 975 err = str(err) 976 977 # Gather the redirected output files for failed commands. 
978 output_files = [] 979 if res != 0: 980 for (name, mode, f, path) in sorted(opened_files): 981 if path is not None and mode in ("w", "a"): 982 try: 983 with open(path, "rb") as f: 984 data = f.read() 985 except: 986 data = None 987 if data is not None: 988 output_files.append((name, path, data)) 989 990 results.append( 991 ShellCommandResult( 992 cmd.commands[i], 993 out, 994 err, 995 res, 996 timeoutHelper.timeoutReached(), 997 output_files, 998 ) 999 ) 1000 if cmd.pipe_err: 1001 # Take the last failing exit code from the pipeline. 1002 if not exitCode or res != 0: 1003 exitCode = res 1004 else: 1005 exitCode = res 1006 1007 # Remove any named temporary files we created. 1008 for f in named_temp_files: 1009 try: 1010 os.remove(f) 1011 except OSError: 1012 pass 1013 1014 if cmd.negate: 1015 exitCode = not exitCode 1016 1017 return exitCode 1018 1019 1020def findColor(line, curr_color): 1021 start = line.rfind("\33[") 1022 if start == -1: 1023 return curr_color 1024 end = line.find("m", start + 2) 1025 if end == -1: 1026 return curr_color 1027 match = line[start : end + 1] 1028 # "\33[0m" means "reset all formatting". Sometimes the 0 is skipped. 1029 if match == "\33[m" or match == "\33[0m": 1030 return None 1031 return match 1032 1033 1034def formatOutput(title, data, limit=None): 1035 if not data.strip(): 1036 return "" 1037 if not limit is None and len(data) > limit: 1038 data = data[:limit] + "\n...\n" 1039 msg = "data was truncated" 1040 else: 1041 msg = "" 1042 ndashes = 30 1043 # fmt: off 1044 out = f"# .---{title}{'-' * (ndashes - 4 - len(title))}\n" 1045 curr_color = None 1046 for line in data.splitlines(): 1047 if curr_color: 1048 out += "\33[0m" 1049 out += "# | " 1050 if curr_color: 1051 out += curr_color 1052 out += line + "\n" 1053 curr_color = findColor(line, curr_color) 1054 if curr_color: 1055 out += "\33[0m" # prevent unterminated formatting from leaking 1056 out += f"# `---{msg}{'-' * (ndashes - 4 - len(msg))}\n" 1057 # fmt: on 1058 return out 1059 1060 1061# Always either returns the tuple (out, err, exitCode, timeoutInfo) or raises a 1062# ScriptFatal exception. 1063# 1064# If debug is True (the normal lit behavior), err is empty, and out contains an 1065# execution trace, including stdout and stderr shown per command executed. 1066# 1067# If debug is False (set by some custom lit test formats that call this 1068# function), out contains only stdout from the script, err contains only stderr 1069# from the script, and there is no execution trace. 1070def executeScriptInternal( 1071 test, litConfig, tmpBase, commands, cwd, debug=True 1072) -> Tuple[str, str, int, Optional[str]]: 1073 cmds = [] 1074 for i, ln in enumerate(commands): 1075 # Within lit, we try to always add '%dbg(...)' to command lines in order 1076 # to maximize debuggability. However, custom lit test formats might not 1077 # always add it, so add a generic debug message in that case. 
1078 match = re.fullmatch(kPdbgRegex, ln) 1079 if match: 1080 dbg = match.group(1) 1081 command = match.group(2) 1082 else: 1083 dbg = "command line" 1084 command = ln 1085 if debug: 1086 ln = f"@echo '# {dbg}' " 1087 if command: 1088 ln += f"&& @echo {shlex.quote(command.lstrip())} && {command}" 1089 else: 1090 ln += "has no command after substitutions" 1091 else: 1092 ln = command 1093 try: 1094 cmds.append( 1095 ShUtil.ShParser(ln, litConfig.isWindows, test.config.pipefail).parse() 1096 ) 1097 except: 1098 raise ScriptFatal( 1099 f"shell parser error on {dbg}: {command.lstrip()}\n" 1100 ) from None 1101 1102 cmd = cmds[0] 1103 for c in cmds[1:]: 1104 cmd = ShUtil.Seq(cmd, "&&", c) 1105 1106 results = [] 1107 timeoutInfo = None 1108 try: 1109 shenv = ShellEnvironment(cwd, test.config.environment) 1110 exitCode, timeoutInfo = executeShCmd( 1111 cmd, shenv, results, timeout=litConfig.maxIndividualTestTime 1112 ) 1113 except InternalShellError: 1114 e = sys.exc_info()[1] 1115 exitCode = 127 1116 results.append(ShellCommandResult(e.command, "", e.message, exitCode, False)) 1117 1118 out = err = "" 1119 for i, result in enumerate(results): 1120 if not debug: 1121 out += result.stdout 1122 err += result.stderr 1123 continue 1124 1125 # The purpose of an "@echo" command is merely to add a debugging message 1126 # directly to lit's output. It is used internally by lit's internal 1127 # shell and is not currently documented for use in lit tests. However, 1128 # if someone misuses it (e.g., both "echo" and "@echo" complain about 1129 # stdin redirection), produce the normal execution trace to facilitate 1130 # debugging. 1131 if ( 1132 result.command.args[0] == "@echo" 1133 and result.exitCode == 0 1134 and not result.stderr 1135 and not result.outputFiles 1136 and not result.timeoutReached 1137 ): 1138 out += result.stdout 1139 continue 1140 1141 # Write the command line that was run. Properly quote it. Leading 1142 # "!" commands should not be quoted as that would indicate they are not 1143 # the builtins. 1144 out += "# executed command: " 1145 nLeadingBangs = next( 1146 (i for i, cmd in enumerate(result.command.args) if cmd != "!"), 1147 len(result.command.args), 1148 ) 1149 out += "! " * nLeadingBangs 1150 out += " ".join( 1151 shlex.quote(str(s)) 1152 for i, s in enumerate(result.command.args) 1153 if i >= nLeadingBangs 1154 ) 1155 out += "\n" 1156 1157 # If nothing interesting happened, move on. 1158 if ( 1159 litConfig.maxIndividualTestTime == 0 1160 and result.exitCode == 0 1161 and not result.stdout.strip() 1162 and not result.stderr.strip() 1163 ): 1164 continue 1165 1166 # Otherwise, something failed or was printed, show it. 1167 1168 # Add the command output, if redirected. 1169 for (name, path, data) in result.outputFiles: 1170 data = to_string(data.decode("utf-8", errors="replace")) 1171 out += formatOutput(f"redirected output from '{name}'", data, limit=1024) 1172 if result.stdout.strip(): 1173 out += formatOutput("command stdout", result.stdout) 1174 if result.stderr.strip(): 1175 out += formatOutput("command stderr", result.stderr) 1176 if not result.stdout.strip() and not result.stderr.strip(): 1177 out += "# note: command had no output on stdout or stderr\n" 1178 1179 # Show the error conditions: 1180 if result.exitCode != 0: 1181 # On Windows, a negative exit code indicates a signal, and those are 1182 # easier to recognize or look up if we print them in hex. 
1183 if litConfig.isWindows and (result.exitCode < 0 or result.exitCode > 255): 1184 codeStr = hex(int(result.exitCode & 0xFFFFFFFF)).rstrip("L") 1185 else: 1186 codeStr = str(result.exitCode) 1187 out += "# error: command failed with exit status: %s\n" % (codeStr,) 1188 if litConfig.maxIndividualTestTime > 0 and result.timeoutReached: 1189 out += "# error: command reached timeout: %s\n" % ( 1190 str(result.timeoutReached), 1191 ) 1192 1193 return out, err, exitCode, timeoutInfo 1194 1195 1196def executeScript(test, litConfig, tmpBase, commands, cwd): 1197 bashPath = litConfig.getBashPath() 1198 isWin32CMDEXE = litConfig.isWindows and not bashPath 1199 script = tmpBase + ".script" 1200 if isWin32CMDEXE: 1201 script += ".bat" 1202 1203 # Write script file 1204 mode = "w" 1205 open_kwargs = {} 1206 if litConfig.isWindows and not isWin32CMDEXE: 1207 mode += "b" # Avoid CRLFs when writing bash scripts. 1208 else: 1209 open_kwargs["encoding"] = "utf-8" 1210 f = open(script, mode, **open_kwargs) 1211 if isWin32CMDEXE: 1212 for i, ln in enumerate(commands): 1213 match = re.fullmatch(kPdbgRegex, ln) 1214 if match: 1215 command = match.group(2) 1216 commands[i] = match.expand( 1217 "echo '\\1' > nul && " if command else "echo '\\1' > nul" 1218 ) 1219 f.write("@echo on\n") 1220 f.write("\n@if %ERRORLEVEL% NEQ 0 EXIT\n".join(commands)) 1221 else: 1222 for i, ln in enumerate(commands): 1223 match = re.fullmatch(kPdbgRegex, ln) 1224 if match: 1225 dbg = match.group(1) 1226 command = match.group(2) 1227 # Echo the debugging diagnostic to stderr. 1228 # 1229 # For that echo command, use 'set' commands to suppress the 1230 # shell's execution trace, which would just add noise. Suppress 1231 # the shell's execution trace for the 'set' commands by 1232 # redirecting their stderr to /dev/null. 1233 if command: 1234 msg = f"'{dbg}': {shlex.quote(command.lstrip())}" 1235 else: 1236 msg = f"'{dbg}' has no command after substitutions" 1237 commands[i] = ( 1238 f"{{ set +x; }} 2>/dev/null && " 1239 f"echo {msg} >&2 && " 1240 f"{{ set -x; }} 2>/dev/null" 1241 ) 1242 # Execute the command, if any. 1243 # 1244 # 'command' might be something like: 1245 # 1246 # subcmd & PID=$! 1247 # 1248 # In that case, we need something like: 1249 # 1250 # echo_dbg && { subcmd & PID=$!; } 1251 # 1252 # Without the '{ ...; }' enclosing the original 'command', '&' 1253 # would put all of 'echo_dbg && subcmd' in the background. This 1254 # would cause 'echo_dbg' to execute at the wrong time, and a 1255 # later kill of $PID would target the wrong process. We have 1256 # seen the latter manage to terminate the shell running lit. 1257 if command: 1258 commands[i] += f" && {{ {command}; }}" 1259 if test.config.pipefail: 1260 f.write(b"set -o pipefail;" if mode == "wb" else "set -o pipefail;") 1261 1262 # Manually export any DYLD_* variables used by dyld on macOS because 1263 # otherwise they are lost when the shell executable is run, before the 1264 # lit test is executed. 
1265 env_str = "\n".join( 1266 "export {}={};".format(k, shlex.quote(v)) 1267 for k, v in test.config.environment.items() 1268 if k.startswith("DYLD_") 1269 ) 1270 f.write(bytes(env_str, "utf-8") if mode == "wb" else env_str) 1271 f.write(b"set -x;" if mode == "wb" else "set -x;") 1272 if mode == "wb": 1273 f.write(bytes("{ " + "; } &&\n{ ".join(commands) + "; }", "utf-8")) 1274 else: 1275 f.write("{ " + "; } &&\n{ ".join(commands) + "; }") 1276 f.write(b"\n" if mode == "wb" else "\n") 1277 f.close() 1278 1279 if isWin32CMDEXE: 1280 command = ["cmd", "/c", script] 1281 else: 1282 if bashPath: 1283 command = [bashPath, script] 1284 else: 1285 command = ["/bin/sh", script] 1286 if litConfig.useValgrind: 1287 # FIXME: Running valgrind on sh is overkill. We probably could just 1288 # run on clang with no real loss. 1289 command = litConfig.valgrindArgs + command 1290 1291 try: 1292 out, err, exitCode = lit.util.executeCommand( 1293 command, 1294 cwd=cwd, 1295 env=test.config.environment, 1296 timeout=litConfig.maxIndividualTestTime, 1297 ) 1298 return (out, err, exitCode, None) 1299 except lit.util.ExecuteCommandTimeoutException as e: 1300 return (e.out, e.err, e.exitCode, e.msg) 1301 1302 1303def parseIntegratedTestScriptCommands(source_path, keywords): 1304 """ 1305 parseIntegratedTestScriptCommands(source_path) -> commands 1306 1307 Parse the commands in an integrated test script file into a list of 1308 (line_number, command_type, line). 1309 """ 1310 1311 # This code is carefully written to be dual compatible with Python 2.5+ and 1312 # Python 3 without requiring input files to always have valid codings. The 1313 # trick we use is to open the file in binary mode and use the regular 1314 # expression library to find the commands, with it scanning strings in 1315 # Python2 and bytes in Python3. 1316 # 1317 # Once we find a match, we do require each script line to be decodable to 1318 # UTF-8, so we convert the outputs to UTF-8 before returning. This way the 1319 # remaining code can work with "strings" agnostic of the executing Python 1320 # version. 1321 1322 keywords_re = re.compile( 1323 to_bytes("(%s)(.*)\n" % ("|".join(re.escape(k) for k in keywords),)) 1324 ) 1325 1326 f = open(source_path, "rb") 1327 try: 1328 # Read the entire file contents. 1329 data = f.read() 1330 1331 # Ensure the data ends with a newline. 1332 if not data.endswith(to_bytes("\n")): 1333 data = data + to_bytes("\n") 1334 1335 # Iterate over the matches. 1336 line_number = 1 1337 last_match_position = 0 1338 for match in keywords_re.finditer(data): 1339 # Compute the updated line number by counting the intervening 1340 # newlines. 1341 match_position = match.start() 1342 line_number += data.count( 1343 to_bytes("\n"), last_match_position, match_position 1344 ) 1345 last_match_position = match_position 1346 1347 # Convert the keyword and line to UTF-8 strings and yield the 1348 # command. Note that we take care to return regular strings in 1349 # Python 2, to avoid other code having to differentiate between the 1350 # str and unicode types. 1351 # 1352 # Opening the file in binary mode prevented Windows \r newline 1353 # characters from being converted to Unix \n newlines, so manually 1354 # strip those from the yielded lines. 
1355 keyword, ln = match.groups() 1356 yield ( 1357 line_number, 1358 to_string(keyword.decode("utf-8")), 1359 to_string(ln.decode("utf-8").rstrip("\r")), 1360 ) 1361 finally: 1362 f.close() 1363 1364 1365def getTempPaths(test): 1366 """Get the temporary location, this is always relative to the test suite 1367 root, not test source root.""" 1368 execpath = test.getExecPath() 1369 execdir, execbase = os.path.split(execpath) 1370 tmpDir = os.path.join(execdir, "Output") 1371 tmpBase = os.path.join(tmpDir, execbase) 1372 return tmpDir, tmpBase 1373 1374 1375def colonNormalizePath(path): 1376 if kIsWindows: 1377 return re.sub(r"^(.):", r"\1", path.replace("\\", "/")) 1378 else: 1379 assert path[0] == "/" 1380 return path[1:] 1381 1382 1383def getDefaultSubstitutions(test, tmpDir, tmpBase, normalize_slashes=False): 1384 sourcepath = test.getSourcePath() 1385 sourcedir = os.path.dirname(sourcepath) 1386 1387 # Normalize slashes, if requested. 1388 if normalize_slashes: 1389 sourcepath = sourcepath.replace("\\", "/") 1390 sourcedir = sourcedir.replace("\\", "/") 1391 tmpDir = tmpDir.replace("\\", "/") 1392 tmpBase = tmpBase.replace("\\", "/") 1393 1394 substitutions = [] 1395 substitutions.extend(test.config.substitutions) 1396 tmpName = tmpBase + ".tmp" 1397 tmpBaseName = os.path.basename(tmpBase) 1398 sourceBaseName = os.path.basename(sourcepath) 1399 1400 substitutions.append(("%{pathsep}", os.pathsep)) 1401 substitutions.append(("%basename_t", tmpBaseName)) 1402 1403 substitutions.append(("%{s:basename}", sourceBaseName)) 1404 substitutions.append(("%{t:stem}", tmpBaseName)) 1405 1406 substitutions.extend( 1407 [ 1408 ("%{fs-src-root}", pathlib.Path(sourcedir).anchor), 1409 ("%{fs-tmp-root}", pathlib.Path(tmpBase).anchor), 1410 ("%{fs-sep}", os.path.sep), 1411 ] 1412 ) 1413 1414 substitutions.append(("%/et", tmpName.replace("\\", "\\\\\\\\\\\\\\\\"))) 1415 1416 def regex_escape(s): 1417 s = s.replace("@", r"\@") 1418 s = s.replace("&", r"\&") 1419 return s 1420 1421 path_substitutions = [ 1422 ("s", sourcepath), ("S", sourcedir), ("p", sourcedir), 1423 ("t", tmpName), ("T", tmpDir) 1424 ] 1425 for path_substitution in path_substitutions: 1426 letter = path_substitution[0] 1427 path = path_substitution[1] 1428 1429 # Original path variant 1430 substitutions.append(("%" + letter, path)) 1431 1432 # Normalized path separator variant 1433 substitutions.append(("%/" + letter, path.replace("\\", "/"))) 1434 1435 # realpath variants 1436 # Windows paths with substitute drives are not expanded by default 1437 # as they are used to avoid MAX_PATH issues, but sometimes we do 1438 # need the fully expanded path. 1439 real_path = os.path.realpath(path) 1440 substitutions.append(("%{" + letter + ":real}", real_path)) 1441 substitutions.append(("%{/" + letter + ":real}", 1442 real_path.replace("\\", "/"))) 1443 1444 # "%{/[STpst]:regex_replacement}" should be normalized like 1445 # "%/[STpst]" but we're also in a regex replacement context 1446 # of a s@@@ regex. 1447 substitutions.append( 1448 ("%{/" + letter + ":regex_replacement}", 1449 regex_escape(path.replace("\\", "/")))) 1450 1451 # "%:[STpst]" are normalized paths without colons and without 1452 # a leading slash. 
1453 substitutions.append(("%:" + letter, colonNormalizePath(path))) 1454 1455 return substitutions 1456 1457 1458def _memoize(f): 1459 cache = {} # Intentionally unbounded, see applySubstitutions() 1460 1461 def memoized(x): 1462 if x not in cache: 1463 cache[x] = f(x) 1464 return cache[x] 1465 1466 return memoized 1467 1468 1469@_memoize 1470def _caching_re_compile(r): 1471 return re.compile(r) 1472 1473 1474class ExpandableScriptDirective(object): 1475 """ 1476 Common interface for lit directives for which any lit substitutions must be 1477 expanded to produce the shell script. It includes directives (e.g., 'RUN:') 1478 specifying shell commands that might have lit substitutions to be expanded. 1479 It also includes lit directives (e.g., 'DEFINE:') that adjust substitutions. 1480 1481 start_line_number: The directive's starting line number. 1482 end_line_number: The directive's ending line number, which is 1483 start_line_number if the directive has no line continuations. 1484 keyword: The keyword that specifies the directive. For example, 'RUN:'. 1485 """ 1486 1487 def __init__(self, start_line_number, end_line_number, keyword): 1488 # Input line number where the directive starts. 1489 self.start_line_number = start_line_number 1490 # Input line number where the directive ends. 1491 self.end_line_number = end_line_number 1492 # The keyword used to indicate the directive. 1493 self.keyword = keyword 1494 1495 def add_continuation(self, line_number, keyword, line): 1496 """ 1497 Add a continuation line to this directive and return True, or do nothing 1498 and return False if the specified line is not a continuation for this 1499 directive (e.g., previous line does not end in '\', or keywords do not 1500 match). 1501 1502 line_number: The line number for the continuation line. 1503 keyword: The keyword that specifies the continuation line. For example, 1504 'RUN:'. 1505 line: The content of the continuation line after the keyword. 1506 """ 1507 assert False, "expected method to be called on derived class" 1508 1509 def needs_continuation(self): 1510 """ 1511 Does this directive require a continuation line? 1512 1513 '\' is documented as indicating a line continuation even if whitespace 1514 separates it from the newline. It looks like a line continuation, and 1515 it would be confusing if it didn't behave as one. 1516 """ 1517 assert False, "expected method to be called on derived class" 1518 1519 def get_location(self): 1520 """ 1521 Get a phrase describing the line or range of lines so far included by 1522 this directive and any line continuations. 1523 """ 1524 if self.start_line_number == self.end_line_number: 1525 return f"at line {self.start_line_number}" 1526 return f"from line {self.start_line_number} to {self.end_line_number}" 1527 1528 1529class CommandDirective(ExpandableScriptDirective): 1530 """ 1531 A lit directive taking a shell command line. For example, 1532 'RUN: echo hello world'. 1533 1534 command: The content accumulated so far from the directive and its 1535 continuation lines. 
1536 """ 1537 1538 def __init__(self, start_line_number, end_line_number, keyword, line): 1539 super().__init__(start_line_number, end_line_number, keyword) 1540 self.command = line.rstrip() 1541 1542 def add_continuation(self, line_number, keyword, line): 1543 if keyword != self.keyword or not self.needs_continuation(): 1544 return False 1545 self.command = self.command[:-1] + line.rstrip() 1546 self.end_line_number = line_number 1547 return True 1548 1549 def needs_continuation(self): 1550 # Trailing whitespace is stripped immediately when each line is added, 1551 # so '\' is never hidden here. 1552 return self.command[-1] == "\\" 1553 1554 1555class SubstDirective(ExpandableScriptDirective): 1556 """ 1557 A lit directive taking a substitution definition or redefinition. For 1558 example, 'DEFINE: %{name} = value'. 1559 1560 new_subst: True if this directive defines a new substitution. False if it 1561 redefines an existing substitution. 1562 body: The unparsed content accumulated so far from the directive and its 1563 continuation lines. 1564 name: The substitution's name, or None if more continuation lines are still 1565 required. 1566 value: The substitution's value, or None if more continuation lines are 1567 still required. 1568 """ 1569 1570 def __init__(self, start_line_number, end_line_number, keyword, new_subst, line): 1571 super().__init__(start_line_number, end_line_number, keyword) 1572 self.new_subst = new_subst 1573 self.body = line 1574 self.name = None 1575 self.value = None 1576 self._parse_body() 1577 1578 def add_continuation(self, line_number, keyword, line): 1579 if keyword != self.keyword or not self.needs_continuation(): 1580 return False 1581 if not line.strip(): 1582 raise ValueError("Substitution's continuation is empty") 1583 # Append line. Replace the '\' and any adjacent whitespace with a 1584 # single space. 1585 self.body = self.body.rstrip()[:-1].rstrip() + " " + line.lstrip() 1586 self.end_line_number = line_number 1587 self._parse_body() 1588 return True 1589 1590 def needs_continuation(self): 1591 return self.body.rstrip()[-1:] == "\\" 1592 1593 def _parse_body(self): 1594 """ 1595 If no more line continuations are required, parse all the directive's 1596 accumulated lines in order to identify the substitution's name and full 1597 value, and raise an exception if invalid. 1598 """ 1599 if self.needs_continuation(): 1600 return 1601 1602 # Extract the left-hand side and value, and discard any whitespace 1603 # enclosing each. 1604 parts = self.body.split("=", 1) 1605 if len(parts) == 1: 1606 raise ValueError("Substitution's definition does not contain '='") 1607 self.name = parts[0].strip() 1608 self.value = parts[1].strip() 1609 1610 # Check the substitution's name. 1611 # 1612 # Do not extend this to permit '.' or any sequence that's special in a 1613 # python pattern. We could escape that automatically for 1614 # DEFINE/REDEFINE directives in test files. However, lit configuration 1615 # file authors would still have to remember to escape them manually in 1616 # substitution names but not in values. Moreover, the manually chosen 1617 # and automatically chosen escape sequences would have to be consistent 1618 # (e.g., '\.' vs. '[.]') in order for REDEFINE to successfully redefine 1619 # a substitution previously defined by a lit configuration file. All 1620 # this seems too error prone and confusing to be worthwhile. If you 1621 # want your name to express structure, use ':' instead of '.'. 
1622 # 1623 # Actually, '{' and '}' are special if they contain only digits possibly 1624 # separated by a comma. Requiring a leading letter avoids that. 1625 if not re.fullmatch(r"%{[_a-zA-Z][-_:0-9a-zA-Z]*}", self.name): 1626 raise ValueError( 1627 f"Substitution name '{self.name}' is malformed as it must " 1628 f"start with '%{{', it must end with '}}', and the rest must " 1629 f"start with a letter or underscore and contain only " 1630 f"alphanumeric characters, hyphens, underscores, and colons" 1631 ) 1632 1633 def adjust_substitutions(self, substitutions): 1634 """ 1635 Modify the specified substitution list as specified by this directive. 1636 """ 1637 assert ( 1638 not self.needs_continuation() 1639 ), "expected directive continuations to be parsed before applying" 1640 existing = [i for i, subst in enumerate(substitutions) if self.name in subst[0]] 1641 existing_res = "".join( 1642 "\nExisting pattern: " + substitutions[i][0] for i in existing 1643 ) 1644 if self.new_subst: 1645 if existing: 1646 raise ValueError( 1647 f"Substitution whose pattern contains '{self.name}' is " 1648 f"already defined before '{self.keyword}' directive " 1649 f"{self.get_location()}" 1650 f"{existing_res}" 1651 ) 1652 substitutions.insert(0, (self.name, self.value)) 1653 return 1654 if len(existing) > 1: 1655 raise ValueError( 1656 f"Multiple substitutions whose patterns contain '{self.name}' " 1657 f"are defined before '{self.keyword}' directive " 1658 f"{self.get_location()}" 1659 f"{existing_res}" 1660 ) 1661 if not existing: 1662 raise ValueError( 1663 f"No substitution for '{self.name}' is defined before " 1664 f"'{self.keyword}' directive {self.get_location()}" 1665 ) 1666 if substitutions[existing[0]][0] != self.name: 1667 raise ValueError( 1668 f"Existing substitution whose pattern contains '{self.name}' " 1669 f"does not have the pattern specified by '{self.keyword}' " 1670 f"directive {self.get_location()}\n" 1671 f"Expected pattern: {self.name}" 1672 f"{existing_res}" 1673 ) 1674 substitutions[existing[0]] = (self.name, self.value) 1675 1676 1677def applySubstitutions(script, substitutions, conditions={}, recursion_limit=None): 1678 """ 1679 Apply substitutions to the script. Allow full regular expression syntax. 1680 Replace each matching occurrence of regular expression pattern a with 1681 substitution b in line ln. 1682 1683 If a substitution expands into another substitution, it is expanded 1684 recursively until the line has no more expandable substitutions. If 1685 the line can still can be substituted after being substituted 1686 `recursion_limit` times, it is an error. If the `recursion_limit` is 1687 `None` (the default), no recursive substitution is performed at all. 1688 """ 1689 1690 # We use #_MARKER_# to hide %% while we do the other substitutions. 
1691 def escapePercents(ln): 1692 return _caching_re_compile("%%").sub("#_MARKER_#", ln) 1693 1694 def unescapePercents(ln): 1695 return _caching_re_compile("#_MARKER_#").sub("%", ln) 1696 1697 def substituteIfElse(ln): 1698 # early exit to avoid wasting time on lines without 1699 # conditional substitutions 1700 if ln.find("%if ") == -1: 1701 return ln 1702 1703 def tryParseIfCond(ln): 1704 # space is important to not conflict with other (possible) 1705 # substitutions 1706 if not ln.startswith("%if "): 1707 return None, ln 1708 ln = ln[4:] 1709 1710 # stop at '%{' 1711 match = _caching_re_compile("%{").search(ln) 1712 if not match: 1713 raise ValueError("'%{' is missing for %if substitution") 1714 cond = ln[: match.start()] 1715 1716 # eat '%{' as well 1717 ln = ln[match.end() :] 1718 return cond, ln 1719 1720 def tryParseElse(ln): 1721 match = _caching_re_compile(r"^\s*%else\s*(%{)?").search(ln) 1722 if not match: 1723 return False, ln 1724 if not match.group(1): 1725 raise ValueError("'%{' is missing for %else substitution") 1726 return True, ln[match.end() :] 1727 1728 def tryParseEnd(ln): 1729 if ln.startswith("%}"): 1730 return True, ln[2:] 1731 return False, ln 1732 1733 def parseText(ln, isNested): 1734 # parse everything until %if, or %} if we're parsing a 1735 # nested expression. 1736 match = _caching_re_compile( 1737 "(.*?)(?:%if|%})" if isNested else "(.*?)(?:%if)" 1738 ).search(ln) 1739 if not match: 1740 # there is no terminating pattern, so treat the whole 1741 # line as text 1742 return ln, "" 1743 text_end = match.end(1) 1744 return ln[:text_end], ln[text_end:] 1745 1746 def parseRecursive(ln, isNested): 1747 result = "" 1748 while len(ln): 1749 if isNested: 1750 found_end, _ = tryParseEnd(ln) 1751 if found_end: 1752 break 1753 1754 # %if cond %{ branch_if %} %else %{ branch_else %} 1755 cond, ln = tryParseIfCond(ln) 1756 if cond: 1757 branch_if, ln = parseRecursive(ln, isNested=True) 1758 found_end, ln = tryParseEnd(ln) 1759 if not found_end: 1760 raise ValueError("'%}' is missing for %if substitution") 1761 1762 branch_else = "" 1763 found_else, ln = tryParseElse(ln) 1764 if found_else: 1765 branch_else, ln = parseRecursive(ln, isNested=True) 1766 found_end, ln = tryParseEnd(ln) 1767 if not found_end: 1768 raise ValueError("'%}' is missing for %else substitution") 1769 1770 if BooleanExpression.evaluate(cond, conditions): 1771 result += branch_if 1772 else: 1773 result += branch_else 1774 continue 1775 1776 # The rest is handled as plain text. 1777 text, ln = parseText(ln, isNested) 1778 result += text 1779 1780 return result, ln 1781 1782 result, ln = parseRecursive(ln, isNested=False) 1783 assert len(ln) == 0 1784 return result 1785 1786 def processLine(ln): 1787 # Apply substitutions 1788 ln = substituteIfElse(escapePercents(ln)) 1789 for a, b in substitutions: 1790 b = b.replace("\\", "\\\\") 1791 # re.compile() has a built-in LRU cache with 512 entries. In some 1792 # test suites lit ends up thrashing that cache, which made e.g. 1793 # check-llvm run 50% slower. Use an explicit, unbounded cache 1794 # to prevent that from happening. Since lit is fairly 1795 # short-lived, since the set of substitutions is fairly small, and 1796 # since thrashing has such bad consequences, not bounding the cache 1797 # seems reasonable. 1798 ln = _caching_re_compile(a).sub(str(b), escapePercents(ln)) 1799 1800 # Strip the trailing newline and any extra whitespace. 
        return ln.strip()

    def processLineToFixedPoint(ln):
        assert isinstance(recursion_limit, int) and recursion_limit >= 0
        origLine = ln
        steps = 0
        processed = processLine(ln)
        while processed != ln and steps < recursion_limit:
            ln = processed
            processed = processLine(ln)
            steps += 1

        if processed != ln:
            raise ValueError(
                "Recursive substitution of '%s' did not complete "
                "in the provided recursion limit (%s)" % (origLine, recursion_limit)
            )

        return processed

    process = processLine if recursion_limit is None else processLineToFixedPoint
    output = []
    for directive in script:
        if isinstance(directive, SubstDirective):
            directive.adjust_substitutions(substitutions)
        else:
            if isinstance(directive, CommandDirective):
                line = directive.command
            else:
                # Can come from preamble_commands.
                assert isinstance(directive, str)
                line = directive
            output.append(unescapePercents(process(line)))

    return output


class ParserKind(object):
    """
    An enumeration representing the style of an integrated test keyword or
    command.

    TAG: A keyword taking no value. Ex 'END.'
    COMMAND: A keyword taking a list of shell commands. Ex 'RUN:'
    LIST: A keyword taking a comma-separated list of values.
    SPACE_LIST: A keyword taking a space-separated list of values.
    BOOLEAN_EXPR: A keyword taking a comma-separated list of
        boolean expressions. Ex 'XFAIL:'
    INTEGER: A keyword taking a single integer. Ex 'ALLOW_RETRIES:'
    CUSTOM: A keyword with custom parsing semantics.
    DEFINE: A keyword taking a new lit substitution definition. Ex
        'DEFINE: %{name}=value'
    REDEFINE: A keyword taking a lit substitution redefinition. Ex
        'REDEFINE: %{name}=value'
    """

    TAG = 0
    COMMAND = 1
    LIST = 2
    SPACE_LIST = 3
    BOOLEAN_EXPR = 4
    INTEGER = 5
    CUSTOM = 6
    DEFINE = 7
    REDEFINE = 8

    @staticmethod
    def allowedKeywordSuffixes(value):
        return {
            ParserKind.TAG: ["."],
            ParserKind.COMMAND: [":"],
            ParserKind.LIST: [":"],
            ParserKind.SPACE_LIST: [":"],
            ParserKind.BOOLEAN_EXPR: [":"],
            ParserKind.INTEGER: [":"],
            ParserKind.CUSTOM: [":", "."],
            ParserKind.DEFINE: [":"],
            ParserKind.REDEFINE: [":"],
        }[value]

    @staticmethod
    def str(value):
        return {
            ParserKind.TAG: "TAG",
            ParserKind.COMMAND: "COMMAND",
            ParserKind.LIST: "LIST",
            ParserKind.SPACE_LIST: "SPACE_LIST",
            ParserKind.BOOLEAN_EXPR: "BOOLEAN_EXPR",
            ParserKind.INTEGER: "INTEGER",
            ParserKind.CUSTOM: "CUSTOM",
            ParserKind.DEFINE: "DEFINE",
            ParserKind.REDEFINE: "REDEFINE",
        }[value]


class IntegratedTestKeywordParser(object):
    """A parser for LLVM/Clang style integrated test scripts.

    keyword: The keyword to parse for. It must end in either '.' or ':'.
    kind: A value of ParserKind.
    parser: A custom parser. This value may only be specified with
        ParserKind.CUSTOM.
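
    Example (illustrative only; 'MY_KEYWORD:' is a hypothetical keyword, not
    one lit defines):

        parser = IntegratedTestKeywordParser("MY_KEYWORD:", ParserKind.LIST)
        parser.parseLine(10, "a, b, c")
        parser.getValue()  # -> ['a', 'b', 'c']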
1903 """ 1904 1905 def __init__(self, keyword, kind, parser=None, initial_value=None): 1906 allowedSuffixes = ParserKind.allowedKeywordSuffixes(kind) 1907 if len(keyword) == 0 or keyword[-1] not in allowedSuffixes: 1908 if len(allowedSuffixes) == 1: 1909 raise ValueError( 1910 "Keyword '%s' of kind '%s' must end in '%s'" 1911 % (keyword, ParserKind.str(kind), allowedSuffixes[0]) 1912 ) 1913 else: 1914 raise ValueError( 1915 "Keyword '%s' of kind '%s' must end in " 1916 " one of '%s'" 1917 % (keyword, ParserKind.str(kind), " ".join(allowedSuffixes)) 1918 ) 1919 1920 if parser is not None and kind != ParserKind.CUSTOM: 1921 raise ValueError( 1922 "custom parsers can only be specified with " "ParserKind.CUSTOM" 1923 ) 1924 self.keyword = keyword 1925 self.kind = kind 1926 self.parsed_lines = [] 1927 self.value = initial_value 1928 self.parser = parser 1929 1930 if kind == ParserKind.COMMAND: 1931 self.parser = lambda line_number, line, output: self._handleCommand( 1932 line_number, line, output, self.keyword 1933 ) 1934 elif kind == ParserKind.LIST: 1935 self.parser = self._handleList 1936 elif kind == ParserKind.SPACE_LIST: 1937 self.parser = self._handleSpaceList 1938 elif kind == ParserKind.BOOLEAN_EXPR: 1939 self.parser = self._handleBooleanExpr 1940 elif kind == ParserKind.INTEGER: 1941 self.parser = self._handleSingleInteger 1942 elif kind == ParserKind.TAG: 1943 self.parser = self._handleTag 1944 elif kind == ParserKind.CUSTOM: 1945 if parser is None: 1946 raise ValueError("ParserKind.CUSTOM requires a custom parser") 1947 self.parser = parser 1948 elif kind == ParserKind.DEFINE: 1949 self.parser = lambda line_number, line, output: self._handleSubst( 1950 line_number, line, output, self.keyword, new_subst=True 1951 ) 1952 elif kind == ParserKind.REDEFINE: 1953 self.parser = lambda line_number, line, output: self._handleSubst( 1954 line_number, line, output, self.keyword, new_subst=False 1955 ) 1956 else: 1957 raise ValueError("Unknown kind '%s'" % kind) 1958 1959 def parseLine(self, line_number, line): 1960 try: 1961 self.parsed_lines += [(line_number, line)] 1962 self.value = self.parser(line_number, line, self.value) 1963 except ValueError as e: 1964 raise ValueError( 1965 str(e) 1966 + ("\nin %s directive on test line %d" % (self.keyword, line_number)) 1967 ) 1968 1969 def getValue(self): 1970 return self.value 1971 1972 @staticmethod 1973 def _handleTag(line_number, line, output): 1974 """A helper for parsing TAG type keywords""" 1975 return not line.strip() or output 1976 1977 @staticmethod 1978 def _substituteLineNumbers(line_number, line): 1979 line = re.sub(r"%\(line\)", str(line_number), line) 1980 1981 def replace_line_number(match): 1982 if match.group(1) == "+": 1983 return str(line_number + int(match.group(2))) 1984 if match.group(1) == "-": 1985 return str(line_number - int(match.group(2))) 1986 1987 return re.sub(r"%\(line *([\+-]) *(\d+)\)", replace_line_number, line) 1988 1989 @classmethod 1990 def _handleCommand(cls, line_number, line, output, keyword): 1991 """A helper for parsing COMMAND type keywords""" 1992 # Substitute line number expressions. 1993 line = cls._substituteLineNumbers(line_number, line) 1994 1995 # Collapse lines with trailing '\\', or add line with line number to 1996 # start a new pipeline. 
        if not output or not output[-1].add_continuation(line_number, keyword, line):
            if output is None:
                output = []
            line = buildPdbgCommand(f"{keyword} at line {line_number}", line)
            output.append(CommandDirective(line_number, line_number, keyword, line))
        return output

    @staticmethod
    def _handleList(line_number, line, output):
        """A parser for LIST type keywords"""
        if output is None:
            output = []
        output.extend([s.strip() for s in line.split(",")])
        return output

    @staticmethod
    def _handleSpaceList(line_number, line, output):
        """A parser for SPACE_LIST type keywords"""
        if output is None:
            output = []
        output.extend([s.strip() for s in line.split(" ") if s.strip() != ""])
        return output

    @staticmethod
    def _handleSingleInteger(line_number, line, output):
        """A parser for INTEGER type keywords"""
        if output is None:
            output = []
        try:
            n = int(line)
        except ValueError:
            raise ValueError(
                "INTEGER parser requires the input to be an integer (got {})".format(
                    line
                )
            )
        output.append(n)
        return output

    @staticmethod
    def _handleBooleanExpr(line_number, line, output):
        """A parser for BOOLEAN_EXPR type keywords"""
        parts = [s.strip() for s in line.split(",") if s.strip() != ""]
        if output and output[-1][-1] == "\\":
            output[-1] = output[-1][:-1] + parts[0]
            del parts[0]
        if output is None:
            output = []
        output.extend(parts)
        # Evaluate each expression to verify syntax.
        # We don't want any results, just the raised ValueError.
        for s in output:
            if s != "*" and not s.endswith("\\"):
                BooleanExpression.evaluate(s, [])
        return output

    @classmethod
    def _handleSubst(cls, line_number, line, output, keyword, new_subst):
        """A parser for DEFINE and REDEFINE type keywords"""
        line = cls._substituteLineNumbers(line_number, line)
        if output and output[-1].add_continuation(line_number, keyword, line):
            return output
        if output is None:
            output = []
        output.append(
            SubstDirective(line_number, line_number, keyword, new_subst, line)
        )
        return output


def _parseKeywords(sourcepath, additional_parsers=[], require_script=True):
    """_parseKeywords

    Scan an LLVM/Clang style integrated test script and extract all the lines
    pertaining to a special parser. This includes 'RUN', 'XFAIL', 'REQUIRES',
    'UNSUPPORTED', 'ALLOW_RETRIES', 'END', 'DEFINE', 'REDEFINE', as well as
    other specified custom parsers.

    Returns a dictionary mapping each parser keyword to its value after
    parsing the test.
    """
    # Install the built-in keyword parsers.
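    # Note that 'RUN:', 'DEFINE:', and 'REDEFINE:' below all share the same
    # 'script' list as their initial value, so their directives end up
    # interleaved in source order and can later be applied in sequence by
    # applySubstitutions().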
    script = []
    builtin_parsers = [
        IntegratedTestKeywordParser("RUN:", ParserKind.COMMAND, initial_value=script),
        IntegratedTestKeywordParser("XFAIL:", ParserKind.BOOLEAN_EXPR),
        IntegratedTestKeywordParser("REQUIRES:", ParserKind.BOOLEAN_EXPR),
        IntegratedTestKeywordParser("UNSUPPORTED:", ParserKind.BOOLEAN_EXPR),
        IntegratedTestKeywordParser("ALLOW_RETRIES:", ParserKind.INTEGER),
        IntegratedTestKeywordParser("END.", ParserKind.TAG),
        IntegratedTestKeywordParser("DEFINE:", ParserKind.DEFINE, initial_value=script),
        IntegratedTestKeywordParser(
            "REDEFINE:", ParserKind.REDEFINE, initial_value=script
        ),
    ]
    keyword_parsers = {p.keyword: p for p in builtin_parsers}

    # Install user-defined additional parsers.
    for parser in additional_parsers:
        if not isinstance(parser, IntegratedTestKeywordParser):
            raise ValueError(
                "Additional parser must be an instance of "
                "IntegratedTestKeywordParser"
            )
        if parser.keyword in keyword_parsers:
            raise ValueError("Parser for keyword '%s' already exists" % parser.keyword)
        keyword_parsers[parser.keyword] = parser

    # Collect the test lines from the script.
    for line_number, command_type, ln in parseIntegratedTestScriptCommands(
        sourcepath, keyword_parsers.keys()
    ):
        parser = keyword_parsers[command_type]
        parser.parseLine(line_number, ln)
        if command_type == "END." and parser.getValue() is True:
            break

    # Verify the script contains a run line.
    if require_script and not any(
        isinstance(directive, CommandDirective) for directive in script
    ):
        raise ValueError("Test has no 'RUN:' line")

    # Check for unterminated run or subst lines.
    #
    # If, after a line continuation for one kind of directive (e.g., 'RUN:',
    # 'DEFINE:', 'REDEFINE:') in script, the next directive in script is a
    # different kind, then the '\\' remains on the former, and we report it
    # here.
    for directive in script:
        if directive.needs_continuation():
            raise ValueError(
                f"Test has unterminated '{directive.keyword}' "
                f"directive (with '\\') "
                f"{directive.get_location()}"
            )

    # Check boolean expressions for unterminated lines.
    for key in keyword_parsers:
        kp = keyword_parsers[key]
        if kp.kind != ParserKind.BOOLEAN_EXPR:
            continue
        value = kp.getValue()
        if value and value[-1][-1] == "\\":
            raise ValueError(
                "Test has unterminated '{key}' lines (with '\\')".format(key=key)
            )

    # Make sure there's at most one ALLOW_RETRIES: line
    allowed_retries = keyword_parsers["ALLOW_RETRIES:"].getValue()
    if allowed_retries and len(allowed_retries) > 1:
        raise ValueError("Test has more than one ALLOW_RETRIES: line")

    return {p.keyword: p.getValue() for p in keyword_parsers.values()}


def parseIntegratedTestScript(test, additional_parsers=[], require_script=True):
    """parseIntegratedTestScript - Scan an LLVM/Clang style integrated test
    script and extract the lines to 'RUN' as well as 'XFAIL', 'REQUIRES',
    'UNSUPPORTED' and 'ALLOW_RETRIES' information into the given test.

    If additional parsers are specified then the test is also scanned for the
    keywords they specify and all matches are passed to the custom parser.

    If 'require_script' is False an empty script may be returned. This can be
    used for test formats where the actual script is optional or ignored.
    """
    # Parse the test sources and extract test properties
    try:
        parsed = _parseKeywords(
            test.getSourcePath(), additional_parsers, require_script
        )
    except ValueError as e:
        return lit.Test.Result(Test.UNRESOLVED, str(e))
    script = parsed["RUN:"] or []
    assert parsed["DEFINE:"] == script
    assert parsed["REDEFINE:"] == script
    test.xfails += parsed["XFAIL:"] or []
    test.requires += parsed["REQUIRES:"] or []
    test.unsupported += parsed["UNSUPPORTED:"] or []
    if parsed["ALLOW_RETRIES:"]:
        test.allowed_retries = parsed["ALLOW_RETRIES:"][0]

    # Enforce REQUIRES:
    missing_required_features = test.getMissingRequiredFeatures()
    if missing_required_features:
        msg = ", ".join(missing_required_features)
        return lit.Test.Result(
            Test.UNSUPPORTED,
            "Test requires the following unavailable " "features: %s" % msg,
        )

    # Enforce UNSUPPORTED:
    unsupported_features = test.getUnsupportedFeatures()
    if unsupported_features:
        msg = ", ".join(unsupported_features)
        return lit.Test.Result(
            Test.UNSUPPORTED,
            "Test does not support the following features " "and/or targets: %s" % msg,
        )

    # Enforce limit_to_features.
    if not test.isWithinFeatureLimits():
        msg = ", ".join(test.config.limit_to_features)
        return lit.Test.Result(
            Test.UNSUPPORTED,
            "Test does not require any of the features "
            "specified in limit_to_features: %s" % msg,
        )

    return script


def _runShTest(test, litConfig, useExternalSh, script, tmpBase) -> lit.Test.Result:
    # Always returns the tuple (out, err, exitCode, timeoutInfo, status).
    def runOnce(
        execdir,
    ) -> Tuple[str, str, int, Optional[str], Test.ResultCode]:
        # script is modified below (for litConfig.per_test_coverage, and for
        # %dbg expansions). runOnce can be called multiple times, but applying
        # the modifications multiple times can corrupt script, so always modify
        # a copy.
        scriptCopy = script[:]
        # Set unique LLVM_PROFILE_FILE for each run command
        if litConfig.per_test_coverage:
            # Extract the test case name from the test object, and remove the
            # file extension.
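            # For example (illustrative only): for a test named "foo.ll", the
            # first and second run commands would get LLVM_PROFILE_FILE set to
            # "foo0.profraw" and "foo1.profraw" respectively.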
            test_case_name = test.path_in_suite[-1]
            test_case_name = test_case_name.rsplit(".", 1)[0]
            coverage_index = 0  # Counter for coverage file index
            for i, ln in enumerate(scriptCopy):
                match = re.fullmatch(kPdbgRegex, ln)
                if match:
                    dbg = match.group(1)
                    command = match.group(2)
                else:
                    command = ln
                profile = f"{test_case_name}{coverage_index}.profraw"
                coverage_index += 1
                command = f"export LLVM_PROFILE_FILE={profile}; {command}"
                if match:
                    command = buildPdbgCommand(dbg, command)
                scriptCopy[i] = command

        try:
            if useExternalSh:
                res = executeScript(test, litConfig, tmpBase, scriptCopy, execdir)
            else:
                res = executeScriptInternal(
                    test, litConfig, tmpBase, scriptCopy, execdir
                )
        except ScriptFatal as e:
            out = f"# " + "\n# ".join(str(e).splitlines()) + "\n"
            return out, "", 1, None, Test.UNRESOLVED

        out, err, exitCode, timeoutInfo = res
        if exitCode == 0:
            status = Test.PASS
        else:
            if timeoutInfo is None:
                status = Test.FAIL
            else:
                status = Test.TIMEOUT
        return out, err, exitCode, timeoutInfo, status

    # Create the output directory if it does not already exist.
    lit.util.mkdir_p(os.path.dirname(tmpBase))

    # Re-run failed tests up to test.allowed_retries times.
    execdir = os.path.dirname(test.getExecPath())
    attempts = test.allowed_retries + 1
    for i in range(attempts):
        res = runOnce(execdir)
        out, err, exitCode, timeoutInfo, status = res
        if status != Test.FAIL:
            break

    # If we had to run the test more than once, count it as a flaky pass. These
    # will be printed separately in the test summary.
    if i > 0 and status == Test.PASS:
        status = Test.FLAKYPASS

    # Form the output log.
    output = f"Exit Code: {exitCode}\n"

    if timeoutInfo is not None:
        output += """Timeout: %s\n""" % (timeoutInfo,)
    output += "\n"

    # Append the outputs, if present.
    if out:
        output += """Command Output (stdout):\n--\n%s\n--\n""" % (out,)
    if err:
        output += """Command Output (stderr):\n--\n%s\n--\n""" % (err,)

    return lit.Test.Result(status, output)


def executeShTest(
    test, litConfig, useExternalSh, extra_substitutions=[], preamble_commands=[]
):
    if test.config.unsupported:
        return lit.Test.Result(Test.UNSUPPORTED, "Test is unsupported")

    script = list(preamble_commands)
    script = [buildPdbgCommand(f"preamble command line", ln) for ln in script]

    parsed = parseIntegratedTestScript(test, require_script=not script)
    if isinstance(parsed, lit.Test.Result):
        return parsed
    script += parsed

    if litConfig.noExecute:
        return lit.Test.Result(Test.PASS)

    tmpDir, tmpBase = getTempPaths(test)
    substitutions = list(extra_substitutions)
    substitutions += getDefaultSubstitutions(
        test, tmpDir, tmpBase, normalize_slashes=useExternalSh
    )
    conditions = {feature: True for feature in test.config.available_features}
    script = applySubstitutions(
        script,
        substitutions,
        conditions,
        recursion_limit=test.config.recursiveExpansionLimit,
    )

    return _runShTest(test, litConfig, useExternalSh, script, tmpBase)
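

# ---------------------------------------------------------------------------
# Illustrative sketch (not part of lit's API): how a caller might exercise
# applySubstitutions() directly. The feature name "my-feature" and the
# substitution "%{flags}" below are hypothetical examples, not substitutions
# that lit defines.
#
#   script = ["echo %{flags} %if my-feature %{ enabled %} %else %{ disabled %}"]
#   substitutions = [("%{flags}", "-O2")]
#   conditions = {"my-feature": True}
#   expanded = applySubstitutions(script, substitutions, conditions)
#   # expanded is roughly ["echo -O2 enabled"] (modulo internal whitespace).
#
# With recursion_limit=N, a substitution that expands into further
# substitutions is re-processed up to N times; if the line still changes after
# that, a ValueError is raised as shown in processLineToFixedPoint() above.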