xref: /llvm-project/llvm/utils/lit/lit/TestRunner.py (revision 876f661dbeb42a76767edfb1f36214baacb27fd4)
1from __future__ import absolute_import
2import errno
3import io
4import itertools
5import getopt
6import os, signal, subprocess, sys
7import re
8import stat
9import pathlib
10import platform
11import shlex
12import shutil
13import tempfile
14import threading
15import typing
16from typing import Optional, Tuple
17
18import io
19
20try:
21    from StringIO import StringIO
22except ImportError:
23    from io import StringIO
24
25from lit.ShCommands import GlobItem, Command
26import lit.ShUtil as ShUtil
27import lit.Test as Test
28import lit.util
29from lit.util import to_bytes, to_string, to_unicode
30from lit.BooleanExpression import BooleanExpression
31
32
class InternalShellError(Exception):
    """Error raised by the internal shell while handling a command.

    Attributes:
        command: the command object the error relates to.
        message: human-readable description of what went wrong.
    """

    def __init__(self, command, message):
        self.message, self.command = message, command
37
38
class ScriptFatal(Exception):
    """
    Raised when a script fails in a way that makes retrying pointless.

    Nothing has been written to stdout or stderr for this failure; the
    description travels only in the exception's message.
    """

    def __init__(self, message):
        Exception.__init__(self, message)
48
49
# True when the host platform reports itself as Windows.
kIsWindows = platform.system() == "Windows"

# Don't use close_fds on Windows.
kUseCloseFDs = not kIsWindows

# Use temporary files to replace /dev/null on Windows.
kAvoidDevNull = kIsWindows
# The path that redirect handling compares against when kAvoidDevNull is set.
kDevNull = "/dev/null"

# A regex that matches %dbg(ARG), which lit inserts at the beginning of each
# run command pipeline such that ARG specifies the pipeline's source line
# number.  lit later expands each %dbg(ARG) to a command that behaves as a null
# command in the target shell so that the line number is seen in lit's verbose
# mode.
#
# This regex captures ARG.  ARG must not contain a right parenthesis, which
# terminates %dbg.  ARG must not contain quotes, in which ARG might be enclosed
# during expansion.
#
# COMMAND that follows %dbg(ARG) is also captured. COMMAND can be
# empty as a result of conditional substitution.
kPdbgRegex = "%dbg\\(([^)'\"]*)\\)((?:.|\\n)*)"
72
73
def buildPdbgCommand(msg, cmd):
    """Return ``cmd`` prefixed with a ``%dbg(msg)`` marker.

    The generated text is asserted to match ``kPdbgRegex`` in full, so a
    ``msg`` containing a right parenthesis or a quote trips the assertion.
    """
    expanded = "%dbg({}) {}".format(msg, cmd)
    matched = re.fullmatch(kPdbgRegex, expanded)
    assert matched, f"kPdbgRegex expected to match actual %dbg usage: {expanded}"
    return expanded
80
81
class ShellEnvironment(object):

    """Mutable state of the internal shell.

    Holds the current working directory, a private copy of the environment
    variable mapping, and the directory stack used by pushd/popd.
    """

    def __init__(self, cwd, env):
        self.dirStack = []
        # Copy the mapping so later changes do not leak back to the caller.
        self.env = dict(env)
        self.cwd = cwd

    def change_dir(self, newdir):
        """Make ``newdir`` the cwd; relative paths are resolved against it."""
        if not os.path.isabs(newdir):
            newdir = lit.util.abs_path_preserve_drive(os.path.join(self.cwd, newdir))
        self.cwd = newdir
100
101
class TimeoutHelper(object):
    """
    Helper that enforces a timeout for _executeShCmd().

    An instance is threaded through the recursive calls and collects every
    process that gets started, so that all of them can be killed when the
    timeout fires. A timeout that is not greater than zero disables the
    helper: every public method then becomes a no-op.
    """

    def __init__(self, timeout):
        self.timeout = timeout
        self._procs = []
        self._timeoutReached = False
        self._doneKillPass = False
        # Both are created lazily in startTimer(). The lock guards
        # concurrent access to _procs and _doneKillPass.
        self._lock = None
        self._timer = None

    def active(self):
        """Return True when a positive timeout was requested."""
        return self.timeout > 0

    def cancel(self):
        """Stop the pending timer, if the helper is active."""
        if self.active():
            self._timer.cancel()

    def timeoutReached(self):
        """Return True once the timer callback has run."""
        return self._timeoutReached

    def startTimer(self):
        """Create the lock and start the timer (only when active)."""
        if not self.active():
            return

        # Late initialisation: only needed when a timeout is set.
        self._lock = threading.Lock()
        timer = threading.Timer(self.timeout, self._handleTimeoutReached)
        self._timer = timer
        timer.start()

    def addProcess(self, proc):
        """Register ``proc`` so that it is killed if the timeout is hit."""
        if not self.active():
            return
        with self._lock:
            self._procs.append(proc)
            # Only record here whether a kill pass already ran; _kill() takes
            # the same (non-reentrant) lock, so it is called after release.
            kill_pass_already_ran = self._doneKillPass

        # The timer thread already did its kill pass, so this late process
        # would survive the timeout unless we run another pass ourselves.
        if kill_pass_already_ran:
            assert self.timeoutReached()
            self._kill()

    def _handleTimeoutReached(self):
        # Timer callback: flag the timeout, then kill what is registered.
        self._timeoutReached = True
        self._kill()

    def _kill(self):
        """
        Kill every registered process and forget about them.

        May run more than once: a process can be in the middle of being
        created in _executeShCmd() when the first pass runs and so not yet
        be in ``self._procs``. Taking the lock both here and in addProcess()
        lets such late processes be killed by a later pass.
        """
        with self._lock:
            for proc in self._procs:
                lit.util.killProcessAndChildren(proc.pid)
            # Drop the killed processes and remember that a pass happened.
            self._procs = []
            self._doneKillPass = True
178
179
class ShellCommandResult(object):
    """Captures the result of an individual command.

    Attributes:
        command: the command object that was executed.
        stdout: captured standard output.
        stderr: captured standard error.
        exitCode: exit status of the command.
        timeoutReached: whether the timeout had been hit.
        outputFiles: a fresh list built from the ``outputFiles`` argument.
    """

    def __init__(
        self, command, stdout, stderr, exitCode, timeoutReached, outputFiles=()
    ):
        self.command = command
        self.stdout = stdout
        self.stderr = stderr
        self.exitCode = exitCode
        self.timeoutReached = timeoutReached
        # The default is an immutable empty tuple rather than a shared list;
        # each instance always receives its own list copy.
        self.outputFiles = list(outputFiles)
192
193
def executeShCmd(cmd, shenv, results, timeout=0):
    """
    Wrapper around _executeShCmd that handles the timeout.

    Returns a tuple ``(finalExitCode, timeoutInfo)`` where ``timeoutInfo`` is
    None unless the timeout was reached, in which case it is a message
    naming the timeout. Exceptions raised by _executeShCmd propagate to the
    caller after the timer has been cancelled.
    """
    # Use the helper even when no timeout is required to make
    # other code simpler (i.e. avoid bunch of ``is not None`` checks)
    timeoutHelper = TimeoutHelper(timeout)
    if timeout > 0:
        timeoutHelper.startTimer()
    try:
        finalExitCode = _executeShCmd(cmd, shenv, results, timeoutHelper)
    finally:
        # Cancel even when the command raised; otherwise the timer thread
        # would stay alive until the timeout expires and then run a kill
        # pass for a command that is already finished.
        timeoutHelper.cancel()
    timeoutInfo = None
    if timeoutHelper.timeoutReached():
        timeoutInfo = "Reached timeout of {} seconds".format(timeout)

    return (finalExitCode, timeoutInfo)
211
212
def expand_glob(arg, cwd):
    """Return the list of arguments that ``arg`` stands for.

    A GlobItem is resolved against ``cwd`` and its matches are returned
    sorted; anything else is returned unchanged as a one-element list.
    """
    if not isinstance(arg, GlobItem):
        return [arg]
    return sorted(arg.resolve(cwd))
217
218
def expand_glob_expressions(args, cwd):
    """Expand globs in every argument except the first one.

    ``args[0]`` is kept as-is; each later argument is replaced by whatever
    expand_glob() returns for it, preserving argument order.
    """
    command_name = args[0]
    expanded_tail = [
        match for arg in args[1:] for match in expand_glob(arg, cwd)
    ]
    return [command_name] + expanded_tail
224
225
def quote_windows_command(seq):
    r"""
    Join ``seq`` into one Windows command line string.

    This reimplements Python's private subprocess.list2cmdline for MSys
    compatibility, based on the CPython implementation here:
      https://hg.python.org/cpython/file/849826a900d2/Lib/subprocess.py#l422

    Some core util distributions (MSys) don't tokenize command line arguments
    the same way that MSVC CRT does, so lit applies its own quoting rules to
    paper over the differences. The algorithm is the MSDN one that CPython
    uses (http://msdn.microsoft.com/en-us/library/17w5ykft.aspx), except that
    more arguments get quoted: those containing a double quote, a square
    bracket or a semicolon, in addition to whitespace and empty arguments.

    For MSys based tools this remains brittle, because quoting an argument
    makes such a tool unescape backslashes where it shouldn't
    (e.g. "a\b\\c\\\\d" becomes "a\b\c\\d" where it should stay as it was,
    according to regular win32 command line parsing rules).
    """
    quoted_args = []
    for arg in seq:
        # This differs from upstream list2cmdline, which only quotes for
        # whitespace and empty arguments.
        wrap = any(ch in arg for ch in ' \t"[;') or not arg

        out = []
        pending = 0  # backslashes read but not yet emitted
        for ch in arg:
            if ch == "\\":
                # Whether these need doubling depends on what follows.
                pending += 1
                continue
            if ch == '"':
                # Backslashes before a quote are doubled, then the quote
                # itself is escaped.
                out.append("\\" * (pending * 2))
                out.append('\\"')
            else:
                out.append("\\" * pending)
                out.append(ch)
            pending = 0

        # Trailing backslashes are doubled only when a closing quote follows.
        out.append("\\" * (pending * 2 if wrap else pending))

        body = "".join(out)
        quoted_args.append('"' + body + '"' if wrap else body)

    return " ".join(quoted_args)
294
295
def updateEnv(env, args):
    """Apply the arguments of an 'export' or 'env' command to ``env``.

    ``args[0]`` (the command name) is skipped. Each following ``KEY=VALUE``
    argument is stored in ``env.env``, and ``-u NAME`` removes ``NAME`` from
    it (e.g. ``env -u FOO -u BAR`` removes both FOO and BAR). Processing
    stops at the first argument that is neither of those.

    Returns the part of ``args`` starting at that first unprocessed
    argument, or an empty list when every argument was consumed.
    """
    rest_start = len(args)
    pending_unset = False
    for position, arg in enumerate(args[1:], start=1):
        if arg == "-u":
            # The next argument names the variable to remove.
            pending_unset = True
            continue
        if pending_unset:
            pending_unset = False
            env.env.pop(arg, None)
            continue

        name, separator, value = arg.partition("=")
        if not separator:
            # No equals sign: this is where the wrapped command begins.
            rest_start = position
            break
        env.env[name] = value
    return args[rest_start:]
324
325
def executeBuiltinCd(cmd, shenv):
    """executeBuiltinCd - Change the current directory.

    Raises InternalShellError unless exactly one directory argument is given.
    """
    argc = len(cmd.args)
    if argc != 2:
        raise InternalShellError(cmd, "'cd' supports only one argument")
    target = cmd.args[1]
    # The cwd is changed in the environment object that was passed in.
    shenv.change_dir(target)
    # No existence check is made here, so the builtin always reports
    # success; a missing directory makes later Popen calls fail instead.
    return ShellCommandResult(
        cmd, stdout="", stderr="", exitCode=0, timeoutReached=False
    )
335
336
def executeBuiltinPushd(cmd, shenv):
    """executeBuiltinPushd - Change the current dir and save the old.

    Raises InternalShellError unless exactly one directory argument is given.
    """
    argc = len(cmd.args)
    if argc != 2:
        raise InternalShellError(cmd, "'pushd' supports only one argument")
    previous_dir = shenv.cwd
    shenv.dirStack.append(previous_dir)
    shenv.change_dir(cmd.args[1])
    return ShellCommandResult(
        cmd, stdout="", stderr="", exitCode=0, timeoutReached=False
    )
344
345
def executeBuiltinPopd(cmd, shenv):
    """executeBuiltinPopd - Restore a previously saved working directory.

    Raises InternalShellError when arguments are given or when the
    directory stack is empty.
    """
    if len(cmd.args) != 1:
        raise InternalShellError(cmd, "'popd' does not support arguments")
    saved_dirs = shenv.dirStack
    if not saved_dirs:
        raise InternalShellError(cmd, "popd: directory stack empty")
    shenv.cwd = saved_dirs.pop()
    return ShellCommandResult(
        cmd, stdout="", stderr="", exitCode=0, timeoutReached=False
    )
354
355
def executeBuiltinExport(cmd, shenv):
    """executeBuiltinExport - Set an environment variable.

    Raises InternalShellError unless exactly one argument is given. The
    argument is applied to ``shenv`` through updateEnv().
    """
    argc = len(cmd.args)
    if argc != 2:
        raise InternalShellError(cmd, "'export' supports only one argument")
    # updateEnv() also returns the unprocessed arguments; not needed here.
    updateEnv(shenv, cmd.args)
    return ShellCommandResult(
        cmd, stdout="", stderr="", exitCode=0, timeoutReached=False
    )
362
363
def executeBuiltinEcho(cmd, shenv):
    """Interpret a redirected echo or @echo command.

    Only stdout may be redirected; any stdin or stderr redirect raises
    InternalShellError. When stdout is not redirected, the text is buffered
    and returned as the result's stdout instead.
    """
    opened_files = []
    stdin, stdout, stderr = processRedirects(cmd, subprocess.PIPE, shenv, opened_files)
    if stdin != subprocess.PIPE or stderr != subprocess.PIPE:
        raise InternalShellError(
            cmd, f"stdin and stderr redirects not supported for {cmd.args[0]}"
        )

    def passthrough(text):
        return text

    encode = passthrough

    # Some tests have un-redirected echo commands to help debug test failures,
    # so that output is collected in memory and handed back to the caller.
    capture = stdout == subprocess.PIPE
    if capture:
        stdout = StringIO()
    elif kIsWindows:
        # Reopen stdout in binary mode to avoid CRLF translation. The versions
        # of echo being replaced on Windows all emit plain LF, and the LLVM
        # tests now depend on this. A binary stream wants 'bytes' objects
        # rather than 'str' objects, hence the different encoder.
        encode = lit.util.to_bytes
        stdout = open(stdout.name, stdout.mode + "b")
        opened_files.append((None, None, stdout, None))

    # Implement echo flags. Only leading -e and -n are recognised (not in
    # combined form). Unknown flags must be printed, because
    # `echo "-D FOO"` prints the dash.
    words = cmd.args[1:]
    interpret_escapes = False
    write_newline = True
    consumed = 0
    for word in words:
        if word == "-e":
            interpret_escapes = True
        elif word == "-n":
            write_newline = False
        else:
            break
        consumed += 1
    words = words[consumed:]

    def render(word):
        # With -e, backslash escapes in the word are interpreted.
        if interpret_escapes:
            return lit.util.to_bytes(word).decode("unicode_escape")
        return word

    for position, word in enumerate(words):
        if position:
            stdout.write(encode(" "))
        stdout.write(encode(render(word)))
    if write_newline:
        stdout.write(encode("\n"))

    for entry in opened_files:
        entry[2].close()

    output = stdout.getvalue() if capture else ""
    return ShellCommandResult(cmd, output, "", 0, False)
424
425
def executeBuiltinMkdir(cmd, cmd_shenv):
    """executeBuiltinMkdir - Create new directories.

    Supports the ``-p`` option only. Raises InternalShellError for any other
    option or when no directory operand is given. Failures of a plain
    (non ``-p``) mkdir are reported on the result's stderr with exit code 1.
    """
    operands = expand_glob_expressions(cmd.args, cmd_shenv.cwd)[1:]
    try:
        opts, operands = getopt.gnu_getopt(operands, "p")
    except getopt.GetoptError as err:
        raise InternalShellError(cmd, "Unsupported: 'mkdir':  %s" % str(err))

    make_parents = False
    for flag, _value in opts:
        if flag == "-p":
            make_parents = True
        else:
            assert False, "unhandled option"

    if not operands:
        raise InternalShellError(cmd, "Error: 'mkdir' is missing an operand")

    errors = StringIO()
    status = 0
    # Paths are handled as unicode on Windows and as bytes elsewhere.
    convert = to_unicode if kIsWindows else to_bytes
    for operand in operands:
        target = convert(operand)
        base = convert(cmd_shenv.cwd)
        if not os.path.isabs(target):
            target = lit.util.abs_path_preserve_drive(os.path.join(base, target))
        if make_parents:
            lit.util.mkdir_p(target)
            continue
        try:
            lit.util.mkdir(target)
        except OSError as err:
            errors.write("Error: 'mkdir' command failed, %s\n" % str(err))
            status = 1
    return ShellCommandResult(cmd, "", errors.getvalue(), status, False)
461
462
def executeBuiltinRm(cmd, cmd_shenv):
    """executeBuiltinRm - Removes (deletes) files or directories.

    Supported options are ``-f`` (skip missing paths and make read-only
    files removable) and ``-r`` / ``-R`` / ``--recursive`` (remove
    directories). Raises InternalShellError for an unsupported option or
    when no operand is given. Per-path failures are written to the result's
    stderr and make the exit code 1; the remaining paths are still handled.
    """
    args = expand_glob_expressions(cmd.args, cmd_shenv.cwd)[1:]
    try:
        # getopt expects long option names without the leading "--"; it
        # still reports the option back as "--recursive".
        opts, args = getopt.gnu_getopt(args, "frR", ["recursive"])
    except getopt.GetoptError as err:
        raise InternalShellError(cmd, "Unsupported: 'rm':  %s" % str(err))

    force = False
    recursive = False
    for o, a in opts:
        if o == "-f":
            force = True
        elif o in ("-r", "-R", "--recursive"):
            recursive = True
        else:
            assert False, "unhandled option"

    if len(args) == 0:
        raise InternalShellError(cmd, "Error: 'rm' is missing an operand")

    def on_rm_error(func, path, exc_info):
        # path contains the path of the file that couldn't be removed
        # let's just assume that it's read-only and remove it.
        os.chmod(path, stat.S_IMODE(os.stat(path).st_mode) | stat.S_IWRITE)
        os.remove(path)

    stderr = StringIO()
    exitCode = 0
    for path in args:
        cwd = cmd_shenv.cwd
        # Paths are handled as unicode on Windows and as bytes elsewhere.
        path = to_unicode(path) if kIsWindows else to_bytes(path)
        cwd = to_unicode(cwd) if kIsWindows else to_bytes(cwd)
        if not os.path.isabs(path):
            path = lit.util.abs_path_preserve_drive(os.path.join(cwd, path))
        # With -f a missing path is not an error.
        if force and not os.path.exists(path):
            continue
        try:
            if os.path.isdir(path):
                if not recursive:
                    stderr.write("Error: %s is a directory\n" % path)
                    exitCode = 1
                    # Report the error and leave the directory in place
                    # instead of deleting it anyway.
                    continue
                if platform.system() == "Windows":
                    # NOTE: use ctypes to access `SHFileOperationsW` on Windows to
                    # use the NT style path to get access to long file paths which
                    # cannot be removed otherwise.
                    from ctypes.wintypes import BOOL, HWND, LPCWSTR, UINT, WORD
                    from ctypes import addressof, byref, c_void_p, create_unicode_buffer
                    from ctypes import Structure
                    from ctypes import windll, WinError, POINTER

                    class SHFILEOPSTRUCTW(Structure):
                        _fields_ = [
                            ("hWnd", HWND),
                            ("wFunc", UINT),
                            ("pFrom", LPCWSTR),
                            ("pTo", LPCWSTR),
                            ("fFlags", WORD),
                            ("fAnyOperationsAborted", BOOL),
                            ("hNameMappings", c_void_p),
                            ("lpszProgressTitle", LPCWSTR),
                        ]

                    FO_MOVE, FO_COPY, FO_DELETE, FO_RENAME = range(1, 5)

                    FOF_SILENT = 4
                    FOF_NOCONFIRMATION = 16
                    FOF_NOCONFIRMMKDIR = 512
                    FOF_NOERRORUI = 1024

                    FOF_NO_UI = (
                        FOF_SILENT
                        | FOF_NOCONFIRMATION
                        | FOF_NOERRORUI
                        | FOF_NOCONFIRMMKDIR
                    )

                    SHFileOperationW = windll.shell32.SHFileOperationW
                    SHFileOperationW.argtypes = [POINTER(SHFILEOPSTRUCTW)]

                    path = os.path.abspath(path)

                    # pFrom must end with two NUL characters, so the buffer
                    # is allocated two characters longer than the path.
                    pFrom = create_unicode_buffer(path, len(path) + 2)
                    pFrom[len(path)] = pFrom[len(path) + 1] = "\0"
                    operation = SHFILEOPSTRUCTW(
                        wFunc=UINT(FO_DELETE),
                        pFrom=LPCWSTR(addressof(pFrom)),
                        fFlags=FOF_NO_UI,
                    )
                    result = SHFileOperationW(byref(operation))
                    if result:
                        raise WinError(result)
                else:
                    shutil.rmtree(path, onerror=on_rm_error if force else None)
            else:
                # With -f, make a non-writable file writable before removal.
                if force and not os.access(path, os.W_OK):
                    os.chmod(path, stat.S_IMODE(os.stat(path).st_mode) | stat.S_IWRITE)
                os.remove(path)
        except OSError as err:
            stderr.write("Error: 'rm' command failed, %s" % str(err))
            exitCode = 1
    return ShellCommandResult(cmd, "", stderr.getvalue(), exitCode, False)
565
566
def executeBuiltinColon(cmd, cmd_shenv):
    """executeBuiltinColon - Discard arguments and exit with status 0."""
    # Null command: nothing is read, written or changed.
    return ShellCommandResult(
        cmd, stdout="", stderr="", exitCode=0, timeoutReached=False
    )
570
571
def processRedirects(cmd, stdin_source, cmd_shenv, opened_files):
    """Return the standard fds for cmd after applying redirects

    Returns the three standard file descriptors for the new child process.  Each
    fd may be an open, writable file object or a sentinel value from the
    subprocess module.

    ``stdin_source`` is what stdin gets when it is not redirected. Every file
    opened here is appended to ``opened_files`` as a tuple
    ``(filename, mode, file-object, resolved-path)``; closing them is left to
    the caller. ``resolved-path`` is None for the /dev/null and /dev/tty
    substitutes.

    Raises InternalShellError for an unsupported redirect or when a glob in a
    redirect target does not expand to exactly one file.
    """

    # Apply the redirections, we use (N,) as a sentinel to indicate stdin,
    # stdout, stderr for N equal to 0, 1, or 2 respectively. Redirects to or
    # from a file are represented with a list [file, mode, file-object]
    # where file-object is initially None.
    redirects = [(0,), (1,), (2,)]
    for (op, filename) in cmd.redirects:
        if op == (">", 2):
            redirects[2] = [filename, "w", None]
        elif op == (">>", 2):
            redirects[2] = [filename, "a", None]
        # Compare against the exact fd names. A substring test on "012"
        # would also accept "", "01" or "12" and then fail in int() or in
        # the list index, and would raise TypeError for a non-string.
        elif op == (">&", 2) and filename in ("0", "1", "2"):
            redirects[2] = redirects[int(filename)]
        elif op == (">&",) or op == ("&>",):
            redirects[1] = redirects[2] = [filename, "w", None]
        elif op == (">",):
            redirects[1] = [filename, "w", None]
        elif op == (">>",):
            redirects[1] = [filename, "a", None]
        elif op == ("<",):
            redirects[0] = [filename, "r", None]
        else:
            raise InternalShellError(
                cmd, "Unsupported redirect: %r" % ((op, filename),)
            )

    # Open file descriptors in a second pass.
    std_fds = [None, None, None]
    for (index, r) in enumerate(redirects):
        # Handle the sentinel values for defaults up front.
        if isinstance(r, tuple):
            if r == (0,):
                fd = stdin_source
            elif r == (1,):
                if index == 0:
                    raise InternalShellError(cmd, "Unsupported redirect for stdin")
                elif index == 1:
                    fd = subprocess.PIPE
                else:
                    # stderr was redirected to stdout (2>&1).
                    fd = subprocess.STDOUT
            elif r == (2,):
                if index != 2:
                    raise InternalShellError(cmd, "Unsupported redirect on stdout")
                fd = subprocess.PIPE
            else:
                raise InternalShellError(cmd, "Bad redirect")
            std_fds[index] = fd
            continue

        (filename, mode, fd) = r

        # Check if we already have an open fd. This can happen if stdout and
        # stderr go to the same place.
        if fd is not None:
            std_fds[index] = fd
            continue

        redir_filename = None
        name = expand_glob(filename, cmd_shenv.cwd)
        if len(name) != 1:
            raise InternalShellError(
                cmd, "Unsupported: glob in " "redirect expanded to multiple files"
            )
        name = name[0]
        if kAvoidDevNull and name == kDevNull:
            fd = tempfile.TemporaryFile(mode=mode)
        elif kIsWindows and name == "/dev/tty":
            # Simulate /dev/tty on Windows.
            # "CON" is a special filename for the console.
            fd = open("CON", mode)
        else:
            # Make sure relative paths are relative to the cwd.
            redir_filename = os.path.join(cmd_shenv.cwd, name)
            redir_filename = (
                to_unicode(redir_filename) if kIsWindows else to_bytes(redir_filename)
            )
            fd = open(redir_filename, mode)
        # Workaround a Win32 and/or subprocess bug when appending.
        #
        # FIXME: Actually, this is probably an instance of PR6753.
        if mode == "a":
            fd.seek(0, 2)
        # Mutate the underlying redirect list so that we can redirect stdout
        # and stderr to the same place without opening the file twice.
        r[2] = fd
        opened_files.append((filename, mode, fd) + (redir_filename,))
        std_fds[index] = fd

    return std_fds
668
669
670def _executeShCmd(cmd, shenv, results, timeoutHelper):
671    if timeoutHelper.timeoutReached():
672        # Prevent further recursion if the timeout has been hit
673        # as we should try avoid launching more processes.
674        return None
675
676    if isinstance(cmd, ShUtil.Seq):
677        if cmd.op == ";":
678            res = _executeShCmd(cmd.lhs, shenv, results, timeoutHelper)
679            return _executeShCmd(cmd.rhs, shenv, results, timeoutHelper)
680
681        if cmd.op == "&":
682            raise InternalShellError(cmd, "unsupported shell operator: '&'")
683
684        if cmd.op == "||":
685            res = _executeShCmd(cmd.lhs, shenv, results, timeoutHelper)
686            if res != 0:
687                res = _executeShCmd(cmd.rhs, shenv, results, timeoutHelper)
688            return res
689
690        if cmd.op == "&&":
691            res = _executeShCmd(cmd.lhs, shenv, results, timeoutHelper)
692            if res is None:
693                return res
694
695            if res == 0:
696                res = _executeShCmd(cmd.rhs, shenv, results, timeoutHelper)
697            return res
698
699        raise ValueError("Unknown shell command: %r" % cmd.op)
700    assert isinstance(cmd, ShUtil.Pipeline)
701
702    procs = []
703    proc_not_counts = []
704    default_stdin = subprocess.PIPE
705    stderrTempFiles = []
706    opened_files = []
707    named_temp_files = []
708    builtin_commands = set(["cat", "diff"])
709    builtin_commands_dir = os.path.join(
710        os.path.dirname(os.path.abspath(__file__)), "builtin_commands"
711    )
712    inproc_builtins = {
713        "cd": executeBuiltinCd,
714        "export": executeBuiltinExport,
715        "echo": executeBuiltinEcho,
716        "@echo": executeBuiltinEcho,
717        "mkdir": executeBuiltinMkdir,
718        "popd": executeBuiltinPopd,
719        "pushd": executeBuiltinPushd,
720        "rm": executeBuiltinRm,
721        ":": executeBuiltinColon,
722    }
723    # To avoid deadlock, we use a single stderr stream for piped
724    # output. This is null until we have seen some output using
725    # stderr.
726    for i, j in enumerate(cmd.commands):
727        # Reference the global environment by default.
728        cmd_shenv = shenv
729        args = list(j.args)
730        not_args = []
731        not_count = 0
732        not_crash = False
733        while True:
734            if args[0] == "env":
735                # Create a copy of the global environment and modify it for
736                # this one command. There might be multiple envs in a pipeline,
737                # and there might be multiple envs in a command (usually when
738                # one comes from a substitution):
739                #   env FOO=1 llc < %s | env BAR=2 llvm-mc | FileCheck %s
740                #   env FOO=1 %{another_env_plus_cmd} | FileCheck %s
741                if cmd_shenv is shenv:
742                    cmd_shenv = ShellEnvironment(shenv.cwd, shenv.env)
743                args = updateEnv(cmd_shenv, args)
744                if not args:
745                    # Return the environment variables if no argument is provided.
746                    env_str = "\n".join(
747                        f"{key}={value}" for key, value in sorted(cmd_shenv.env.items())
748                    )
749                    results.append(
750                        ShellCommandResult(
751                            j, env_str, "", 0, timeoutHelper.timeoutReached(), []
752                        )
753                    )
754                    return 0
755            elif args[0] == "not":
756                not_args.append(args.pop(0))
757                not_count += 1
758                if args and args[0] == "--crash":
759                    not_args.append(args.pop(0))
760                    not_crash = True
761                if not args:
762                    raise InternalShellError(j, "Error: 'not' requires a" " subcommand")
763            elif args[0] == "!":
764                not_args.append(args.pop(0))
765                not_count += 1
766                if not args:
767                    raise InternalShellError(j, "Error: '!' requires a" " subcommand")
768            else:
769                break
770
771        # Handle in-process builtins.
772        #
773        # Handle "echo" as a builtin if it is not part of a pipeline. This
774        # greatly speeds up tests that construct input files by repeatedly
775        # echo-appending to a file.
776        # FIXME: Standardize on the builtin echo implementation. We can use a
777        # temporary file to sidestep blocking pipe write issues.
778
779        # Ensure args[0] is hashable.
780        args[0] = expand_glob(args[0], cmd_shenv.cwd)[0]
781
782        inproc_builtin = inproc_builtins.get(args[0], None)
783        if inproc_builtin and (args[0] != "echo" or len(cmd.commands) == 1):
784            # env calling an in-process builtin is useless, so we take the safe
785            # approach of complaining.
786            if not cmd_shenv is shenv:
787                raise InternalShellError(
788                    j, "Error: 'env' cannot call '{}'".format(args[0])
789                )
790            if not_crash:
791                raise InternalShellError(
792                    j, "Error: 'not --crash' cannot call" " '{}'".format(args[0])
793                )
794            if len(cmd.commands) != 1:
795                raise InternalShellError(
796                    j,
797                    "Unsupported: '{}' cannot be part" " of a pipeline".format(args[0]),
798                )
799            result = inproc_builtin(Command(args, j.redirects), cmd_shenv)
800            if not_count % 2:
801                result.exitCode = int(not result.exitCode)
802            result.command.args = j.args
803            results.append(result)
804            return result.exitCode
805
806        # Resolve any out-of-process builtin command before adding back 'not'
807        # commands.
808        if args[0] in builtin_commands:
809            args.insert(0, sys.executable)
810            cmd_shenv.env["PYTHONPATH"] = os.path.dirname(os.path.abspath(__file__))
811            args[1] = os.path.join(builtin_commands_dir, args[1] + ".py")
812
813        # We had to search through the 'not' commands to find all the 'env'
814        # commands and any other in-process builtin command.  We don't want to
815        # reimplement 'not' and its '--crash' here, so just push all 'not'
816        # commands back to be called as external commands.  Because this
817        # approach effectively moves all 'env' commands up front, it relies on
818        # the assumptions that (1) environment variables are not intended to be
819        # relevant to 'not' commands and (2) the 'env' command should always
820        # blindly pass along the status it receives from any command it calls.
821
822        # For plain negations, either 'not' without '--crash', or the shell
823        # operator '!', leave them out from the command to execute and
824        # invert the result code afterwards.
825        if not_crash:
826            args = not_args + args
827            not_count = 0
828        else:
829            not_args = []
830
831        stdin, stdout, stderr = processRedirects(
832            j, default_stdin, cmd_shenv, opened_files
833        )
834
835        # If stderr wants to come from stdout, but stdout isn't a pipe, then put
836        # stderr on a pipe and treat it as stdout.
837        if stderr == subprocess.STDOUT and stdout != subprocess.PIPE:
838            stderr = subprocess.PIPE
839            stderrIsStdout = True
840        else:
841            stderrIsStdout = False
842
843            # Don't allow stderr on a PIPE except for the last
844            # process, this could deadlock.
845            #
846            # FIXME: This is slow, but so is deadlock.
847            if stderr == subprocess.PIPE and j != cmd.commands[-1]:
848                stderr = tempfile.TemporaryFile(mode="w+b")
849                stderrTempFiles.append((i, stderr))
850
851        # Resolve the executable path ourselves.
852        executable = None
853        # For paths relative to cwd, use the cwd of the shell environment.
854        if args[0].startswith("."):
855            exe_in_cwd = os.path.join(cmd_shenv.cwd, args[0])
856            if os.path.isfile(exe_in_cwd):
857                executable = exe_in_cwd
858        if not executable:
859            executable = lit.util.which(args[0], cmd_shenv.env["PATH"])
860        if not executable:
861            raise InternalShellError(j, "%r: command not found" % args[0])
862
863        # Replace uses of /dev/null with temporary files.
864        if kAvoidDevNull:
865            # In Python 2.x, basestring is the base class for all string (including unicode)
866            # In Python 3.x, basestring no longer exist and str is always unicode
867            try:
868                str_type = basestring
869            except NameError:
870                str_type = str
871            for i, arg in enumerate(args):
872                if isinstance(arg, str_type) and kDevNull in arg:
873                    f = tempfile.NamedTemporaryFile(delete=False)
874                    f.close()
875                    named_temp_files.append(f.name)
876                    args[i] = arg.replace(kDevNull, f.name)
877
878        # Expand all glob expressions
879        args = expand_glob_expressions(args, cmd_shenv.cwd)
880
881        # On Windows, do our own command line quoting for better compatibility
882        # with some core utility distributions.
883        if kIsWindows:
884            args = quote_windows_command(args)
885
886        try:
887            procs.append(
888                subprocess.Popen(
889                    args,
890                    cwd=cmd_shenv.cwd,
891                    executable=executable,
892                    stdin=stdin,
893                    stdout=stdout,
894                    stderr=stderr,
895                    env=cmd_shenv.env,
896                    close_fds=kUseCloseFDs,
897                    universal_newlines=True,
898                    errors="replace",
899                )
900            )
901            proc_not_counts.append(not_count)
902            # Let the helper know about this process
903            timeoutHelper.addProcess(procs[-1])
904        except OSError as e:
905            raise InternalShellError(
906                j, "Could not create process ({}) due to {}".format(executable, e)
907            )
908
909        # Immediately close stdin for any process taking stdin from us.
910        if stdin == subprocess.PIPE:
911            procs[-1].stdin.close()
912            procs[-1].stdin = None
913
914        # Update the current stdin source.
915        if stdout == subprocess.PIPE:
916            default_stdin = procs[-1].stdout
917        elif stderrIsStdout:
918            default_stdin = procs[-1].stderr
919        else:
920            default_stdin = subprocess.PIPE
921
922    # Explicitly close any redirected files. We need to do this now because we
923    # need to release any handles we may have on the temporary files (important
924    # on Win32, for example). Since we have already spawned the subprocess, our
925    # handles have already been transferred so we do not need them anymore.
926    for (name, mode, f, path) in opened_files:
927        f.close()
928
929    # FIXME: There is probably still deadlock potential here. Yawn.
930    procData = [None] * len(procs)
931    procData[-1] = procs[-1].communicate()
932
933    for i in range(len(procs) - 1):
934        if procs[i].stdout is not None:
935            out = procs[i].stdout.read()
936        else:
937            out = ""
938        if procs[i].stderr is not None:
939            err = procs[i].stderr.read()
940        else:
941            err = ""
942        procData[i] = (out, err)
943
944    # Read stderr out of the temp files.
945    for i, f in stderrTempFiles:
946        f.seek(0, 0)
947        procData[i] = (procData[i][0], f.read())
948        f.close()
949
950    exitCode = None
951    for i, (out, err) in enumerate(procData):
952        res = procs[i].wait()
953        # Detect Ctrl-C in subprocess.
954        if res == -signal.SIGINT:
955            raise KeyboardInterrupt
956        if proc_not_counts[i] % 2:
957            res = 1 if res == 0 else 0
958        elif proc_not_counts[i] > 1:
959            res = 1 if res != 0 else 0
960
961        # Ensure the resulting output is always of string type.
962        try:
963            if out is None:
964                out = ""
965            else:
966                out = to_string(out.decode("utf-8", errors="replace"))
967        except:
968            out = str(out)
969        try:
970            if err is None:
971                err = ""
972            else:
973                err = to_string(err.decode("utf-8", errors="replace"))
974        except:
975            err = str(err)
976
977        # Gather the redirected output files for failed commands.
978        output_files = []
979        if res != 0:
980            for (name, mode, f, path) in sorted(opened_files):
981                if path is not None and mode in ("w", "a"):
982                    try:
983                        with open(path, "rb") as f:
984                            data = f.read()
985                    except:
986                        data = None
987                    if data is not None:
988                        output_files.append((name, path, data))
989
990        results.append(
991            ShellCommandResult(
992                cmd.commands[i],
993                out,
994                err,
995                res,
996                timeoutHelper.timeoutReached(),
997                output_files,
998            )
999        )
1000        if cmd.pipe_err:
1001            # Take the last failing exit code from the pipeline.
1002            if not exitCode or res != 0:
1003                exitCode = res
1004        else:
1005            exitCode = res
1006
1007    # Remove any named temporary files we created.
1008    for f in named_temp_files:
1009        try:
1010            os.remove(f)
1011        except OSError:
1012            pass
1013
1014    if cmd.negate:
1015        exitCode = not exitCode
1016
1017    return exitCode
1018
1019
def findColor(line, curr_color):
    r"""
    Determine which color escape sequence is in effect at the end of line.

    The last occurrence of "\33[" in line is located, and the text from there
    through the next "m" is taken as the escape sequence.

    line: The text to inspect.
    curr_color: The escape sequence in effect before line, or None.

    Returns curr_color if line contains no complete escape sequence, None if
    the last sequence is a reset ("\33[0m" or "\33[m"), and otherwise the last
    sequence itself.
    """
    esc_start = line.rfind("\33[")
    esc_end = -1 if esc_start == -1 else line.find("m", esc_start + 2)
    if esc_end == -1:
        # No complete escape sequence, so the color carries over unchanged.
        return curr_color
    sequence = line[esc_start : esc_end + 1]
    # "\33[0m" means "reset all formatting". Sometimes the 0 is skipped.
    return None if sequence in ("\33[m", "\33[0m") else sequence
1032
1033
def formatOutput(title, data, limit=None):
    """
    Render data as a titled, boxed block of '#'-prefixed lines.

    title: Text embedded in the block's top border.
    data: The text to display.  If it contains only whitespace, the empty
        string is returned.
    limit: If not None and data is longer than limit characters, only the
        first limit characters are shown, followed by a "..." line, and the
        bottom border says that the data was truncated.

    A color escape sequence still in effect at the end of one line (as
    reported by findColor) is reset before the next line's "# | " prefix and
    re-emitted after it, and it is reset after the last line.
    """
    if not data.strip():
        return ""
    truncated = limit is not None and len(data) > limit
    if truncated:
        data = data[:limit] + "\n...\n"
    footer = "data was truncated" if truncated else ""
    width = 30
    # fmt: off
    pieces = [f"# .---{title}{'-' * (width - 4 - len(title))}\n"]
    active_color = None
    for text in data.splitlines():
        if active_color:
            # Keep the color off the prefix, then restore it for the text.
            pieces.append("\33[0m" + "# | " + active_color)
        else:
            pieces.append("# | ")
        pieces.append(text + "\n")
        active_color = findColor(text, active_color)
    if active_color:
        pieces.append("\33[0m")  # prevent unterminated formatting from leaking
    pieces.append(f"# `---{footer}{'-' * (width - 4 - len(footer))}\n")
    # fmt: on
    return "".join(pieces)
1059
1060
def executeScriptInternal(
    test, litConfig, tmpBase, commands, cwd, debug=True
) -> Tuple[str, str, int, Optional[str]]:
    """
    Execute the script lines in commands using lit's internal shell.

    Always either returns the tuple (out, err, exitCode, timeoutInfo) or
    raises a ScriptFatal exception.

    If debug is True (the normal lit behavior), err is empty, and out contains
    an execution trace, including stdout and stderr shown per command
    executed.

    If debug is False (set by some custom lit test formats that call this
    function), out contains only stdout from the script, err contains only
    stderr from the script, and there is no execution trace.

    test: Supplies test.config.pipefail for parsing and
        test.config.environment for the shell environment.
    litConfig: Supplies isWindows and maxIndividualTestTime.
    tmpBase: Not used by this function.
    commands: The script lines, each optionally starting with '%dbg(ARG)'.
    cwd: The working directory passed to the shell environment.

    Raises ScriptFatal if a script line cannot be parsed.
    """
    cmds = []
    for ln in commands:
        # Within lit, we try to always add '%dbg(...)' to command lines in order
        # to maximize debuggability.  However, custom lit test formats might not
        # always add it, so add a generic debug message in that case.
        match = re.fullmatch(kPdbgRegex, ln)
        if match:
            dbg = match.group(1)
            command = match.group(2)
        else:
            dbg = "command line"
            command = ln
        if debug:
            ln = f"@echo '# {dbg}' "
            if command:
                ln += f"&& @echo {shlex.quote(command.lstrip())} && {command}"
            else:
                ln += "has no command after substitutions"
        else:
            ln = command
        try:
            cmds.append(
                ShUtil.ShParser(ln, litConfig.isWindows, test.config.pipefail).parse()
            )
        except Exception:
            # Report parser failures as fatal script errors, but let
            # KeyboardInterrupt and SystemExit propagate instead of being
            # misreported as a parse error.
            raise ScriptFatal(
                f"shell parser error on {dbg}: {command.lstrip()}\n"
            ) from None

    # Chain all lines so that execution stops at the first failing one.
    cmd = cmds[0]
    for c in cmds[1:]:
        cmd = ShUtil.Seq(cmd, "&&", c)

    results = []
    timeoutInfo = None
    try:
        shenv = ShellEnvironment(cwd, test.config.environment)
        exitCode, timeoutInfo = executeShCmd(
            cmd, shenv, results, timeout=litConfig.maxIndividualTestTime
        )
    except InternalShellError as e:
        # Record the failure as the result of the offending command.
        exitCode = 127
        results.append(ShellCommandResult(e.command, "", e.message, exitCode, False))

    out = err = ""
    for result in results:
        if not debug:
            out += result.stdout
            err += result.stderr
            continue

        # The purpose of an "@echo" command is merely to add a debugging message
        # directly to lit's output.  It is used internally by lit's internal
        # shell and is not currently documented for use in lit tests.  However,
        # if someone misuses it (e.g., both "echo" and "@echo" complain about
        # stdin redirection), produce the normal execution trace to facilitate
        # debugging.
        if (
            result.command.args[0] == "@echo"
            and result.exitCode == 0
            and not result.stderr
            and not result.outputFiles
            and not result.timeoutReached
        ):
            out += result.stdout
            continue

        # Write the command line that was run.  Properly quote it.  Leading
        # "!" commands should not be quoted as that would indicate they are not
        # the builtins.
        out += "# executed command: "
        nLeadingBangs = next(
            (i for i, arg in enumerate(result.command.args) if arg != "!"),
            len(result.command.args),
        )
        out += "! " * nLeadingBangs
        out += " ".join(
            shlex.quote(str(s)) for s in result.command.args[nLeadingBangs:]
        )
        out += "\n"

        # If nothing interesting happened, move on.
        if (
            litConfig.maxIndividualTestTime == 0
            and result.exitCode == 0
            and not result.stdout.strip()
            and not result.stderr.strip()
        ):
            continue

        # Otherwise, something failed or was printed, show it.

        # Add the command output, if redirected.
        for (name, path, data) in result.outputFiles:
            data = to_string(data.decode("utf-8", errors="replace"))
            out += formatOutput(f"redirected output from '{name}'", data, limit=1024)
        if result.stdout.strip():
            out += formatOutput("command stdout", result.stdout)
        if result.stderr.strip():
            out += formatOutput("command stderr", result.stderr)
        if not result.stdout.strip() and not result.stderr.strip():
            out += "# note: command had no output on stdout or stderr\n"

        # Show the error conditions:
        if result.exitCode != 0:
            # On Windows, a negative exit code indicates a signal, and those are
            # easier to recognize or look up if we print them in hex.
            if litConfig.isWindows and (result.exitCode < 0 or result.exitCode > 255):
                codeStr = hex(int(result.exitCode & 0xFFFFFFFF)).rstrip("L")
            else:
                codeStr = str(result.exitCode)
            out += "# error: command failed with exit status: %s\n" % (codeStr,)
        if litConfig.maxIndividualTestTime > 0 and result.timeoutReached:
            out += "# error: command reached timeout: %s\n" % (
                str(result.timeoutReached),
            )

    return out, err, exitCode, timeoutInfo
1194
1195
def executeScript(test, litConfig, tmpBase, commands, cwd):
    """
    Write commands to a script file and run it with an external shell.

    The script is written to tmpBase + ".script" (with an additional ".bat"
    suffix when it will be run by cmd.exe).  It is run by cmd.exe when
    litConfig.isWindows is set and litConfig.getBashPath() returns nothing,
    otherwise by that bash path if there is one, otherwise by /bin/sh.
    litConfig.valgrindArgs is prepended to the shell command when
    litConfig.useValgrind is set.

    test: Supplies test.config.pipefail and test.config.environment.
    litConfig: The lit configuration, as described above.
    tmpBase: Path prefix for the generated script file.
    commands: The script lines.  Any line that matches kPdbgRegex is rewritten
        in place in this list.
    cwd: The working directory in which to run the script.

    Returns the tuple (out, err, exitCode, timeoutInfo), where timeoutInfo is
    None unless lit.util.executeCommand reports a timeout.
    """
    bashPath = litConfig.getBashPath()
    isWin32CMDEXE = litConfig.isWindows and not bashPath
    script = tmpBase + ".script"
    if isWin32CMDEXE:
        script += ".bat"

    mode = "w"
    open_kwargs = {}
    if litConfig.isWindows and not isWin32CMDEXE:
        mode += "b"  # Avoid CRLFs when writing bash scripts.
    else:
        open_kwargs["encoding"] = "utf-8"

    # Compose the entire script text first so that the file is opened only for
    # a single write and is closed even if that write fails.
    if isWin32CMDEXE:
        for i, ln in enumerate(commands):
            match = re.fullmatch(kPdbgRegex, ln)
            if match:
                dbg = match.group(1)
                command = match.group(2)
                commands[i] = f"echo '{dbg}' > nul"
                # The match covers the whole line, so the command that
                # followed %dbg(...) must be appended explicitly or it would
                # be lost.
                if command:
                    commands[i] += " && " + command
        text = "@echo on\n" + "\n@if %ERRORLEVEL% NEQ 0 EXIT\n".join(commands)
    else:
        for i, ln in enumerate(commands):
            match = re.fullmatch(kPdbgRegex, ln)
            if match:
                dbg = match.group(1)
                command = match.group(2)
                # Echo the debugging diagnostic to stderr.
                #
                # For that echo command, use 'set' commands to suppress the
                # shell's execution trace, which would just add noise.  Suppress
                # the shell's execution trace for the 'set' commands by
                # redirecting their stderr to /dev/null.
                if command:
                    msg = f"'{dbg}': {shlex.quote(command.lstrip())}"
                else:
                    msg = f"'{dbg}' has no command after substitutions"
                commands[i] = (
                    f"{{ set +x; }} 2>/dev/null && "
                    f"echo {msg} >&2 && "
                    f"{{ set -x; }} 2>/dev/null"
                )
                # Execute the command, if any.
                #
                # 'command' might be something like:
                #
                #   subcmd & PID=$!
                #
                # In that case, we need something like:
                #
                #   echo_dbg && { subcmd & PID=$!; }
                #
                # Without the '{ ...; }' enclosing the original 'command', '&'
                # would put all of 'echo_dbg && subcmd' in the background.  This
                # would cause 'echo_dbg' to execute at the wrong time, and a
                # later kill of $PID would target the wrong process. We have
                # seen the latter manage to terminate the shell running lit.
                if command:
                    commands[i] += f" && {{ {command}; }}"

        pieces = []
        if test.config.pipefail:
            pieces.append("set -o pipefail;")

        # Manually export any DYLD_* variables used by dyld on macOS because
        # otherwise they are lost when the shell executable is run, before the
        # lit test is executed.
        pieces.append(
            "\n".join(
                "export {}={};".format(k, shlex.quote(v))
                for k, v in test.config.environment.items()
                if k.startswith("DYLD_")
            )
        )
        pieces.append("set -x;")
        pieces.append("{ " + "; } &&\n{ ".join(commands) + "; }")
        text = "".join(pieces)
    text += "\n"

    # Write script file
    with open(script, mode, **open_kwargs) as f:
        f.write(text.encode("utf-8") if mode == "wb" else text)

    if isWin32CMDEXE:
        command = ["cmd", "/c", script]
    else:
        if bashPath:
            command = [bashPath, script]
        else:
            command = ["/bin/sh", script]
        if litConfig.useValgrind:
            # FIXME: Running valgrind on sh is overkill. We probably could just
            # run on clang with no real loss.
            command = litConfig.valgrindArgs + command

    try:
        out, err, exitCode = lit.util.executeCommand(
            command,
            cwd=cwd,
            env=test.config.environment,
            timeout=litConfig.maxIndividualTestTime,
        )
        return (out, err, exitCode, None)
    except lit.util.ExecuteCommandTimeoutException as e:
        return (e.out, e.err, e.exitCode, e.msg)
1301
1302
def parseIntegratedTestScriptCommands(source_path, keywords):
    """
    parseIntegratedTestScriptCommands(source_path, keywords) -> commands

    Scan the file at source_path for occurrences of any of keywords, and
    yield a tuple (line_number, command_type, line) for each occurrence, where
    command_type is the keyword found and line is the remainder of the input
    line after that keyword.
    """

    # The file is read and searched as bytes, so input files are not required
    # to have valid codings as a whole.  Only the keyword and line of each match
    # must be decodable as UTF-8, and those are converted with to_string before
    # being yielded so that the remaining code can work with regular strings.
    newline = to_bytes("\n")
    alternatives = "|".join(re.escape(k) for k in keywords)
    keywords_re = re.compile(to_bytes("(%s)(.*)\n" % (alternatives,)))

    with open(source_path, "rb") as source:
        contents = source.read()

        # Make sure the final line is newline-terminated so that the pattern
        # can match it.
        if not contents.endswith(newline):
            contents = contents + newline

        current_line = 1
        scanned_to = 0
        for found in keywords_re.finditer(contents):
            # Advance the line number by the number of newlines between the
            # previous match and this one.
            found_at = found.start()
            current_line += contents.count(newline, scanned_to, found_at)
            scanned_to = found_at

            # Binary mode left any Windows \r in place before the \n, so strip
            # it from the yielded line manually.
            raw_keyword, raw_line = found.groups()
            yield (
                current_line,
                to_string(raw_keyword.decode("utf-8")),
                to_string(raw_line.decode("utf-8").rstrip("\r")),
            )
1363
1364
def getTempPaths(test):
    """
    Compute the temporary paths for test from test.getExecPath().

    Returns the tuple (tmpDir, tmpBase), where tmpDir is an "Output" directory
    next to the test's exec path and tmpBase is the exec path's base name
    within that directory.
    """
    parent_dir, base_name = os.path.split(test.getExecPath())
    output_dir = os.path.join(parent_dir, "Output")
    return output_dir, os.path.join(output_dir, base_name)
1373
1374
def colonNormalizePath(path):
    """
    Return path without a leading slash and without a drive colon.

    When kIsWindows is set, backslashes are converted to forward slashes and a
    ':' in the second character position is removed.  Otherwise, path is
    asserted to start with '/', and that leading '/' is removed.
    """
    if not kIsWindows:
        assert path[0] == "/"
        return path[1:]
    forward_slashed = path.replace("\\", "/")
    return re.sub(r"^(.):", r"\1", forward_slashed)
1381
1382
def getDefaultSubstitutions(test, tmpDir, tmpBase, normalize_slashes=False):
    """
    Build the list of (pattern, replacement) substitution pairs for test.

    The list starts with test.config.substitutions and is followed by
    substitutions derived from the test's source path, tmpDir, and tmpBase.

    test: Supplies getSourcePath() and config.substitutions.
    tmpDir: The test's temporary directory.
    tmpBase: Path prefix for the test's temporary files.
    normalize_slashes: If true, backslashes in the source path, source
        directory, tmpDir, and tmpBase are converted to forward slashes before
        any substitution is computed.
    """

    def regex_escape(s):
        # Escape the characters that are special in the replacement of an
        # s@@@ regex.
        return s.replace("@", r"\@").replace("&", r"\&")

    sourcepath = test.getSourcePath()
    sourcedir = os.path.dirname(sourcepath)

    # Normalize slashes, if requested.
    if normalize_slashes:
        sourcepath, sourcedir, tmpDir, tmpBase = (
            p.replace("\\", "/") for p in (sourcepath, sourcedir, tmpDir, tmpBase)
        )

    tmpName = tmpBase + ".tmp"
    tmpBaseName = os.path.basename(tmpBase)
    sourceBaseName = os.path.basename(sourcepath)

    substitutions = list(test.config.substitutions)
    substitutions += [
        ("%{pathsep}", os.pathsep),
        ("%basename_t", tmpBaseName),
        ("%{s:basename}", sourceBaseName),
        ("%{t:stem}", tmpBaseName),
        ("%{fs-src-root}", pathlib.Path(sourcedir).anchor),
        ("%{fs-tmp-root}", pathlib.Path(tmpBase).anchor),
        ("%{fs-sep}", os.path.sep),
        ("%/et", tmpName.replace("\\", "\\\\\\\\\\\\\\\\")),
    ]

    path_substitutions = (
        ("s", sourcepath),
        ("S", sourcedir),
        ("p", sourcedir),
        ("t", tmpName),
        ("T", tmpDir),
    )
    for letter, path in path_substitutions:
        forward_slashed = path.replace("\\", "/")
        # Windows paths with substitute drives are not expanded by default
        # as they are used to avoid MAX_PATH issues, but sometimes we do
        # need the fully expanded path.
        real_path = os.path.realpath(path)
        substitutions += [
            # Original path variant
            ("%" + letter, path),
            # Normalized path separator variant
            ("%/" + letter, forward_slashed),
            # realpath variants
            ("%{" + letter + ":real}", real_path),
            ("%{/" + letter + ":real}", real_path.replace("\\", "/")),
            # "%{/[STpst]:regex_replacement}" should be normalized like
            # "%/[STpst]" but we're also in a regex replacement context
            # of a s@@@ regex.
            ("%{/" + letter + ":regex_replacement}", regex_escape(forward_slashed)),
            # "%:[STpst]" are normalized paths without colons and without
            # a leading slash.
            ("%:" + letter, colonNormalizePath(path)),
        ]

    return substitutions
1456
1457
1458def _memoize(f):
1459    cache = {}  # Intentionally unbounded, see applySubstitutions()
1460
1461    def memoized(x):
1462        if x not in cache:
1463            cache[x] = f(x)
1464        return cache[x]
1465
1466    return memoized
1467
1468
@_memoize
def _caching_re_compile(r):
    """Compile the regular expression r, memoized by _memoize."""
    compiled = re.compile(r)
    return compiled
1472
1473
class ExpandableScriptDirective:
    """
    Base interface for lit directives whose lit substitutions must be expanded
    in order to produce the shell script.  That covers directives (e.g.,
    'RUN:') giving shell commands that might contain lit substitutions, and
    directives (e.g., 'DEFINE:') that adjust substitutions.

    start_line_number: The line number on which the directive starts.
    end_line_number: The line number on which the directive ends.  It equals
        start_line_number if the directive has no line continuations.
    keyword: The keyword that introduces the directive.  For example, 'RUN:'.
    """

    def __init__(self, start_line_number, end_line_number, keyword):
        # The keyword used to indicate the directive.
        self.keyword = keyword
        # Input line numbers where the directive starts and ends.
        self.start_line_number = start_line_number
        self.end_line_number = end_line_number

    def add_continuation(self, line_number, keyword, line):
        """
        Append a continuation line to this directive and return True.  If the
        specified line is not a continuation of this directive (e.g., the
        previous line does not end in a backslash, or the keywords differ),
        change nothing and return False.

        line_number: The continuation line's line number.
        keyword: The keyword that introduces the continuation line.  For
            example, 'RUN:'.
        line: The continuation line's content after the keyword.
        """
        assert False, "expected method to be called on derived class"

    def needs_continuation(self):
        """
        Report whether this directive still requires a continuation line.

        A backslash is documented as a line continuation even when whitespace
        separates it from the newline.  It looks like a line continuation, so
        it would be confusing if it did not behave as one.
        """
        assert False, "expected method to be called on derived class"

    def get_location(self):
        """
        Return a phrase naming the line, or the range of lines, covered so far
        by this directive and its line continuations.
        """
        first, last = self.start_line_number, self.end_line_number
        if first != last:
            return f"from line {first} to {last}"
        return f"at line {first}"
1527
1528
class CommandDirective(ExpandableScriptDirective):
    """
    A lit directive taking a shell command line.  For example,
    'RUN: echo hello world'.

    command: The content accumulated so far from the directive and its
        continuation lines, with trailing whitespace removed.
    """

    def __init__(self, start_line_number, end_line_number, keyword, line):
        super().__init__(start_line_number, end_line_number, keyword)
        self.command = line.rstrip()

    def add_continuation(self, line_number, keyword, line):
        """
        Append line to the command in place of its trailing backslash and
        return True, or return False without changing anything if keyword
        differs from this directive's keyword or the command does not end in a
        backslash.
        """
        if keyword != self.keyword or not self.needs_continuation():
            return False
        # Drop the trailing backslash before joining the next line.
        self.command = self.command[:-1] + line.rstrip()
        self.end_line_number = line_number
        return True

    def needs_continuation(self):
        """
        Return True if the accumulated command ends in a backslash.  An empty
        command needs no continuation.
        """
        # Trailing whitespace is stripped immediately when each line is added,
        # so a backslash is never hidden here.  endswith() is used rather than
        # indexing so that an empty command does not raise IndexError.
        return self.command.endswith("\\")
1553
1554
1555class SubstDirective(ExpandableScriptDirective):
1556    """
1557    A lit directive taking a substitution definition or redefinition.  For
1558    example, 'DEFINE: %{name} = value'.
1559
1560    new_subst: True if this directive defines a new substitution.  False if it
1561        redefines an existing substitution.
1562    body: The unparsed content accumulated so far from the directive and its
1563        continuation lines.
1564    name: The substitution's name, or None if more continuation lines are still
1565        required.
1566    value: The substitution's value, or None if more continuation lines are
1567        still required.
1568    """
1569
1570    def __init__(self, start_line_number, end_line_number, keyword, new_subst, line):
1571        super().__init__(start_line_number, end_line_number, keyword)
1572        self.new_subst = new_subst
1573        self.body = line
1574        self.name = None
1575        self.value = None
1576        self._parse_body()
1577
1578    def add_continuation(self, line_number, keyword, line):
1579        if keyword != self.keyword or not self.needs_continuation():
1580            return False
1581        if not line.strip():
1582            raise ValueError("Substitution's continuation is empty")
1583        # Append line.  Replace the '\' and any adjacent whitespace with a
1584        # single space.
1585        self.body = self.body.rstrip()[:-1].rstrip() + " " + line.lstrip()
1586        self.end_line_number = line_number
1587        self._parse_body()
1588        return True
1589
1590    def needs_continuation(self):
1591        return self.body.rstrip()[-1:] == "\\"
1592
1593    def _parse_body(self):
1594        """
1595        If no more line continuations are required, parse all the directive's
1596        accumulated lines in order to identify the substitution's name and full
1597        value, and raise an exception if invalid.
1598        """
1599        if self.needs_continuation():
1600            return
1601
1602        # Extract the left-hand side and value, and discard any whitespace
1603        # enclosing each.
1604        parts = self.body.split("=", 1)
1605        if len(parts) == 1:
1606            raise ValueError("Substitution's definition does not contain '='")
1607        self.name = parts[0].strip()
1608        self.value = parts[1].strip()
1609
1610        # Check the substitution's name.
1611        #
1612        # Do not extend this to permit '.' or any sequence that's special in a
1613        # python pattern.  We could escape that automatically for
1614        # DEFINE/REDEFINE directives in test files.  However, lit configuration
1615        # file authors would still have to remember to escape them manually in
1616        # substitution names but not in values.  Moreover, the manually chosen
1617        # and automatically chosen escape sequences would have to be consistent
1618        # (e.g., '\.' vs. '[.]') in order for REDEFINE to successfully redefine
1619        # a substitution previously defined by a lit configuration file.  All
1620        # this seems too error prone and confusing to be worthwhile.  If you
1621        # want your name to express structure, use ':' instead of '.'.
1622        #
1623        # Actually, '{' and '}' are special if they contain only digits possibly
1624        # separated by a comma.  Requiring a leading letter avoids that.
1625        if not re.fullmatch(r"%{[_a-zA-Z][-_:0-9a-zA-Z]*}", self.name):
1626            raise ValueError(
1627                f"Substitution name '{self.name}' is malformed as it must "
1628                f"start with '%{{', it must end with '}}', and the rest must "
1629                f"start with a letter or underscore and contain only "
1630                f"alphanumeric characters, hyphens, underscores, and colons"
1631            )
1632
1633    def adjust_substitutions(self, substitutions):
1634        """
1635        Modify the specified substitution list as specified by this directive.
1636        """
1637        assert (
1638            not self.needs_continuation()
1639        ), "expected directive continuations to be parsed before applying"
1640        existing = [i for i, subst in enumerate(substitutions) if self.name in subst[0]]
1641        existing_res = "".join(
1642            "\nExisting pattern: " + substitutions[i][0] for i in existing
1643        )
1644        if self.new_subst:
1645            if existing:
1646                raise ValueError(
1647                    f"Substitution whose pattern contains '{self.name}' is "
1648                    f"already defined before '{self.keyword}' directive "
1649                    f"{self.get_location()}"
1650                    f"{existing_res}"
1651                )
1652            substitutions.insert(0, (self.name, self.value))
1653            return
1654        if len(existing) > 1:
1655            raise ValueError(
1656                f"Multiple substitutions whose patterns contain '{self.name}' "
1657                f"are defined before '{self.keyword}' directive "
1658                f"{self.get_location()}"
1659                f"{existing_res}"
1660            )
1661        if not existing:
1662            raise ValueError(
1663                f"No substitution for '{self.name}' is defined before "
1664                f"'{self.keyword}' directive {self.get_location()}"
1665            )
1666        if substitutions[existing[0]][0] != self.name:
1667            raise ValueError(
1668                f"Existing substitution whose pattern contains '{self.name}' "
1669                f"does not have the pattern specified by '{self.keyword}' "
1670                f"directive {self.get_location()}\n"
1671                f"Expected pattern: {self.name}"
1672                f"{existing_res}"
1673            )
1674        substitutions[existing[0]] = (self.name, self.value)
1675
1676
def applySubstitutions(script, substitutions, conditions={}, recursion_limit=None):
    """
    Apply substitutions to the script and return the list of resulting lines.

    script: A sequence whose elements are SubstDirective objects,
        CommandDirective objects, or plain strings (e.g., preamble commands).
        A SubstDirective produces no output line; instead it updates
        `substitutions` in place, which affects the elements after it.
    substitutions: A list of (pattern, replacement) pairs.  Each pattern is
        used as a regular expression, so full regular expression syntax is
        allowed.  The list is modified in place by SubstDirective elements.
    conditions: Passed as is to BooleanExpression.evaluate when evaluating
        the condition of a '%if' substitution.  It is only read here.
    recursion_limit: See below.

    If a substitution expands into another substitution, it is expanded
    recursively until the line has no more expandable substitutions. If
    the line can still be substituted after being substituted
    `recursion_limit` times, it is an error. If `recursion_limit` is
    `None` (the default), no recursive substitution is performed at all.

    Raises ValueError for a malformed '%if'/'%else' substitution, when the
    recursion limit is exceeded, or when a SubstDirective cannot be applied.
    """

    # We use #_MARKER_# to hide %% while we do the other substitutions.
    def escapePercents(ln):
        return _caching_re_compile("%%").sub("#_MARKER_#", ln)

    def unescapePercents(ln):
        return _caching_re_compile("#_MARKER_#").sub("%", ln)

    def substituteIfElse(ln):
        # early exit to avoid wasting time on lines without
        # conditional substitutions
        if ln.find("%if ") == -1:
            return ln

        def tryParseIfCond(ln):
            # Returns (condition, rest) if ln starts with '%if ', otherwise
            # (None, ln).
            #
            # space is important to not conflict with other (possible)
            # substitutions
            if not ln.startswith("%if "):
                return None, ln
            ln = ln[4:]

            # stop at '%{'
            match = _caching_re_compile("%{").search(ln)
            if not match:
                raise ValueError("'%{' is missing for %if substitution")
            cond = ln[: match.start()]

            # eat '%{' as well
            ln = ln[match.end() :]
            return cond, ln

        def tryParseElse(ln):
            # Returns (True, rest) if ln starts with '%else %{' (whitespace
            # optional), otherwise (False, ln).
            match = _caching_re_compile(r"^\s*%else\s*(%{)?").search(ln)
            if not match:
                return False, ln
            if not match.group(1):
                raise ValueError("'%{' is missing for %else substitution")
            return True, ln[match.end() :]

        def tryParseEnd(ln):
            # Returns (True, rest) if ln starts with '%}', otherwise
            # (False, ln).
            if ln.startswith("%}"):
                return True, ln[2:]
            return False, ln

        def parseText(ln, isNested):
            # parse everything until '%if ', or '%}' if we're parsing a
            # nested expression.
            #
            # The terminator must be '%if ' (with the trailing space), i.e.,
            # exactly what tryParseIfCond accepts.  Stopping at a bare '%if'
            # (e.g., in '%ifdef') would return empty text without consuming
            # anything, and parseRecursive would then loop forever.
            match = _caching_re_compile(
                "(.*?)(?:%if |%})" if isNested else "(.*?)(?:%if )"
            ).search(ln)
            if not match:
                # there is no terminating pattern, so treat the whole
                # line as text
                return ln, ""
            text_end = match.end(1)
            return ln[:text_end], ln[text_end:]

        def parseRecursive(ln, isNested):
            # Returns (expanded text, unparsed rest).  When isNested, parsing
            # stops in front of the '%}' that closes the enclosing branch.
            result = ""
            while len(ln):
                if isNested:
                    found_end, _ = tryParseEnd(ln)
                    if found_end:
                        break

                # %if cond %{ branch_if %} %else %{ branch_else %}
                cond, ln = tryParseIfCond(ln)
                # Compare against None instead of relying on truthiness: for
                # an empty condition ('%if %{'), tryParseIfCond has already
                # consumed the '%if %{', so it must be handled as a %if here
                # (and handed to BooleanExpression.evaluate, which is expected
                # to reject it) rather than being silently dropped.
                if cond is not None:
                    branch_if, ln = parseRecursive(ln, isNested=True)
                    found_end, ln = tryParseEnd(ln)
                    if not found_end:
                        raise ValueError("'%}' is missing for %if substitution")

                    branch_else = ""
                    found_else, ln = tryParseElse(ln)
                    if found_else:
                        branch_else, ln = parseRecursive(ln, isNested=True)
                        found_end, ln = tryParseEnd(ln)
                        if not found_end:
                            raise ValueError("'%}' is missing for %else substitution")

                    if BooleanExpression.evaluate(cond, conditions):
                        result += branch_if
                    else:
                        result += branch_else
                    continue

                # The rest is handled as plain text.
                text, ln = parseText(ln, isNested)
                result += text

            return result, ln

        result, ln = parseRecursive(ln, isNested=False)
        assert len(ln) == 0
        return result

    def processLine(ln):
        # Apply substitutions
        ln = substituteIfElse(escapePercents(ln))
        for a, b in substitutions:
            # Double the backslashes so that re.sub inserts the replacement
            # literally instead of interpreting escape sequences in it.
            b = b.replace("\\", "\\\\")
            # re.compile() has a built-in LRU cache with 512 entries. In some
            # test suites lit ends up thrashing that cache, which made e.g.
            # check-llvm run 50% slower.  Use an explicit, unbounded cache
            # to prevent that from happening.  Since lit is fairly
            # short-lived, since the set of substitutions is fairly small, and
            # since thrashing has such bad consequences, not bounding the cache
            # seems reasonable.
            ln = _caching_re_compile(a).sub(str(b), escapePercents(ln))

        # Strip the trailing newline and any extra whitespace.
        return ln.strip()

    def processLineToFixedPoint(ln):
        # Re-run processLine until the line stops changing, performing at
        # most recursion_limit re-runs after the first pass.
        assert isinstance(recursion_limit, int) and recursion_limit >= 0
        origLine = ln
        steps = 0
        processed = processLine(ln)
        while processed != ln and steps < recursion_limit:
            ln = processed
            processed = processLine(ln)
            steps += 1

        if processed != ln:
            raise ValueError(
                "Recursive substitution of '%s' did not complete "
                "in the provided recursion limit (%s)" % (origLine, recursion_limit)
            )

        return processed

    process = processLine if recursion_limit is None else processLineToFixedPoint
    output = []
    for directive in script:
        if isinstance(directive, SubstDirective):
            directive.adjust_substitutions(substitutions)
        else:
            if isinstance(directive, CommandDirective):
                line = directive.command
            else:
                # Can come from preamble_commands.
                assert isinstance(directive, str)
                line = directive
            output.append(unescapePercents(process(line)))

    return output
1836
1837
class ParserKind(object):
    """
    The kinds of keyword (or command) an integrated test script can contain.

    TAG: A keyword taking no value. Ex 'END.'
    COMMAND: A keyword taking a list of shell commands. Ex 'RUN:'
    LIST: A keyword taking a comma-separated list of values.
    SPACE_LIST: A keyword taking a space-separated list of values.
    BOOLEAN_EXPR: A keyword taking a comma-separated list of
        boolean expressions. Ex 'XFAIL:'
    INTEGER: A keyword taking a single integer. Ex 'ALLOW_RETRIES:'
    CUSTOM: A keyword with custom parsing semantics.
    DEFINE: A keyword taking a new lit substitution definition. Ex
        'DEFINE: %{name}=value'
    REDEFINE: A keyword taking a lit substitution redefinition. Ex
        'REDEFINE: %{name}=value'
    """

    (
        TAG,
        COMMAND,
        LIST,
        SPACE_LIST,
        BOOLEAN_EXPR,
        INTEGER,
        CUSTOM,
        DEFINE,
        REDEFINE,
    ) = range(9)

    @staticmethod
    def allowedKeywordSuffixes(value):
        """Return the list of suffixes a keyword of the given kind may end in."""
        colon_only = (
            ParserKind.COMMAND,
            ParserKind.LIST,
            ParserKind.SPACE_LIST,
            ParserKind.BOOLEAN_EXPR,
            ParserKind.INTEGER,
            ParserKind.DEFINE,
            ParserKind.REDEFINE,
        )
        # A fresh table (and fresh lists) is built on every call, so callers
        # never share the returned list.
        suffixes = {kind: [":"] for kind in colon_only}
        suffixes[ParserKind.TAG] = ["."]
        suffixes[ParserKind.CUSTOM] = [":", "."]
        return suffixes[value]

    @staticmethod
    def str(value):
        """Return the name of the given kind, e.g. 'TAG' for ParserKind.TAG."""
        names = (
            "TAG",
            "COMMAND",
            "LIST",
            "SPACE_LIST",
            "BOOLEAN_EXPR",
            "INTEGER",
            "CUSTOM",
            "DEFINE",
            "REDEFINE",
        )
        by_value = {getattr(ParserKind, name): name for name in names}
        return by_value[value]
1894
1895
class IntegratedTestKeywordParser(object):
    """A parser for LLVM/Clang style integrated test scripts.

    keyword: The keyword to parse for. It must end in either '.' or ':'.
    kind: A value of ParserKind.
    parser: A custom parser. This value may only be specified with
            ParserKind.CUSTOM.
    initial_value: The value handed to the parser as its accumulated output
            when the first line is parsed.
    """

    def __init__(self, keyword, kind, parser=None, initial_value=None):
        """
        Validate keyword against the suffixes allowed for kind and select the
        line handler for kind.

        Raises ValueError if keyword has a disallowed suffix, if a custom
        parser is supplied for a kind other than ParserKind.CUSTOM, or if
        ParserKind.CUSTOM is used without a custom parser.
        """
        allowedSuffixes = ParserKind.allowedKeywordSuffixes(kind)
        if len(keyword) == 0 or keyword[-1] not in allowedSuffixes:
            if len(allowedSuffixes) == 1:
                raise ValueError(
                    "Keyword '%s' of kind '%s' must end in '%s'"
                    % (keyword, ParserKind.str(kind), allowedSuffixes[0])
                )
            else:
                raise ValueError(
                    "Keyword '%s' of kind '%s' must end in "
                    "one of '%s'"
                    % (keyword, ParserKind.str(kind), " ".join(allowedSuffixes))
                )

        if parser is not None and kind != ParserKind.CUSTOM:
            raise ValueError(
                "custom parsers can only be specified with " "ParserKind.CUSTOM"
            )
        self.keyword = keyword
        self.kind = kind
        # Every (line_number, line) pair passed to parseLine, in call order.
        self.parsed_lines = []
        # The parser's accumulated output; each handler receives the previous
        # value and returns the new one.
        self.value = initial_value
        self.parser = parser

        if kind == ParserKind.COMMAND:
            self.parser = lambda line_number, line, output: self._handleCommand(
                line_number, line, output, self.keyword
            )
        elif kind == ParserKind.LIST:
            self.parser = self._handleList
        elif kind == ParserKind.SPACE_LIST:
            self.parser = self._handleSpaceList
        elif kind == ParserKind.BOOLEAN_EXPR:
            self.parser = self._handleBooleanExpr
        elif kind == ParserKind.INTEGER:
            self.parser = self._handleSingleInteger
        elif kind == ParserKind.TAG:
            self.parser = self._handleTag
        elif kind == ParserKind.CUSTOM:
            if parser is None:
                raise ValueError("ParserKind.CUSTOM requires a custom parser")
            self.parser = parser
        elif kind == ParserKind.DEFINE:
            self.parser = lambda line_number, line, output: self._handleSubst(
                line_number, line, output, self.keyword, new_subst=True
            )
        elif kind == ParserKind.REDEFINE:
            self.parser = lambda line_number, line, output: self._handleSubst(
                line_number, line, output, self.keyword, new_subst=False
            )
        else:
            raise ValueError("Unknown kind '%s'" % kind)

    def parseLine(self, line_number, line):
        """
        Record the line and feed it to the handler, storing the handler's
        result as the new value.

        A ValueError raised by the handler is re-raised as a ValueError whose
        message additionally names the directive and the test line.
        """
        try:
            self.parsed_lines += [(line_number, line)]
            self.value = self.parser(line_number, line, self.value)
        except ValueError as e:
            raise ValueError(
                str(e)
                + ("\nin %s directive on test line %d" % (self.keyword, line_number))
            )

    def getValue(self):
        """Return the value accumulated so far."""
        return self.value

    @staticmethod
    def _handleTag(line_number, line, output):
        """A helper for parsing TAG type keywords"""
        # True once the tag has appeared with nothing but whitespace after it;
        # otherwise the previous value is kept.
        return not line.strip() or output

    @staticmethod
    def _substituteLineNumbers(line_number, line):
        """
        Replace '%(line)' with line_number, and '%(line+N)' / '%(line-N)'
        (spaces around the sign allowed) with line_number plus/minus N.
        """
        line = re.sub(r"%\(line\)", str(line_number), line)

        def replace_line_number(match):
            if match.group(1) == "+":
                return str(line_number + int(match.group(2)))
            if match.group(1) == "-":
                return str(line_number - int(match.group(2)))

        return re.sub(r"%\(line *([\+-]) *(\d+)\)", replace_line_number, line)

    @classmethod
    def _handleCommand(cls, line_number, line, output, keyword):
        """A helper for parsing COMMAND type keywords"""
        # Substitute line number expressions.
        line = cls._substituteLineNumbers(line_number, line)

        # Collapse lines with trailing '\\', or add line with line number to
        # start a new pipeline.
        if not output or not output[-1].add_continuation(line_number, keyword, line):
            if output is None:
                output = []
            line = buildPdbgCommand(f"{keyword} at line {line_number}", line)
            output.append(CommandDirective(line_number, line_number, keyword, line))
        return output

    @staticmethod
    def _handleList(line_number, line, output):
        """A parser for LIST type keywords"""
        if output is None:
            output = []
        output.extend([s.strip() for s in line.split(",")])
        return output

    @staticmethod
    def _handleSpaceList(line_number, line, output):
        """A parser for SPACE_LIST type keywords"""
        if output is None:
            output = []
        output.extend([s.strip() for s in line.split(" ") if s.strip() != ""])
        return output

    @staticmethod
    def _handleSingleInteger(line_number, line, output):
        """A parser for INTEGER type keywords"""
        if output is None:
            output = []
        try:
            n = int(line)
        except ValueError:
            raise ValueError(
                "INTEGER parser requires the input to be an integer (got {})".format(
                    line
                )
            )
        output.append(n)
        return output

    @staticmethod
    def _handleBooleanExpr(line_number, line, output):
        """A parser for BOOLEAN_EXPR type keywords"""
        parts = [s.strip() for s in line.split(",") if s.strip() != ""]
        if output and output[-1][-1] == "\\":
            # The previous line ended in '\', so this line continues its last
            # expression.  An empty continuation must be reported as a
            # ValueError (which parseLine decorates with the directive and
            # line number) instead of failing with an IndexError on parts[0].
            if not parts:
                raise ValueError("Boolean expression's continuation is empty")
            output[-1] = output[-1][:-1] + parts[0]
            del parts[0]
        if output is None:
            output = []
        output.extend(parts)
        # Evaluate each expression to verify syntax.
        # We don't want any results, just the raised ValueError.
        for s in output:
            if s != "*" and not s.endswith("\\"):
                BooleanExpression.evaluate(s, [])
        return output

    @classmethod
    def _handleSubst(cls, line_number, line, output, keyword, new_subst):
        """A parser for DEFINE and REDEFINE type keywords"""
        line = cls._substituteLineNumbers(line_number, line)
        # Either the line continues the most recent directive, or it starts a
        # new substitution directive.
        if output and output[-1].add_continuation(line_number, keyword, line):
            return output
        if output is None:
            output = []
        output.append(
            SubstDirective(line_number, line_number, keyword, new_subst, line)
        )
        return output
2065
2066
def _parseKeywords(sourcepath, additional_parsers=[], require_script=True):
    """_parseKeywords

    Scan an LLVM/Clang style integrated test script and collect every line
    that belongs to a keyword parser: the built-in 'RUN', 'XFAIL',
    'REQUIRES', 'UNSUPPORTED', 'ALLOW_RETRIES', 'END', 'DEFINE' and
    'REDEFINE' parsers plus any additional parsers supplied by the caller.

    Returns a dictionary mapping each parser's keyword to the parser's value
    after parsing the test.

    Raises ValueError for an invalid or duplicate additional parser, a
    missing 'RUN:' line (when require_script is true), an unterminated
    directive, or more than one ALLOW_RETRIES line.
    """
    # 'RUN:', 'DEFINE:' and 'REDEFINE:' all accumulate into this one list, so
    # their directives end up interleaved in the order they are parsed.
    script = []
    builtin_specs = (
        ("RUN:", ParserKind.COMMAND, script),
        ("XFAIL:", ParserKind.BOOLEAN_EXPR, None),
        ("REQUIRES:", ParserKind.BOOLEAN_EXPR, None),
        ("UNSUPPORTED:", ParserKind.BOOLEAN_EXPR, None),
        ("ALLOW_RETRIES:", ParserKind.INTEGER, None),
        ("END.", ParserKind.TAG, None),
        ("DEFINE:", ParserKind.DEFINE, script),
        ("REDEFINE:", ParserKind.REDEFINE, script),
    )
    keyword_parsers = {}
    for keyword, kind, initial in builtin_specs:
        builtin = IntegratedTestKeywordParser(keyword, kind, initial_value=initial)
        keyword_parsers[builtin.keyword] = builtin

    # Register the caller's parsers; they may not reuse a taken keyword.
    for extra in additional_parsers:
        if not isinstance(extra, IntegratedTestKeywordParser):
            raise ValueError(
                "Additional parser must be an instance of "
                "IntegratedTestKeywordParser"
            )
        if extra.keyword in keyword_parsers:
            raise ValueError("Parser for keyword '%s' already exists" % extra.keyword)
        keyword_parsers[extra.keyword] = extra

    # Hand every recognized line to the parser registered for its keyword,
    # stopping once an 'END.' tag has been seen.
    recognized_lines = parseIntegratedTestScriptCommands(
        sourcepath, keyword_parsers.keys()
    )
    for line_number, command_type, ln in recognized_lines:
        active = keyword_parsers[command_type]
        active.parseLine(line_number, ln)
        if command_type == "END." and active.getValue() is True:
            break

    # The script must contain at least one run line unless the caller said
    # otherwise.
    if require_script:
        has_run_line = any(isinstance(entry, CommandDirective) for entry in script)
        if not has_run_line:
            raise ValueError("Test has no 'RUN:' line")

    # Report the first unterminated run or subst directive.
    #
    # When a line continuation for one kind of directive (e.g., 'RUN:',
    # 'DEFINE:', 'REDEFINE:') is followed in script by a directive of a
    # different kind, the '\\' stays on the former and is detected here.
    unterminated = next(
        (entry for entry in script if entry.needs_continuation()), None
    )
    if unterminated is not None:
        raise ValueError(
            f"Test has unterminated '{unterminated.keyword}' "
            f"directive (with '\\') "
            f"{unterminated.get_location()}"
        )

    # Boolean expression lists must not end in a dangling '\\' either.
    for key, candidate in keyword_parsers.items():
        if candidate.kind == ParserKind.BOOLEAN_EXPR:
            expressions = candidate.getValue()
            if expressions and expressions[-1][-1] == "\\":
                raise ValueError(f"Test has unterminated '{key}' lines (with '\\')")

    # At most one ALLOW_RETRIES: line is permitted.
    retries = keyword_parsers["ALLOW_RETRIES:"].getValue()
    if retries and len(retries) > 1:
        raise ValueError("Test has more than one ALLOW_RETRIES lines")

    results = {}
    for registered in keyword_parsers.values():
        results[registered.keyword] = registered.getValue()
    return results
2151
2152
def parseIntegratedTestScript(test, additional_parsers=[], require_script=True):
    """parseIntegratedTestScript - Scan an LLVM/Clang style integrated test
    script, store its 'XFAIL', 'REQUIRES', 'UNSUPPORTED' and 'ALLOW_RETRIES'
    information in the given test, and return the script built from its
    'RUN', 'DEFINE' and 'REDEFINE' lines.

    If additional parsers are specified then the test is also scanned for the
    keywords they specify and all matches are passed to the custom parser.

    If 'require_script' is False an empty script
    may be returned. This can be used for test formats where the actual script
    is optional or ignored.

    Instead of the script, a lit.Test.Result is returned when the test cannot
    be parsed (UNRESOLVED) or must not be run in this configuration
    (UNSUPPORTED).
    """

    def unsupported(reason):
        return lit.Test.Result(Test.UNSUPPORTED, reason)

    # Parse the test sources and extract test properties
    source_path = test.getSourcePath()
    try:
        parsed = _parseKeywords(source_path, additional_parsers, require_script)
    except ValueError as e:
        return lit.Test.Result(Test.UNRESOLVED, str(e))

    # 'RUN:', 'DEFINE:' and 'REDEFINE:' are expected to report the same
    # script.
    script = parsed["RUN:"] or []
    assert parsed["DEFINE:"] == script
    assert parsed["REDEFINE:"] == script

    test.xfails += parsed["XFAIL:"] or []
    test.requires += parsed["REQUIRES:"] or []
    test.unsupported += parsed["UNSUPPORTED:"] or []
    retries = parsed["ALLOW_RETRIES:"]
    if retries:
        test.allowed_retries = retries[0]

    # Enforce REQUIRES:
    missing = test.getMissingRequiredFeatures()
    if missing:
        listing = ", ".join(missing)
        return unsupported(
            f"Test requires the following unavailable features: {listing}"
        )

    # Enforce UNSUPPORTED:
    blocking = test.getUnsupportedFeatures()
    if blocking:
        listing = ", ".join(blocking)
        return unsupported(
            f"Test does not support the following features and/or targets: {listing}"
        )

    # Enforce limit_to_features.
    if test.isWithinFeatureLimits():
        return script
    listing = ", ".join(test.config.limit_to_features)
    return unsupported(
        "Test does not require any of the features "
        f"specified in limit_to_features: {listing}"
    )
2209
2210
def _runShTest(test, litConfig, useExternalSh, script, tmpBase) -> lit.Test.Result:
    """
    Run script for test, re-running it while it fails, and return the result.

    script: The list of command lines to execute.  It is never modified; each
        attempt works on a copy.
    useExternalSh: Selects executeScript (true) or executeScriptInternal
        (false) to run the script.
    tmpBase: Passed to the script executor; its parent directory is created
        if it does not exist.

    The test is run at least once and at most test.allowed_retries + 1 times.
    A test that passes only after a retry is reported as Test.FLAKYPASS.  The
    returned result's output holds the exit code, any timeout information,
    and the captured stdout/stderr of the last attempt.
    """

    # Always returns the tuple (out, err, exitCode, timeoutInfo, status).
    def runOnce(
        execdir,
    ) -> Tuple[str, str, int, Optional[str], Test.ResultCode]:
        # script is modified below (for litConfig.per_test_coverage, and for
        # %dbg expansions).  runOnce can be called multiple times, but applying
        # the modifications multiple times can corrupt script, so always modify
        # a copy.
        scriptCopy = script[:]
        # Set unique LLVM_PROFILE_FILE for each run command
        if litConfig.per_test_coverage:
            # Extract the test case name from the test object, and remove the
            # file extension.
            test_case_name = test.path_in_suite[-1]
            test_case_name = test_case_name.rsplit(".", 1)[0]
            coverage_index = 0  # Counter for coverage file index
            for i, ln in enumerate(scriptCopy):
                # Insert the export after the %dbg(...) prefix, if any, so
                # that the prefix stays at the start of the command.
                match = re.fullmatch(kPdbgRegex, ln)
                if match:
                    dbg = match.group(1)
                    command = match.group(2)
                else:
                    command = ln
                profile = f"{test_case_name}{coverage_index}.profraw"
                coverage_index += 1
                command = f"export LLVM_PROFILE_FILE={profile}; {command}"
                if match:
                    command = buildPdbgCommand(dbg, command)
                scriptCopy[i] = command

        try:
            if useExternalSh:
                res = executeScript(test, litConfig, tmpBase, scriptCopy, execdir)
            else:
                res = executeScriptInternal(
                    test, litConfig, tmpBase, scriptCopy, execdir
                )
        except ScriptFatal as e:
            # Report the fatal error's message as '#'-prefixed output lines.
            out = "# " + "\n# ".join(str(e).splitlines()) + "\n"
            return out, "", 1, None, Test.UNRESOLVED

        out, err, exitCode, timeoutInfo = res
        if exitCode == 0:
            status = Test.PASS
        else:
            if timeoutInfo is None:
                status = Test.FAIL
            else:
                status = Test.TIMEOUT
        return out, err, exitCode, timeoutInfo, status

    # Create the output directory if it does not already exist.
    lit.util.mkdir_p(os.path.dirname(tmpBase))

    # Re-run failed tests up to test.allowed_retries times.
    #
    # Always make at least one attempt: with a negative allowed_retries
    # (nothing here prevents, e.g., 'ALLOW_RETRIES: -1'), the loop below would
    # otherwise not run at all and leave i, status, out, etc. unbound.
    execdir = os.path.dirname(test.getExecPath())
    attempts = max(1, test.allowed_retries + 1)
    for i in range(attempts):
        res = runOnce(execdir)
        out, err, exitCode, timeoutInfo, status = res
        # Only a plain failure is retried; a pass, timeout, or unresolved
        # result ends the loop.
        if status != Test.FAIL:
            break

    # If we had to run the test more than once, count it as a flaky pass. These
    # will be printed separately in the test summary.
    if i > 0 and status == Test.PASS:
        status = Test.FLAKYPASS

    # Form the output log.
    output = f"Exit Code: {exitCode}\n"

    if timeoutInfo is not None:
        output += """Timeout: %s\n""" % (timeoutInfo,)
    output += "\n"

    # Append the outputs, if present.
    if out:
        output += """Command Output (stdout):\n--\n%s\n--\n""" % (out,)
    if err:
        output += """Command Output (stderr):\n--\n%s\n--\n""" % (err,)

    return lit.Test.Result(status, output)
2294
2295
def executeShTest(
    test, litConfig, useExternalSh, extra_substitutions=[], preamble_commands=[]
):
    """
    Parse the test's script, expand its substitutions, and run it.

    preamble_commands are placed in front of the commands parsed from the
    test, and extra_substitutions are placed in front of the default
    substitutions.  Returns a lit.Test.Result.
    """
    if test.config.unsupported:
        return lit.Test.Result(Test.UNSUPPORTED, "Test is unsupported")

    script = [
        buildPdbgCommand("preamble command line", command)
        for command in preamble_commands
    ]

    # The test only has to provide a script of its own when there are no
    # preamble commands.
    parsed = parseIntegratedTestScript(test, require_script=not script)
    if isinstance(parsed, lit.Test.Result):
        return parsed
    script.extend(parsed)

    if litConfig.noExecute:
        return lit.Test.Result(Test.PASS)

    tmpDir, tmpBase = getTempPaths(test)
    substitutions = list(extra_substitutions)
    substitutions.extend(
        getDefaultSubstitutions(
            test, tmpDir, tmpBase, normalize_slashes=useExternalSh
        )
    )
    # Every available feature is a condition that holds for '%if'.
    conditions = dict.fromkeys(test.config.available_features, True)
    expanded = applySubstitutions(
        script,
        substitutions,
        conditions,
        recursion_limit=test.config.recursiveExpansionLimit,
    )

    return _runShTest(test, litConfig, useExternalSh, expanded, tmpBase)
2327