xref: /llvm-project/cross-project-tests/debuginfo-tests/dexter/dex/command/ParseCommand.py (revision a1a3e019d7adbacaa848bee12020e4d9a8401c02)
1# DExTer : Debugging Experience Tester
2# ~~~~~~   ~         ~~         ~   ~~
3#
4# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5# See https://llvm.org/LICENSE.txt for license information.
6# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7"""Parse a DExTer command. In particular, ensure that only a very limited
8subset of Python is allowed, in order to prevent the possibility of unsafe
9Python code being embedded within DExTer commands.
10"""
11
12import os
13import unittest
14from copy import copy
15from pathlib import PurePath
16from collections import defaultdict, OrderedDict, namedtuple
17
18from dex.utils.Exceptions import CommandParseError, NonFloatValueInCommand
19
20from dex.command.CommandBase import CommandBase
21from dex.command.commands.DexCommandLine import DexCommandLine
22from dex.command.commands.DexDeclareFile import DexDeclareFile
23from dex.command.commands.DexDeclareAddress import DexDeclareAddress
24from dex.command.commands.DexExpectProgramState import DexExpectProgramState
25from dex.command.commands.DexExpectStepKind import DexExpectStepKind
26from dex.command.commands.DexExpectStepOrder import DexExpectStepOrder
27from dex.command.commands.DexExpectWatchType import DexExpectWatchType
28from dex.command.commands.DexExpectWatchValue import DexExpectWatchValue
29from dex.command.commands.DexExpectWatchBase import (
30    AddressExpression,
31    DexExpectWatchBase,
32)
33from dex.command.commands.DexLabel import DexLabel
34from dex.command.commands.DexLimitSteps import DexLimitSteps
35from dex.command.commands.DexFinishTest import DexFinishTest
36from dex.command.commands.DexUnreachable import DexUnreachable
37from dex.command.commands.DexWatch import DexWatch
38from dex.utils import Timer
39from dex.utils.Exceptions import CommandParseError, DebuggerException
40
41
def _get_valid_commands():
    """Build the lookup table of every top level DExTer test command.

    The table is keyed by each command class's own `get_name()` and keeps
    the classes in the order listed below.

    Returns:
        { name (str): command (class) }
    """
    command_classes = (
        DexCommandLine,
        DexDeclareAddress,
        DexDeclareFile,
        DexExpectProgramState,
        DexExpectStepKind,
        DexExpectStepOrder,
        DexExpectWatchType,
        DexExpectWatchValue,
        DexLabel,
        DexLimitSteps,
        DexFinishTest,
        DexUnreachable,
        DexWatch,
    )
    return {command.get_name(): command for command in command_classes}
63
64
65def _get_command_name(command_raw: str) -> str:
66    """Return command name by splitting up DExTer command contained in
67    command_raw on the first opening paranthesis and further stripping
68    any potential leading or trailing whitespace.
69    """
70    return command_raw.split("(", 1)[0].rstrip()
71
72
73def _merge_subcommands(command_name: str, valid_commands: dict) -> dict:
74    """Merge valid_commands and command_name's subcommands into a new dict.
75
76    Returns:
77        { name (str): command (class) }
78    """
79    subcommands = valid_commands[command_name].get_subcommands()
80    if subcommands:
81        return {**valid_commands, **subcommands}
82    return valid_commands
83
84
def _build_command(
    command_type, labels, addresses, raw_text: str, path, lineno: str
) -> CommandBase:
    """Evaluate the raw text of one command into a command object.

    The text is passed to eval() with a namespace holding the command's own
    class, any subcommands it declares, and two helpers: `ref(label_name)`,
    which maps a label to its line, and `address(address_name, offset=0)`,
    which builds an AddressExpression for a declared address.

    Args:
        command_type: Class of the command named at the start of raw_text.
        labels: { name: line } of the labels known so far.
        addresses: Collection of the address names declared so far.
        raw_text: Full text of the command, name included.
        path: Object with `base` (used in error reports) and `declared`
            (stored on the command) attributes.
        lineno: Line number stored on the command and used in error reports.
            NOTE(review): annotated `str`, but the caller in this file
            passes the int returned by TextPoint.get_lineno().

    Raises:
        Any exception that eval() can raise, plus the errors built by
        format_unresolved_label_err / format_undeclared_address_err when
        the text references an unknown label or address.

    Returns:
        A dexter command object.
    """

    def resolve_label(label_name: str) -> int:
        resolved = labels.get(label_name, None)
        if resolved is None:
            raise format_unresolved_label_err(
                label_name, raw_text, path.base, lineno
            )
        return resolved

    def make_address(address_name: str, offset: int = 0):
        if address_name in addresses:
            return AddressExpression(address_name, offset)
        raise format_undeclared_address_err(address_name, raw_text, path.base, lineno)

    eval_namespace = _merge_subcommands(
        command_type.get_name(),
        {
            "ref": resolve_label,
            "address": make_address,
            command_type.get_name(): command_type,
        },
    )

    # NOTE(review): eval() adds `__builtins__` to a globals dict that lacks
    # it, so the evaluated text is not limited to the names listed above.
    # pylint: disable=eval-used
    built = eval(raw_text, eval_namespace)
    # pylint: enable=eval-used
    built.raw_text = raw_text
    built.path = path.declared
    built.lineno = lineno
    return built
128
129
130def _search_line_for_cmd_start(line: str, start: int, valid_commands: dict) -> int:
131    r"""Scan `line` for a string matching any key in `valid_commands`.
132
133    Start searching from `start`.
134    Commands escaped with `\` (E.g. `\DexLabel('a')`) are ignored.
135
136    Returns:
137        int: the index of the first character of the matching string in `line`
138        or -1 if no command is found.
139    """
140    for command in valid_commands:
141        idx = line.find(command, start)
142        if idx != -1:
143            # Ignore escaped '\' commands.
144            if idx > 0 and line[idx - 1] == "\\":
145                continue
146            return idx
147    return -1
148
149
150def _search_line_for_cmd_end(line: str, start: int, paren_balance: int) -> (int, int):
151    """Find the end of a command by looking for balanced parentheses.
152
153    Args:
154        line: String to scan.
155        start: Index into `line` to start looking.
156        paren_balance(int): paren_balance after previous call.
157
158    Note:
159        On the first call `start` should point at the opening parenthesis and
160        `paren_balance` should be set to 0. Subsequent calls should pass in the
161        returned `paren_balance`.
162
163    Returns:
164        ( end,  paren_balance )
165        Where end is 1 + the index of the last char in the command or, if the
166        parentheses are not balanced, the end of the line.
167
168        paren_balance will be 0 when the parentheses are balanced.
169    """
170    for end in range(start, len(line)):
171        ch = line[end]
172        if ch == "(":
173            paren_balance += 1
174        elif ch == ")":
175            paren_balance -= 1
176        if paren_balance == 0:
177            break
178    end += 1
179    return (end, paren_balance)
180
181
class TextPoint:
    """A position in a list of text lines, stored as zero-based indices.

    `line` and `char` are plain mutable attributes; the getters convert them
    to the one-based numbers used when reporting positions.
    """

    def __init__(self, line, char):
        self.line, self.char = line, char

    def get_lineno(self):
        """Return the one-based line number of this point."""
        return 1 + self.line

    def get_column(self):
        """Return the one-based column number of this point."""
        return 1 + self.char
192
193
def format_unresolved_label_err(
    label: str, src: str, filename: str, lineno
) -> CommandParseError:
    """Build (but do not raise) the error for a reference to an unknown label.

    Args:
        label: Name of the label that could not be resolved.
        src: Source text to show with the error.
        filename: File the error is reported against.
        lineno: Line number the error is reported against.

    Returns:
        A populated CommandParseError with an empty caret string.
    """
    error = CommandParseError()
    error.info = f"Unresolved label: '{label}'"
    error.filename = filename
    error.lineno = lineno
    error.src = src
    # Don't bother trying to point to the bad label.
    error.caret = ""
    return error
204
205
def format_undeclared_address_err(
    address: str, src: str, filename: str, lineno
) -> CommandParseError:
    """Build (but do not raise) the error for use of an undeclared address.

    Args:
        address: Name of the address that has not been declared.
        src: Source text to show with the error.
        filename: File the error is reported against.
        lineno: Line number the error is reported against.

    Returns:
        A populated CommandParseError with an empty caret string.
    """
    error = CommandParseError()
    error.info = f"Undeclared address: '{address}'"
    error.filename = filename
    error.lineno = lineno
    error.src = src
    # Don't bother trying to point to the bad address.
    error.caret = ""
    return error
216
217
def format_parse_err(
    msg: str, path: str, lines: list, point: TextPoint
) -> CommandParseError:
    """Build (but do not raise) a parse error pointing at `point`.

    Args:
        msg: Description of the problem.
        path: File the error is reported against.
        lines: All lines of that file; `point.line` indexes into it.
        point: Position of the problem. Its line supplies the source text
            and its char sets how far the caret marker is indented.

    Returns:
        A populated CommandParseError.
    """
    source_line = lines[point.line].rstrip()
    caret_indent = " " * (point.char)

    error = CommandParseError()
    error.info = msg
    error.filename = path
    error.lineno = point.get_lineno()
    error.src = source_line
    error.caret = "{}<r>^</>".format(caret_indent)
    return error
228
229
def skip_horizontal_whitespace(line, point):
    """Advance `point.char` past any spaces and tabs in `line`.

    `point` is updated in place. If only spaces and tabs (or nothing at all)
    remain from `point.char` onwards, `point.char` is left unchanged.
    """
    remainder = line[point.char :]
    trimmed = remainder.lstrip(" \t")
    if trimmed:
        point.char += len(remainder) - len(trimmed)
235
236
def add_line_label(labels, label, cmd_path, cmd_lineno):
    """Record `label` in `labels`, rejecting duplicate label names.

    Args:
        labels: { name: line } mapping, updated in place.
        label: Label command; `eval()` gives its name and `get_line()` the
            line stored for it.
        cmd_path: File reported if the label is a duplicate.
        cmd_lineno: Line number reported if the label is a duplicate.

    Raises:
        CommandParseError: a label with the same name is already recorded.
    """
    # Line labels must be unique.
    if label.eval() not in labels:
        labels[label.eval()] = label.get_line()
        return

    duplicate = CommandParseError()
    duplicate.info = f"Found duplicate line label: '{label.eval()}'"
    duplicate.lineno = cmd_lineno
    duplicate.filename = cmd_path
    duplicate.src = label.raw_text
    # Don't bother trying to point to it since we're only printing the raw
    # command, which isn't much text.
    duplicate.caret = ""
    raise duplicate
250
251
def add_address(addresses, address, cmd_path, cmd_lineno):
    """Record the name of `address` in `addresses`, rejecting duplicates.

    Args:
        addresses: List of declared address names, appended to in place.
        address: Address declaration command; `get_address_name()` gives
            the name to record.
        cmd_path: File reported if the address is a duplicate.
        cmd_lineno: Line number reported if the address is a duplicate.

    Raises:
        CommandParseError: an address with the same name is already recorded.
    """
    name = address.get_address_name()
    # Address variables must be unique.
    if name not in addresses:
        addresses.append(name)
        return

    duplicate = CommandParseError()
    duplicate.info = f"Found duplicate address: '{name}'"
    duplicate.lineno = cmd_lineno
    duplicate.filename = cmd_path
    duplicate.src = address.raw_text
    # Don't bother trying to point to it since we're only printing the raw
    # command, which isn't much text.
    duplicate.caret = ""
    raise duplicate
266
267
def _find_all_commands_in_file(path, file_lines, valid_commands, source_root_dir):
    """Find and build every DExTer command embedded in the lines of one file.

    Each line is scanned for command names from `valid_commands`. The text
    of a command runs from its name to the parenthesis that balances its
    opening one, possibly several lines later, and is then evaluated by
    _build_command(). Labels, declared addresses and declared files are
    tracked as they are encountered so later commands can refer to them.

    Args:
        path: Path of the file being parsed; used in error reports and as
            part of each command's key.
        file_lines: The lines of the file.
        valid_commands: { name (str): command (class) } to search for.
        source_root_dir: Directory that relative DexDeclareFile paths are
            resolved against; when falsy the directory of `path` is used.

    Raises:
        CommandParseError: on a missing or unbalanced parenthesis, a command
            whose text fails to evaluate, a duplicate label or address, or
            a second DexCommandLine.

    Returns:
        ( commands, declared_files )
        Where commands is { name: { (path, cmd_point): command } } and
        declared_files is the set of paths named by DexDeclareFile commands.
    """
    labels = {}  # dict of {name: line}.
    addresses = []  # list of declared address names.
    CmdPath = namedtuple("cmd_path", "base declared")
    cmd_path = CmdPath(path, path)
    declared_files = set()
    commands = defaultdict(dict)
    paren_balance = 0
    region_start = TextPoint(0, 0)

    for line_idx, line in enumerate(file_lines):
        region_start.line = line_idx
        region_start.char = 0

        # Search this line till we find no more commands.
        while True:
            # If parens are currently balanced we can look for a new command.
            if paren_balance == 0:
                region_start.char = _search_line_for_cmd_start(
                    line, region_start.char, valid_commands
                )
                if region_start.char == -1:
                    break  # Read next line.

                # NOTE(review): the name is everything up to the first "(",
                # so if other text sits between a command name and that
                # parenthesis the valid_commands lookup further down raises
                # KeyError rather than a parse error.
                command_name = _get_command_name(line[region_start.char :])
                cmd_point = copy(region_start)
                cmd_text_list = [command_name]

                # Start searching for parens after cmd.
                region_start.char += len(command_name)
                skip_horizontal_whitespace(line, region_start)
                if region_start.char >= len(line) or line[region_start.char] != "(":
                    raise format_parse_err(
                        "Missing open parenthesis", path, file_lines, region_start
                    )

            end, paren_balance = _search_line_for_cmd_end(
                line, region_start.char, paren_balance
            )
            # Add this text blob to the command.
            cmd_text_list.append(line[region_start.char : end])
            # Move parse ptr to end of line or parens.
            region_start.char = end

            # If the parens are unbalanced start reading the next line in an
            # attempt to find the end of the command.
            if paren_balance != 0:
                break  # Read next line.

            # Parens are balanced, we have a full command to evaluate.
            raw_text = "".join(cmd_text_list)
            try:
                command = _build_command(
                    valid_commands[command_name],
                    labels,
                    addresses,
                    raw_text,
                    cmd_path,
                    cmd_point.get_lineno(),
                )
            except SyntaxError as e:
                # This err should point to the problem line. e.lineno and
                # e.offset are 1-based positions within raw_text and either
                # may be missing, in which case point at the command start.
                line_delta = (e.lineno or 1) - 1
                col_delta = (e.offset or 1) - 1
                err_point = copy(cmd_point)
                # Stay inside the file so building the error cannot itself
                # fail with an IndexError.
                err_point.line = min(cmd_point.line + line_delta, len(file_lines) - 1)
                if line_delta == 0:
                    # First line of raw_text starts at the command name.
                    err_point.char += col_delta
                else:
                    # Continuation lines were copied into raw_text from
                    # their first character, so the column is absolute.
                    err_point.char = col_delta
                raise format_parse_err(e.msg, path, file_lines, err_point)
            except (TypeError, NonFloatValueInCommand) as e:
                # This err should always point to the end of the command name.
                err_point = copy(cmd_point)
                err_point.char += len(command_name)
                raise format_parse_err(str(e), path, file_lines, err_point)
            else:
                if type(command) is DexLabel:
                    add_line_label(labels, command, path, cmd_point.get_lineno())
                elif type(command) is DexDeclareAddress:
                    add_address(addresses, command, path, cmd_point.get_lineno())
                elif type(command) is DexDeclareFile:
                    declared_path = command.declared_file
                    if not os.path.isabs(declared_path):
                        source_dir = (
                            source_root_dir
                            if source_root_dir
                            else os.path.dirname(path)
                        )
                        declared_path = os.path.join(source_dir, declared_path)
                    cmd_path = CmdPath(cmd_path.base, str(PurePath(declared_path)))
                    declared_files.add(cmd_path.declared)
                elif type(command) is DexCommandLine and "DexCommandLine" in commands:
                    msg = "More than one DexCommandLine in file"
                    # Point at the start of the offending command. No error
                    # point exists on this path other than cmd_point.
                    raise format_parse_err(msg, path, file_lines, cmd_point)

                assert (path, cmd_point) not in commands[command_name], (
                    command_name,
                    commands[command_name],
                )
                commands[command_name][path, cmd_point] = command

    if paren_balance != 0:
        # This err should always point to the end of the command name.
        err_point = copy(cmd_point)
        err_point.char += len(command_name)
        msg = "Unbalanced parenthesis starting here"
        raise format_parse_err(msg, path, file_lines, err_point)
    return dict(commands), declared_files
379
380
def _find_all_commands(test_files, source_root_dir):
    """Parse every file in `test_files` and pool the commands found.

    Args:
        test_files: Paths of the files to read and parse.
        source_root_dir: Passed through to _find_all_commands_in_file().

    Returns:
        ( commands, new_source_files )
        Where commands maps each command name to the merged per-file
        dictionaries of commands with that name, and new_source_files is
        the union of the declared-file sets reported for each file.
    """
    valid_commands = _get_valid_commands()
    pooled_commands = defaultdict(dict)
    pooled_declared = set()

    for test_file in test_files:
        with open(test_file) as fp:
            lines = fp.readlines()
        found, declared = _find_all_commands_in_file(
            test_file, lines, valid_commands, source_root_dir
        )
        for name, by_location in found.items():
            pooled_commands[name].update(by_location)
        pooled_declared |= declared

    return dict(pooled_commands), pooled_declared
396
397
def get_command_infos(test_files, source_root_dir):
    """Parse the test files and group the resulting commands by name.

    Args:
        test_files: Paths of the files to parse.
        source_root_dir: Passed through to the parser.

    Raises:
        DebuggerException: a CommandParseError was raised while parsing;
            its details are formatted into the exception message.

    Returns:
        ( command_infos, new_source_files )
        Where command_infos is an OrderedDict of { name: [command, ...] }
        and new_source_files is the set returned by _find_all_commands().
    """
    with Timer("parsing commands"):
        try:
            commands, new_source_files = _find_all_commands(test_files, source_root_dir)
            grouped = OrderedDict()
            for command_type, by_location in commands.items():
                for command in by_location.values():
                    grouped.setdefault(command_type, []).append(command)
            return OrderedDict(grouped), new_source_files
        except CommandParseError as e:
            details = (e.filename, e.lineno, e.info, e.src, e.caret)
            msg = "parser error: <d>{}({}):</> {}\n{}\n{}\n".format(*details)
            raise DebuggerException(msg)
414
415
class TestParseCommand(unittest.TestCase):
    """Tests for finding and building commands embedded in lines of text."""

    class MockCmd(CommandBase):
        """A mock DExTer command for testing parsing.

        Args:
            value (str): Unique name for this instance.
        """

        def __init__(self, *args):
            self.value = args[0]

        def get_name():
            return __class__.__name__

        def eval(this):
            pass

    def __init__(self, *args):
        super().__init__(*args)

        # The mock is the only command the parser is told about.
        mock_cls = TestParseCommand.MockCmd
        self.valid_commands = {mock_cls.get_name(): mock_cls}

    def _find_all_commands_in_lines(self, lines):
        """Use DExTer parsing methods to find all the mock commands in lines.

        Returns:
            { cmd_name: { (path, line): command_obj } }
        """
        found, _ = _find_all_commands_in_file(
            __file__, lines, self.valid_commands, None
        )
        return found

    def _find_all_mock_values_in_lines(self, lines):
        """Use DExTer parsing methods to find all mock command values in lines.

        Returns:
            values (list(str)): MockCmd values found in lines.
        """
        found = self._find_all_commands_in_lines(lines)
        mocks = found.get(TestParseCommand.MockCmd.get_name(), None)
        if not mocks:
            return []
        return [mock.value for mock in mocks.values()]

    def test_parse_inline(self):
        """Commands can be embedded in other text."""

        lines = [
            'MockCmd("START") Lorem ipsum dolor sit amet, consectetur\n',
            'adipiscing elit, MockCmd("EMBEDDED") sed doeiusmod tempor,\n',
            "incididunt ut labore et dolore magna aliqua.\n",
        ]

        values = self._find_all_mock_values_in_lines(lines)

        self.assertIn("START", values)
        self.assertIn("EMBEDDED", values)

    def test_parse_multi_line_comment(self):
        """Multi-line commands can embed comments."""

        lines = [
            "Lorem ipsum dolor sit amet, consectetur\n",
            "adipiscing elit, sed doeiusmod tempor,\n",
            "incididunt ut labore et MockCmd(\n",
            '    "WITH_COMMENT" # THIS IS A COMMENT\n',
            ") dolore magna aliqua. Ut enim ad minim\n",
        ]

        values = self._find_all_mock_values_in_lines(lines)

        self.assertIn("WITH_COMMENT", values)

    def test_parse_empty(self):
        """Empty files are silently ignored."""

        values = self._find_all_mock_values_in_lines([])
        self.assertEqual(len(values), 0)

    def test_parse_bad_whitespace(self):
        """Throw exception when parsing badly formed whitespace."""
        lines = [
            "MockCmd\n",
            '("XFAIL_CMD_LF_PAREN")\n',
        ]

        with self.assertRaises(CommandParseError):
            self._find_all_mock_values_in_lines(lines)

    def test_parse_good_whitespace(self):
        """Try to emulate python whitespace rules"""

        lines = [
            'MockCmd("NONE")\n',
            'MockCmd    ("SPACE")\n',
            'MockCmd\t\t("TABS")\n',
            'MockCmd(    "ARG_SPACE"    )\n',
            'MockCmd(\t\t"ARG_TABS"\t\t)\n',
            "MockCmd(\n",
            '"CMD_PAREN_LF")\n',
        ]

        values = self._find_all_mock_values_in_lines(lines)

        self.assertIn("NONE", values)
        self.assertIn("SPACE", values)
        self.assertIn("TABS", values)
        self.assertIn("ARG_SPACE", values)
        self.assertIn("ARG_TABS", values)
        self.assertIn("CMD_PAREN_LF", values)

    def test_parse_share_line(self):
        """More than one command can appear on one line."""

        lines = [
            'MockCmd("START") MockCmd("CONSECUTIVE") words '
            'MockCmd("EMBEDDED") more words\n'
        ]

        values = self._find_all_mock_values_in_lines(lines)

        self.assertIn("START", values)
        self.assertIn("CONSECUTIVE", values)
        self.assertIn("EMBEDDED", values)

    def test_parse_escaped(self):
        """Escaped commands are ignored."""

        lines = ['words \\MockCmd("IGNORED") words words words\n']

        values = self._find_all_mock_values_in_lines(lines)

        self.assertNotIn("IGNORED", values)
551