# DExTer : Debugging Experience Tester
# ~~~~~~   ~         ~~         ~   ~~
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
"""Parse a DExTer command. In particular, ensure that only a very limited
subset of Python is allowed, in order to prevent the possibility of unsafe
Python code being embedded within DExTer commands.
"""

import os
import unittest
from copy import copy
from pathlib import PurePath
from collections import defaultdict, OrderedDict, namedtuple

from dex.utils.Exceptions import CommandParseError, NonFloatValueInCommand

from dex.command.CommandBase import CommandBase
from dex.command.commands.DexCommandLine import DexCommandLine
from dex.command.commands.DexDeclareFile import DexDeclareFile
from dex.command.commands.DexDeclareAddress import DexDeclareAddress
from dex.command.commands.DexExpectProgramState import DexExpectProgramState
from dex.command.commands.DexExpectStepKind import DexExpectStepKind
from dex.command.commands.DexExpectStepOrder import DexExpectStepOrder
from dex.command.commands.DexExpectWatchType import DexExpectWatchType
from dex.command.commands.DexExpectWatchValue import DexExpectWatchValue
from dex.command.commands.DexExpectWatchBase import (
    AddressExpression,
    DexExpectWatchBase,
)
from dex.command.commands.DexLabel import DexLabel
from dex.command.commands.DexLimitSteps import DexLimitSteps
from dex.command.commands.DexFinishTest import DexFinishTest
from dex.command.commands.DexUnreachable import DexUnreachable
from dex.command.commands.DexWatch import DexWatch
from dex.utils import Timer
from dex.utils.Exceptions import CommandParseError, DebuggerException


def _get_valid_commands():
    """Return all top level DExTer test commands.

    Returns:
        { name (str): command (class) }
    """
    return {
        DexCommandLine.get_name(): DexCommandLine,
        DexDeclareAddress.get_name(): DexDeclareAddress,
        DexDeclareFile.get_name(): DexDeclareFile,
        DexExpectProgramState.get_name(): DexExpectProgramState,
        DexExpectStepKind.get_name(): DexExpectStepKind,
        DexExpectStepOrder.get_name(): DexExpectStepOrder,
        DexExpectWatchType.get_name(): DexExpectWatchType,
        DexExpectWatchValue.get_name(): DexExpectWatchValue,
        DexLabel.get_name(): DexLabel,
        DexLimitSteps.get_name(): DexLimitSteps,
        DexFinishTest.get_name(): DexFinishTest,
        DexUnreachable.get_name(): DexUnreachable,
        DexWatch.get_name(): DexWatch,
    }


def _get_command_name(command_raw: str) -> str:
    """Return the command name at the start of `command_raw`.

    The name is everything before the first opening parenthesis with any
    trailing whitespace removed. Leading whitespace is NOT removed, so
    `command_raw` is expected to start at the first character of the name.
    If there is no opening parenthesis the whole (right-stripped) string is
    returned.
    """
    return command_raw.split("(", 1)[0].rstrip()


def _merge_subcommands(command_name: str, valid_commands: dict) -> dict:
    """Merge valid_commands and command_name's subcommands into a new dict.

    If the command has no subcommands, `valid_commands` itself is returned
    (not a copy).

    Returns:
        { name (str): command (class) }
    """
    subcommands = valid_commands[command_name].get_subcommands()
    if subcommands:
        return {**valid_commands, **subcommands}
    return valid_commands


def _build_command(
    command_type, labels, addresses, raw_text: str, path, lineno: int
) -> CommandBase:
    """Build a command object from raw text.

    This function will call eval().

    Args:
        command_type: Command class that `raw_text` is expected to construct.
        labels: Mapping of label name to line, used to resolve `ref(...)`.
        addresses: Collection of declared address names, used to validate
            `address(...)`.
        raw_text: Full source text of the command, e.g. `DexLabel('a')`.
        path: Object with `base` and `declared` attributes (see the `CmdPath`
            namedtuple in `_find_all_commands_in_file`).
        lineno: 1-based line number of the start of the command.

    Raises:
        Any exception that eval() can raise.
        CommandParseError: for unresolved labels / undeclared addresses.

    Returns:
        A dexter command object.
    """

    def label_to_line(label_name: str) -> int:
        line = labels.get(label_name, None)
        if line is not None:
            return line
        raise format_unresolved_label_err(label_name, raw_text, path.base, lineno)

    def get_address_object(address_name: str, offset: int = 0):
        if address_name not in addresses:
            raise format_undeclared_address_err(
                address_name, raw_text, path.base, lineno
            )
        return AddressExpression(address_name, offset)

    valid_commands = _merge_subcommands(
        command_type.get_name(),
        {
            "ref": label_to_line,
            "address": get_address_object,
            command_type.get_name(): command_type,
        },
    )

    # NOTE(review): eval() inserts a reference to the real `__builtins__` into
    # the globals dict when that key is absent, so the expression is not
    # restricted to the names listed above. Test files are evaluated as code;
    # do not parse files from untrusted sources.
    # pylint: disable=eval-used
    command = eval(raw_text, valid_commands)
    # pylint: enable=eval-used
    command.raw_text = raw_text
    command.path = path.declared
    command.lineno = lineno
    return command


def _search_line_for_cmd_start(line: str, start: int, valid_commands: dict) -> int:
    r"""Scan `line` for a string matching any key in `valid_commands`.

    Start searching from `start`.
    Commands escaped with `\` (E.g. `\DexLabel('a')`) are ignored, but the
    search continues past them so that a later unescaped command on the same
    line is still found.

    Returns:
        int: the index of the first character of the earliest unescaped
             matching string in `line` or -1 if no command is found.
    """
    earliest = -1
    for command in valid_commands:
        search_from = start
        while True:
            idx = line.find(command, search_from)
            if idx == -1:
                break
            if idx > 0 and line[idx - 1] == "\\":
                # Escaped occurrence: keep looking further along the line for
                # another occurrence of the same command.
                search_from = idx + 1
                continue
            # Keep the left-most hit across all command names so that commands
            # sharing a line are visited in source order.
            if earliest == -1 or idx < earliest:
                earliest = idx
            break
    return earliest


def _search_line_for_cmd_end(line: str, start: int, paren_balance: int) -> tuple:
    """Find the end of a command by looking for balanced parentheses.

    Args:
        line: String to scan.
        start: Index into `line` to start looking.
        paren_balance(int): paren_balance after previous call.

    Note:
        On the first call `start` should point at the opening parenthesis and
        `paren_balance` should be set to 0. Subsequent calls should pass in the
        returned `paren_balance`.

    Returns:
        ( end, paren_balance )
        Where end is 1 + the index of the last char in the command or, if the
        parentheses are not balanced, the end of the line.

        paren_balance will be 0 when the parentheses are balanced.
    """
    for idx in range(start, len(line)):
        ch = line[idx]
        if ch == "(":
            paren_balance += 1
        elif ch == ")":
            paren_balance -= 1
        if paren_balance == 0:
            return (idx + 1, paren_balance)
    # Ran off the end of the line (or there was nothing to scan, e.g. an empty
    # continuation line): report the end of the line.
    return (len(line), paren_balance)


class TextPoint:
    """A position in a text file, stored as 0-based line and char indices."""

    def __init__(self, line, char):
        self.line = line
        self.char = char

    def get_lineno(self):
        """Return the 1-based line number."""
        return self.line + 1

    def get_column(self):
        """Return the 1-based column number."""
        return self.char + 1


def format_unresolved_label_err(
    label: str, src: str, filename: str, lineno
) -> CommandParseError:
    """Return (not raise) a CommandParseError for an unknown label name."""
    err = CommandParseError()
    err.src = src
    err.caret = ""  # Don't bother trying to point to the bad label.
    err.filename = filename
    err.lineno = lineno
    err.info = f"Unresolved label: '{label}'"
    return err


def format_undeclared_address_err(
    address: str, src: str, filename: str, lineno
) -> CommandParseError:
    """Return (not raise) a CommandParseError for an undeclared address."""
    err = CommandParseError()
    err.src = src
    err.caret = ""  # Don't bother trying to point to the bad address.
    err.filename = filename
    err.lineno = lineno
    err.info = f"Undeclared address: '{address}'"
    return err


def format_parse_err(
    msg: str, path: str, lines: list, point: TextPoint
) -> CommandParseError:
    """Return (not raise) a CommandParseError pointing at `point` in `lines`.

    The caret string is padded with `point.char` spaces so that it lines up
    under the offending character of the source line.
    """
    err = CommandParseError()
    err.filename = path
    err.src = lines[point.line].rstrip()
    err.lineno = point.get_lineno()
    err.info = msg
    err.caret = "{}<r>^</>".format(" " * (point.char))
    return err


def skip_horizontal_whitespace(line, point):
    """Advance `point.char` past any spaces/tabs at that position in `line`.

    `point` is modified in place. If only spaces/tabs remain on the line,
    `point` is left unchanged.
    """
    for idx, char in enumerate(line[point.char :]):
        if char not in " \t":
            point.char += idx
            return


def add_line_label(labels, label, cmd_path, cmd_lineno):
    """Record `label` in `labels`, raising CommandParseError on duplicates."""
    label_name = label.eval()
    # Enforce unique line labels.
    if label_name in labels:
        err = CommandParseError()
        err.info = f"Found duplicate line label: '{label_name}'"
        err.lineno = cmd_lineno
        err.filename = cmd_path
        err.src = label.raw_text
        # Don't bother trying to point to it since we're only printing the raw
        # command, which isn't much text.
        err.caret = ""
        raise err
    labels[label_name] = label.get_line()


def add_address(addresses, address, cmd_path, cmd_lineno):
    """Record `address` in `addresses`, raising CommandParseError on duplicates."""
    # Enforce unique address variables.
    address_name = address.get_address_name()
    if address_name in addresses:
        err = CommandParseError()
        err.info = f"Found duplicate address: '{address_name}'"
        err.lineno = cmd_lineno
        err.filename = cmd_path
        err.src = address.raw_text
        # Don't bother trying to point to it since we're only printing the raw
        # command, which isn't much text.
        err.caret = ""
        raise err
    addresses.append(address_name)


def _find_all_commands_in_file(path, file_lines, valid_commands, source_root_dir):
    """Find and build every DExTer command embedded in `file_lines`.

    Args:
        path: Path of the file the lines came from (used for error messages
            and as the default command path).
        file_lines: List of the file's lines.
        valid_commands: { name (str): command (class) } to look for.
        source_root_dir: Directory that relative DexDeclareFile paths are
            resolved against; if falsy, the directory of `path` is used.

    Raises:
        CommandParseError: on malformed commands.

    Returns:
        ( { cmd_name: { (path, TextPoint): command_obj } }, declared_files )
    """
    labels = {}  # dict of {name: line}.
    addresses = []  # list of addresses.
    CmdPath = namedtuple("cmd_path", "base declared")
    cmd_path = CmdPath(path, path)
    declared_files = set()
    commands = defaultdict(dict)
    paren_balance = 0
    region_start = TextPoint(0, 0)

    for line_idx, line in enumerate(file_lines):
        region_start.line = line_idx
        region_start.char = 0

        # Search this line till we find no more commands.
        while True:
            # If parens are currently balanced we can look for a new command.
            if paren_balance == 0:
                region_start.char = _search_line_for_cmd_start(
                    line, region_start.char, valid_commands
                )
                if region_start.char == -1:
                    break  # Read next line.

                command_name = _get_command_name(line[region_start.char :])
                cmd_point = copy(region_start)
                cmd_text_list = [command_name]

                # Start searching for parens after cmd.
                region_start.char += len(command_name)
                skip_horizontal_whitespace(line, region_start)
                if region_start.char >= len(line) or line[region_start.char] != "(":
                    raise format_parse_err(
                        "Missing open parenthesis", path, file_lines, region_start
                    )

            end, paren_balance = _search_line_for_cmd_end(
                line, region_start.char, paren_balance
            )
            # Add this text blob to the command.
            cmd_text_list.append(line[region_start.char : end])
            # Move parse ptr to end of line or parens.
            region_start.char = end

            # If the parens are unbalanced start reading the next line in an
            # attempt to find the end of the command.
            if paren_balance != 0:
                break  # Read next line.

            # Parens are balanced, we have a full command to evaluate.
            raw_text = "".join(cmd_text_list)
            try:
                command = _build_command(
                    valid_commands[command_name],
                    labels,
                    addresses,
                    raw_text,
                    cmd_path,
                    cmd_point.get_lineno(),
                )
            except SyntaxError as e:
                # This err should point to the problem line.
                # e.lineno / e.offset are 1-based positions within raw_text
                # and either may be None.
                rel_line = (e.lineno or 1) - 1
                rel_char = (e.offset or 1) - 1
                err_point = copy(cmd_point)
                # Clamp so that format_parse_err can always index file_lines.
                err_point.line = min(cmd_point.line + rel_line, len(file_lines) - 1)
                if rel_line == 0:
                    # The first line of raw_text begins at the command start,
                    # so the offset is relative to cmd_point.char.
                    err_point.char += rel_char
                else:
                    # Continuation lines of raw_text are whole file lines, so
                    # the offset is already a file column.
                    err_point.char = rel_char
                raise format_parse_err(e.msg, path, file_lines, err_point)
            except TypeError as e:
                # This err should always point to the end of the command name.
                err_point = copy(cmd_point)
                err_point.char += len(command_name)
                raise format_parse_err(str(e), path, file_lines, err_point)
            except NonFloatValueInCommand as e:
                err_point = copy(cmd_point)
                err_point.char += len(command_name)
                raise format_parse_err(str(e), path, file_lines, err_point)
            else:
                if type(command) is DexLabel:
                    add_line_label(labels, command, path, cmd_point.get_lineno())
                elif type(command) is DexDeclareAddress:
                    add_address(addresses, command, path, cmd_point.get_lineno())
                elif type(command) is DexDeclareFile:
                    declared_path = command.declared_file
                    if not os.path.isabs(declared_path):
                        source_dir = (
                            source_root_dir
                            if source_root_dir
                            else os.path.dirname(path)
                        )
                        declared_path = os.path.join(source_dir, declared_path)
                    cmd_path = CmdPath(cmd_path.base, str(PurePath(declared_path)))
                    declared_files.add(cmd_path.declared)
                elif type(command) is DexCommandLine and "DexCommandLine" in commands:
                    msg = "More than one DexCommandLine in file"
                    # Point at the start of the offending (second) command.
                    raise format_parse_err(msg, path, file_lines, cmd_point)

                assert (path, cmd_point) not in commands[command_name], (
                    command_name,
                    commands[command_name],
                )
                commands[command_name][path, cmd_point] = command

    if paren_balance != 0:
        # This err should always point to the end of the command name.
        err_point = copy(cmd_point)
        err_point.char += len(command_name)
        msg = "Unbalanced parenthesis starting here"
        raise format_parse_err(msg, path, file_lines, err_point)
    return dict(commands), declared_files


def _find_all_commands(test_files, source_root_dir):
    """Parse every file in `test_files` and merge the commands found.

    Returns:
        ( { cmd_name: { (path, TextPoint): command_obj } }, new_source_files )
        where new_source_files is the set of paths named by DexDeclareFile
        commands.
    """
    commands = defaultdict(dict)
    valid_commands = _get_valid_commands()
    new_source_files = set()
    for test_file in test_files:
        with open(test_file) as fp:
            lines = fp.readlines()
        file_commands, declared_files = _find_all_commands_in_file(
            test_file, lines, valid_commands, source_root_dir
        )
        for command_name in file_commands:
            commands[command_name].update(file_commands[command_name])
        new_source_files |= declared_files

    return dict(commands), new_source_files


def get_command_infos(test_files, source_root_dir):
    """Parse `test_files` and return the commands grouped by command name.

    Raises:
        DebuggerException: wrapping any CommandParseError, with a formatted
            message describing the location of the problem.

    Returns:
        ( OrderedDict { cmd_name: [command_obj, ...] }, new_source_files )
    """
    with Timer("parsing commands"):
        try:
            commands, new_source_files = _find_all_commands(test_files, source_root_dir)
            command_infos = OrderedDict()
            for command_type in commands:
                for command in commands[command_type].values():
                    command_infos.setdefault(command_type, []).append(command)
            return OrderedDict(command_infos), new_source_files
        except CommandParseError as e:
            msg = "parser error: <d>{}({}):</> {}\n{}\n{}\n".format(
                e.filename, e.lineno, e.info, e.src, e.caret
            )
            raise DebuggerException(msg) from e


class TestParseCommand(unittest.TestCase):
    class MockCmd(CommandBase):
        """A mock DExTer command for testing parsing.

        Args:
            value (str): Unique name for this instance.
        """

        def __init__(self, *args):
            self.value = args[0]

        def get_name():
            return __class__.__name__

        def eval(this):
            pass

    def __init__(self, *args):
        super().__init__(*args)

        self.valid_commands = {
            TestParseCommand.MockCmd.get_name(): TestParseCommand.MockCmd
        }

    def _find_all_commands_in_lines(self, lines):
        """Use DExTer parsing methods to find all the mock commands in lines.

        Returns:
            { cmd_name: { (path, line): command_obj } }
        """
        cmds, declared_files = _find_all_commands_in_file(
            __file__, lines, self.valid_commands, None
        )
        return cmds

    def _find_all_mock_values_in_lines(self, lines):
        """Use DExTer parsing methods to find all mock command values in lines.

        Returns:
            values (list(str)): MockCmd values found in lines.
        """
        cmds = self._find_all_commands_in_lines(lines)
        mocks = cmds.get(TestParseCommand.MockCmd.get_name(), None)
        return [v.value for v in mocks.values()] if mocks else []

    def test_parse_inline(self):
        """Commands can be embedded in other text."""

        lines = [
            'MockCmd("START") Lorem ipsum dolor sit amet, consectetur\n',
            'adipiscing elit, MockCmd("EMBEDDED") sed doeiusmod tempor,\n',
            "incididunt ut labore et dolore magna aliqua.\n",
        ]

        values = self._find_all_mock_values_in_lines(lines)

        self.assertIn("START", values)
        self.assertIn("EMBEDDED", values)

    def test_parse_multi_line_comment(self):
        """Multi-line commands can embed comments."""

        lines = [
            "Lorem ipsum dolor sit amet, consectetur\n",
            "adipiscing elit, sed doeiusmod tempor,\n",
            "incididunt ut labore et MockCmd(\n",
            '    "WITH_COMMENT" # THIS IS A COMMENT\n',
            ") dolore magna aliqua. Ut enim ad minim\n",
        ]

        values = self._find_all_mock_values_in_lines(lines)

        self.assertIn("WITH_COMMENT", values)

    def test_parse_empty(self):
        """Empty files are silently ignored."""

        lines = []
        values = self._find_all_mock_values_in_lines(lines)
        self.assertEqual(len(values), 0)

    def test_parse_bad_whitespace(self):
        """Throw exception when parsing badly formed whitespace."""
        lines = [
            "MockCmd\n",
            '("XFAIL_CMD_LF_PAREN")\n',
        ]

        with self.assertRaises(CommandParseError):
            self._find_all_mock_values_in_lines(lines)

    def test_parse_good_whitespace(self):
        """Try to emulate python whitespace rules"""

        lines = [
            'MockCmd("NONE")\n',
            'MockCmd    ("SPACE")\n',
            'MockCmd\t\t("TABS")\n',
            'MockCmd(    "ARG_SPACE"    )\n',
            'MockCmd(\t\t"ARG_TABS"\t\t)\n',
            "MockCmd(\n",
            '"CMD_PAREN_LF")\n',
        ]

        values = self._find_all_mock_values_in_lines(lines)

        self.assertIn("NONE", values)
        self.assertIn("SPACE", values)
        self.assertIn("TABS", values)
        self.assertIn("ARG_SPACE", values)
        self.assertIn("ARG_TABS", values)
        self.assertIn("CMD_PAREN_LF", values)

    def test_parse_share_line(self):
        """More than one command can appear on one line."""

        lines = [
            'MockCmd("START") MockCmd("CONSECUTIVE") words '
            'MockCmd("EMBEDDED") more words\n'
        ]

        values = self._find_all_mock_values_in_lines(lines)

        self.assertIn("START", values)
        self.assertIn("CONSECUTIVE", values)
        self.assertIn("EMBEDDED", values)

    def test_parse_escaped(self):
        """Escaped commands are ignored."""

        lines = ['words \\MockCmd("IGNORED") words words words\n']

        values = self._find_all_mock_values_in_lines(lines)

        self.assertNotIn("IGNORED", values)

    def test_parse_after_escaped(self):
        """A command following an escaped command on the same line is found."""

        lines = ['words \\MockCmd("IGNORED") words MockCmd("FOUND") words\n']

        values = self._find_all_mock_values_in_lines(lines)

        self.assertNotIn("IGNORED", values)
        self.assertIn("FOUND", values)