"""
Specification, compiler, disassembler, and interpreter
for LLDB dataformatter bytecode.

See https://lldb.llvm.org/resources/formatterbytecode.html for more details.
"""

from __future__ import annotations

# Types
type_String, type_Int, type_UInt, type_Object, type_Type = range(1, 6)

# Opcodes
# Two-way table: mnemonic -> opcode number and opcode number -> mnemonic.
opcode = {}


def define_opcode(n, mnemonic, name):
    """Register opcode number `n` as the module global ``op_<name>``.

    When a mnemonic is given, it is also entered into the `opcode` table in
    both directions. Literal opcodes pass ``None`` because they have no
    textual mnemonic of their own.
    """
    global_name = "op_" + name
    globals()[global_name] = n
    if not mnemonic:
        return
    opcode[mnemonic] = n
    opcode[n] = mnemonic


# Data stack manipulation.
define_opcode(1, "dup", "dup")
define_opcode(2, "drop", "drop")
define_opcode(3, "pick", "pick")
define_opcode(4, "over", "over")
define_opcode(5, "swap", "swap")
define_opcode(6, "rot", "rot")

# Control flow.
define_opcode(0x10, "{", "begin")
define_opcode(0x11, "if", "if")
define_opcode(0x12, "ifelse", "ifelse")
define_opcode(0x13, "return", "return")

# Literals (no mnemonic; the assembler recognizes them by their spelling).
define_opcode(0x20, None, "lit_uint")
define_opcode(0x21, None, "lit_int")
define_opcode(0x22, None, "lit_string")
define_opcode(0x23, None, "lit_selector")

# Conversions.
define_opcode(0x2A, "as_int", "as_int")
define_opcode(0x2B, "as_uint", "as_uint")
define_opcode(0x2C, "is_null", "is_null")

# Arithmetic.
define_opcode(0x30, "+", "plus")
define_opcode(0x31, "-", "minus")
define_opcode(0x32, "*", "mul")
define_opcode(0x33, "/", "div")
define_opcode(0x34, "%", "mod")
define_opcode(0x35, "<<", "shl")
define_opcode(0x36, ">>", "shr")

# Bitwise logic.
define_opcode(0x40, "&", "and")
define_opcode(0x41, "|", "or")
define_opcode(0x42, "^", "xor")
define_opcode(0x43, "~", "not")

# Comparisons.
define_opcode(0x50, "=", "eq")
define_opcode(0x51, "!=", "neq")
define_opcode(0x52, "<", "lt")
define_opcode(0x53, ">", "gt")
define_opcode(0x54, "=<", "le")
define_opcode(0x55, ">=", "ge")

# Function calls.
define_opcode(0x60, "call", "call")

# Function signatures
sig_summary, sig_init, sig_get_num_children = 0, 1, 2
sig_get_child_index, sig_get_child_at_index = 3, 4

# Selectors
# Two-way table: "@name" -> selector number and selector number -> "@name".
selector = {}


def define_selector(n, name):
    """Register selector number `n` as the module global ``sel_<name>``.

    The selector is also entered into the `selector` table in both
    directions under its assembler spelling ``@<name>``.
    """
    spelled = "@" + name
    globals()["sel_" + name] = n
    selector[spelled] = n
    selector[n] = spelled


define_selector(0, "summary")
define_selector(1, "type_summary")

define_selector(0x10, "get_num_children")
define_selector(0x11, "get_child_at_index")
define_selector(0x12, "get_child_with_name")
define_selector(0x13, "get_child_index")
define_selector(0x15, "get_type")
define_selector(0x16, "get_template_argument_type")
define_selector(0x17, "cast")
define_selector(0x20, "get_value")
define_selector(0x21, "get_value_as_unsigned")
define_selector(0x22, "get_value_as_signed")
define_selector(0x23, "get_value_as_address")

define_selector(0x40, "read_memory_byte")
define_selector(0x41, "read_memory_uint32")
define_selector(0x42, "read_memory_int32")
define_selector(0x43, "read_memory_unsigned")
define_selector(0x44, "read_memory_signed")
define_selector(0x45, "read_memory_address")
define_selector(0x46, "read_memory")

define_selector(0x50, "fmt")
define_selector(0x51, "sprintf")
define_selector(0x52, "strlen")


################################################################################
# Compiler.
################################################################################


def compile(assembler: str) -> bytearray:
    """Translate assembler text into bytecode.

    The input is split on single spaces. Recognized tokens are ``{`` / ``}``
    block delimiters, decimal literals (a trailing ``u`` marks unsigned),
    ``@selector`` names, double-quoted strings (which may contain spaces and
    backslash-escaped characters), and opcode mnemonics.

    Every literal, string length and block length is emitted as one byte, so
    values above 255 raise ValueError from ``bytearray.append``.
    """
    # Innermost unterminated block is last; index 0 is the top-level program.
    open_blocks = [bytearray()]

    def put(value):
        open_blocks[-1].append(value)

    # Stored back to front so that pop() hands tokens out in source order.
    pending = assembler.split(" ")[::-1]
    while pending:
        tok = pending.pop()
        if tok == "":
            continue
        if tok == "{":
            open_blocks.append(bytearray())
            continue
        if tok == "}":
            finished = open_blocks.pop()
            put(op_begin)
            put(len(finished))  # FIXME: uleb
            open_blocks[-1].extend(finished)
            continue

        lead = tok[0]
        if lead.isdigit():
            if tok[-1] == "u":
                put(op_lit_uint)
                put(int(tok[:-1]))  # FIXME
            else:
                put(op_lit_int)
                put(int(tok))  # FIXME
            continue
        if lead == "@":
            put(op_lit_selector)
            put(selector[tok])
            continue
        if lead != '"':
            put(opcode[tok])
            continue

        # String literal. The split on spaces may have cut it into several
        # tokens, so keep pulling tokens (re-inserting the space) until the
        # closing quote is seen.
        payload = bytearray()
        chunk = tok[1:]
        while True:
            escaped = False
            closed = False
            for ch in chunk:
                if escaped:
                    payload.append(ord(ch))  # FIXME
                    escaped = False
                elif ch == "\\":
                    escaped = True
                elif ch == '"':
                    # FIXME assert this is last in token
                    closed = True
                    break
                else:
                    payload.append(ord(ch))
            if closed:
                break
            payload.append(ord(" "))
            chunk = pending.pop()

        put(op_lit_string)
        put(len(payload))
        open_blocks[-1].extend(payload)

    assert len(open_blocks) == 1  # unterminated {
    return open_blocks[0]


################################################################################
# Disassembler.
################################################################################


def disassemble(bytecode: bytearray) -> tuple[str, list[int]]:
    """Disassemble bytecode into (assembler, token starts)

    Args:
        bytecode: the program to disassemble.

    Returns:
        A pair ``(asm, tokens)``. ``asm`` is the assembler text. ``tokens``
        starts with 0 and gains one entry per byte consumed: the length of
        ``asm`` at the moment that byte was fetched. The interpreter's trace
        output indexes it by program counter to place a marker under the
        current instruction.

        If a block is still open when the bytes run out, ``"ERROR"`` is
        appended to the text.
    """
    asm = ""
    all_bytes = list(bytecode)
    all_bytes.reverse()
    # Remaining byte count of every in-flight block, innermost last.
    blocks = []
    tokens = [0]

    def next_byte():
        """Fetch the next byte in the bytecode and keep track of all
        in-flight blocks"""
        blocks[:] = [remaining - 1 for remaining in blocks]
        tokens.append(len(asm))
        return all_bytes.pop()

    def quote(text):
        """Render `text` as a double-quoted assembler string literal.

        Backslashes and double quotes are backslash-escaped so the compiler
        reads them back unchanged. Every other character is rendered the
        way repr() renders it.
        """
        rendered = "".join(
            "\\" + ch if ch in '"\\' else repr(ch)[1:-1] for ch in text
        )
        return '"' + rendered + '"'

    while all_bytes:
        b = next_byte()
        if b == op_begin:
            asm += "{"
            length = next_byte()
            blocks.append(length)
        elif b == op_lit_uint:
            b = next_byte()
            asm += str(b)  # FIXME uleb
            asm += "u"
        elif b == op_lit_int:
            b = next_byte()
            asm += str(b)
        elif b == op_lit_selector:
            b = next_byte()
            asm += selector[b]
        elif b == op_lit_string:
            length = next_byte()
            s = ""
            while length:
                s += chr(next_byte())
                length -= 1
            asm += quote(s)
        else:
            asm += opcode[b]

        # Close every block whose last byte was just consumed.
        while blocks and blocks[-1] == 0:
            asm += " }"
            blocks.pop()

        if all_bytes:
            asm += " "

    if blocks:
        asm += "ERROR"
    return asm, tokens


################################################################################
# Interpreter.
################################################################################


def count_fmt_params(fmt: str) -> int:
    """Count the number of parameters in a format string

    Returns how many positional arguments ``fmt.format(*args)`` needs:
    one per auto-numbered ``{}`` field, or the highest explicit index plus
    one for ``{0}``-style fields. Fields nested inside a format spec (as in
    ``{:>{}}``) are counted too. Named fields do not consume positional
    arguments and are ignored.

    Raises:
        ValueError: if ``string.Formatter.parse`` rejects `fmt`.
    """
    from string import Formatter

    parser = Formatter()
    auto_fields = 0  # auto-numbered "{}" fields seen so far
    needed = 0  # positional arguments required so far

    def scan(template):
        nonlocal auto_fields, needed
        for _, name, spec, _ in parser.parse(template):
            if name is None:
                # Literal text without a replacement field.
                continue
            # Only the part before any ".attr" / "[index]" access selects
            # the argument.
            head = name.partition(".")[0].partition("[")[0]
            if head == "":
                auto_fields += 1
                needed = max(needed, auto_fields)
            elif head.isdecimal():
                needed = max(needed, int(head) + 1)
            if spec:
                scan(spec)

    scan(fmt)
    return needed


def interpret(bytecode: bytearray, control: list, data: list, tracing: bool = False):
    """Interpret bytecode

    Args:
        bytecode: the program to run.
        control: control stack; ``{ ... }`` blocks are pushed onto it as
            ``(start, end)`` offsets into `bytecode`. Mutated in place.
        data: data stack holding the program's inputs and results. Mutated
            in place.
        tracing: print the machine state before every instruction.

    Returns:
        The top of the data stack when a ``return`` opcode is executed or
        the program runs off its end.

    Raises:
        IndexError: on stack underflow, including an empty data stack at
            the end of the program.
        AssertionError: when a selector without an implementation is called.
    """
    # Stack of (pc, end) pairs; the innermost executing block is last.
    frame = []
    frame.append((0, len(bytecode)))

    def trace():
        """print a trace of the execution for debugging purposes"""

        def show(d):
            if isinstance(d, int):
                return str(d)
            if isinstance(d, str):
                return d
            return repr(type(d))

        pc, end = frame[-1]
        asm, tokens = disassemble(bytecode)
        print(
            "=== frame = {1}, data = {2}, opcode = {0}".format(
                # Opcodes without a mnemonic may be missing from the table.
                opcode.get(b, hex(b)),
                frame,
                [show(d) for d in data],
            )
        )
        print(asm)
        print(" " * (tokens[pc]) + "^")

    def next_byte():
        """Fetch the next byte and update the PC"""
        pc, end = frame[-1]
        assert pc < len(bytecode)
        b = bytecode[pc]
        frame[-1] = pc + 1, end
        # At the end of a block? Then drop its frame and fetch from the
        # enclosing frame instead.
        while pc >= end:
            frame.pop()
            if not frame:
                return None
            pc, end = frame[-1]
            if pc >= end:
                return None
            b = bytecode[pc]
            frame[-1] = pc + 1, end
        return b

    while frame[-1][0] < len(bytecode):
        b = next_byte()
        if b is None:
            break
        if tracing:
            trace()
        # Data stack manipulation.
        if b == op_dup:
            data.append(data[-1])
        elif b == op_drop:
            data.pop()
        elif b == op_pick:
            data.append(data[data.pop()])
        elif b == op_over:
            data.append(data[-2])
        elif b == op_swap:
            x = data.pop()
            y = data.pop()
            data.append(x)
            data.append(y)
        elif b == op_rot:
            z = data.pop()
            y = data.pop()
            x = data.pop()
            data.append(z)
            data.append(x)
            data.append(y)

        # Control stack manipulation.
        elif b == op_begin:
            # Record the block on the control stack and skip over its body.
            length = next_byte()
            pc, end = frame[-1]
            control.append((pc, pc + length))
            frame[-1] = pc + length, end
        elif b == op_if:
            # The block is consumed whether or not it runs; leaving it on
            # the control stack would leak it into later if/ifelse opcodes.
            cond = data.pop()
            block = control.pop()
            if cond:
                frame.append(block)
        elif b == op_ifelse:
            if data.pop():
                control.pop()
                frame.append(control.pop())
            else:
                frame.append(control.pop())
                control.pop()
        elif b == op_return:
            control.clear()
            return data[-1]

        # Literals.
        elif b == op_lit_uint:
            b = next_byte()  # FIXME uleb
            data.append(int(b))
        elif b == op_lit_int:
            b = next_byte()  # FIXME uleb
            data.append(int(b))
        elif b == op_lit_selector:
            b = next_byte()
            data.append(b)
        elif b == op_lit_string:
            length = next_byte()
            data.append("".join(chr(next_byte()) for _ in range(length)))

        elif b == op_as_uint:
            pass
        elif b == op_as_int:
            pass
        elif b == op_is_null:
            data.append(1 if data.pop() is None else 0)

        # Arithmetic, logic, etc.
        elif b == op_plus:
            data.append(data.pop() + data.pop())
        elif b == op_minus:
            data.append(-data.pop() + data.pop())
        elif b == op_mul:
            data.append(data.pop() * data.pop())
        elif b == op_div:
            # Integer division: the stack holds integers, so "/" must not
            # produce a float. Rounds toward negative infinity, consistent
            # with Python's "%" used for op_mod.
            y = data.pop()
            data.append(data.pop() // y)
        elif b == op_mod:
            y = data.pop()
            data.append(data.pop() % y)
        elif b == op_shl:
            y = data.pop()
            data.append(data.pop() << y)
        elif b == op_shr:
            y = data.pop()
            data.append(data.pop() >> y)
        elif b == op_and:
            data.append(data.pop() & data.pop())
        elif b == op_or:
            data.append(data.pop() | data.pop())
        elif b == op_xor:
            data.append(data.pop() ^ data.pop())
        elif b == op_not:
            # NOTE(review): the mnemonic is "~" but this is a logical not;
            # confirm against the specification whether a bitwise
            # complement is intended.
            data.append(not data.pop())
        elif b == op_eq:
            data.append(data.pop() == data.pop())
        elif b == op_neq:
            data.append(data.pop() != data.pop())
        # The right operand is popped first, hence the mirrored operators.
        elif b == op_lt:
            data.append(data.pop() > data.pop())
        elif b == op_gt:
            data.append(data.pop() < data.pop())
        elif b == op_le:
            data.append(data.pop() >= data.pop())
        elif b == op_ge:
            data.append(data.pop() <= data.pop())

        # Function calls.
        elif b == op_call:
            sel = data.pop()
            if sel == sel_summary:
                data.append(data.pop().GetSummary())
            elif sel == sel_get_num_children:
                data.append(data.pop().GetNumChildren())
            elif sel == sel_get_child_at_index:
                index = data.pop()
                valobj = data.pop()
                data.append(valobj.GetChildAtIndex(index))
            elif sel == sel_get_child_with_name:
                name = data.pop()
                valobj = data.pop()
                data.append(valobj.GetChildMemberWithName(name))
            elif sel == sel_get_child_index:
                name = data.pop()
                valobj = data.pop()
                data.append(valobj.GetIndexOfChildWithName(name))
            elif sel == sel_get_type:
                data.append(data.pop().GetType())
            elif sel == sel_get_template_argument_type:
                n = data.pop()
                valobj = data.pop()
                data.append(valobj.GetTemplateArgumentType(n))
            elif sel == sel_get_value:
                data.append(data.pop().GetValue())
            elif sel == sel_get_value_as_unsigned:
                data.append(data.pop().GetValueAsUnsigned())
            elif sel == sel_get_value_as_signed:
                data.append(data.pop().GetValueAsSigned())
            elif sel == sel_get_value_as_address:
                data.append(data.pop().GetValueAsAddress())
            elif sel == sel_cast:
                sbtype = data.pop()
                valobj = data.pop()
                data.append(valobj.Cast(sbtype))
            elif sel == sel_strlen:
                s = data.pop()
                data.append(len(s) if s else 0)
            elif sel == sel_fmt:
                template = data.pop()
                # NOTE(review): arguments are taken top-of-stack first, so
                # "{0}" binds to the value pushed last; confirm the
                # intended order against the specification.
                args = [data.pop() for _ in range(count_fmt_params(template))]
                data.append(template.format(*args))
            else:
                message = "not implemented: " + selector.get(sel, repr(sel))
                print(message)
                # Raised explicitly so the failure survives "python -O".
                raise AssertionError(message)
    return data[-1]


if __name__ == "__main__":
    # Work around the fact that one of the local files is called
    # types.py, which breaks some versions of python.
    import os
    import sys

    # The interpreter may have recorded the script directory with symlinks
    # resolved, so try both spellings and tolerate either being absent.
    for path in (
        os.path.abspath(os.path.dirname(__file__)),
        os.path.dirname(os.path.realpath(__file__)),
    ):
        if path in sys.path:
            sys.path.remove(path)
    import argparse

    parser = argparse.ArgumentParser(
        description="""
    Compiler, disassembler, and interpreter for LLDB dataformatter bytecode.
    See https://lldb.llvm.org/resources/formatterbytecode.html for more details.
    """
    )
    parser.add_argument(
        "-c", "--compile", type=str, help="compile assembler into bytecode"
    )
    parser.add_argument("-d", "--disassemble", type=str, help="disassemble bytecode")
    parser.add_argument("-t", "--test", action="store_true", help="run unit tests")
    args = parser.parse_args()
    if args.compile:
        print(compile(str(args.compile)).hex())

    if args.disassemble:
        print(disassemble(bytearray.fromhex(str(args.disassemble))))

    ############################################################################
    # Tests.
    ############################################################################
    if args.test:
        import unittest

        class TestCompiler(unittest.TestCase):
            def test(self):
                self.assertEqual(compile("1u dup").hex(), "200101")
                self.assertEqual(compile('"1u dup"').hex(), "2206317520647570")
                self.assertEqual(compile("16 < { dup } if").hex(), "21105210010111")
                self.assertEqual(compile('{ { " } " } }').hex(), "100710052203207d20")

                def roundtrip(asm):
                    self.assertEqual(disassemble(compile(asm))[0], asm)

                roundtrip("1u dup")
                roundtrip('1u dup "1u dup"')
                roundtrip("16 < { dup } if")
                roundtrip('{ { " } " } }')

                self.assertEqual(interpret(compile("1 1 +"), [], []), 2)
                self.assertEqual(interpret(compile("2 1 1 + *"), [], []), 4)
                self.assertEqual(
                    interpret(compile('2 1 > { "yes" } { "no" } ifelse'), [], []), "yes"
                )
                self.assertEqual(interpret(compile("6 3 /"), [], []), 2)
                self.assertEqual(interpret(compile("7 0 { 8 } if"), [], []), 7)
                self.assertEqual(
                    interpret(compile('1 "{0}" @fmt call'), [], []), "1"
                )

        # Give unittest a clean argv so it never tries to parse this
        # script's own flags, wherever "-t" appeared on the command line.
        unittest.main(argv=sys.argv[:1])