"""
Specification, compiler, disassembler, and interpreter
for LLDB dataformatter bytecode.

See https://lldb.llvm.org/resources/formatterbytecode.html for more details.
"""

from __future__ import annotations

# Types
type_String = 1
type_Int = 2
type_UInt = 3
type_Object = 4
type_Type = 5

# Opcodes
# Maps mnemonic -> opcode number and opcode number -> mnemonic.
opcode = dict()


def define_opcode(n, mnemonic, name):
    """Register opcode number `n`.

    Binds the module-level constant ``op_<name>`` to `n`. When `mnemonic`
    is not None/empty, also records the mnemonic <-> number mapping in
    `opcode`; opcodes without a mnemonic (the literal opcodes) are
    therefore absent from `opcode`.
    """
    globals()["op_" + name] = n
    if mnemonic:
        opcode[mnemonic] = n
        opcode[n] = mnemonic


define_opcode(1, "dup", "dup")
define_opcode(2, "drop", "drop")
define_opcode(3, "pick", "pick")
define_opcode(4, "over", "over")
define_opcode(5, "swap", "swap")
define_opcode(6, "rot", "rot")

define_opcode(0x10, "{", "begin")
define_opcode(0x11, "if", "if")
define_opcode(0x12, "ifelse", "ifelse")

define_opcode(0x20, None, "lit_uint")
define_opcode(0x21, None, "lit_int")
define_opcode(0x22, None, "lit_string")
define_opcode(0x23, None, "lit_selector")

define_opcode(0x2A, "as_int", "as_int")
define_opcode(0x2B, "as_uint", "as_uint")
define_opcode(0x2C, "is_null", "is_null")

define_opcode(0x30, "+", "plus")
define_opcode(0x31, "-", "minus")
define_opcode(0x32, "*", "mul")
define_opcode(0x33, "/", "div")
define_opcode(0x34, "%", "mod")
define_opcode(0x35, "<<", "shl")
define_opcode(0x36, ">>", "shr")

define_opcode(0x40, "&", "and")
define_opcode(0x41, "|", "or")
define_opcode(0x42, "^", "xor")
define_opcode(0x43, "~", "not")

define_opcode(0x50, "=", "eq")
define_opcode(0x51, "!=", "neq")
define_opcode(0x52, "<", "lt")
define_opcode(0x53, ">", "gt")
define_opcode(0x54, "=<", "le")
define_opcode(0x55, ">=", "ge")

define_opcode(0x60, "call", "call")

# Function signatures
sig_summary = 0
sig_init = 1
sig_get_num_children = 2
sig_get_child_index = 3
sig_get_child_at_index = 4

# Selectors
# Maps "@name" -> selector number and selector number -> "@name".
selector = dict()


def define_selector(n, name):
    """Register selector number `n`.

    Binds the module-level constant ``sel_<name>`` to `n` and records the
    ``"@<name>"`` <-> number mapping in `selector`.
    """
    globals()["sel_" + name] = n
    selector["@" + name] = n
    selector[n] = "@" + name


define_selector(0, "summary")
define_selector(1, "type_summary")

define_selector(0x10, "get_num_children")
define_selector(0x11, "get_child_at_index")
define_selector(0x12, "get_child_with_name")
define_selector(0x13, "get_child_index")
define_selector(0x15, "get_type")
define_selector(0x16, "get_template_argument_type")
define_selector(0x17, "cast")
define_selector(0x20, "get_value")
define_selector(0x21, "get_value_as_unsigned")
define_selector(0x22, "get_value_as_signed")
define_selector(0x23, "get_value_as_address")

define_selector(0x40, "read_memory_byte")
define_selector(0x41, "read_memory_uint32")
define_selector(0x42, "read_memory_int32")
define_selector(0x43, "read_memory_unsigned")
define_selector(0x44, "read_memory_signed")
define_selector(0x45, "read_memory_address")
define_selector(0x46, "read_memory")

define_selector(0x50, "fmt")
define_selector(0x51, "sprintf")
define_selector(0x52, "strlen")


################################################################################
# Compiler.
################################################################################


def compile(assembler: str) -> bytearray:
    """Compile assembler into bytecode.

    `assembler` is a sequence of tokens separated by single spaces:
    ``{`` / ``}`` delimit blocks, tokens starting with a digit are integer
    literals (unsigned when suffixed with ``u``), tokens starting with
    ``@`` are selectors, tokens starting with ``"`` open a string literal
    (which may contain spaces and backslash-escaped characters), and
    anything else is looked up as an opcode mnemonic.

    Returns the encoded bytecode.

    Raises:
        ValueError: on a ``}`` without a matching ``{``, an unterminated
            ``{`` or string literal, or a value that does not fit in one
            byte.
        KeyError: on an unknown mnemonic or selector.
    """
    # This is a stack of all in-flight/unterminated blocks.
    bytecode = [bytearray()]

    def emit(byte):
        bytecode[-1].append(byte)

    tokens = list(assembler.split(" "))
    tokens.reverse()
    while tokens:
        tok = tokens.pop()
        if tok == "":
            # Produced by consecutive spaces; nothing to encode.
            pass
        elif tok == "{":
            bytecode.append(bytearray())
        elif tok == "}":
            if len(bytecode) < 2:
                raise ValueError("'}' without matching '{'")
            block = bytecode.pop()
            emit(op_begin)
            emit(len(block))  # FIXME: uleb
            bytecode[-1].extend(block)
        elif tok[0].isdigit():
            if tok[-1] == "u":
                emit(op_lit_uint)
                emit(int(tok[:-1]))  # FIXME
            else:
                emit(op_lit_int)
                emit(int(tok))  # FIXME
        elif tok[0] == "@":
            emit(op_lit_selector)
            emit(selector[tok])
        elif tok[0] == '"':
            s = bytearray()
            done = False
            chrs = tok[1:]
            while not done:
                quoted = False
                for c in chrs:
                    if quoted:
                        # A backslash makes the next character literal.
                        s.append(ord(c))  # FIXME
                        quoted = False
                    elif c == "\\":
                        quoted = True
                    elif c == '"':
                        done = True
                        break
                        # FIXME assert this is last in token
                    else:
                        s.append(ord(c))
                if not done:
                    # The literal continues in the next token; the space
                    # that split the tokens belongs to the string.
                    if not tokens:
                        raise ValueError("unterminated string literal")
                    s.append(ord(" "))
                    chrs = tokens.pop()

            emit(op_lit_string)
            emit(len(s))
            bytecode[-1].extend(s)
        else:
            emit(opcode[tok])
    if len(bytecode) != 1:
        raise ValueError("unterminated '{'")
    return bytecode[0]


################################################################################
# Disassembler.
184################################################################################ 185 186 187def disassemble(bytecode: bytearray) -> (str, int): 188 """Disassemble bytecode into (assembler, token starts)""" 189 asm = "" 190 all_bytes = list(bytecode) 191 all_bytes.reverse() 192 blocks = [] 193 tokens = [0] 194 195 def next_byte(): 196 """Fetch the next byte in the bytecode and keep track of all 197 in-flight blocks""" 198 for i in range(len(blocks)): 199 blocks[i] -= 1 200 tokens.append(len(asm)) 201 return all_bytes.pop() 202 203 while all_bytes: 204 b = next_byte() 205 if b == op_begin: 206 asm += "{" 207 length = next_byte() 208 blocks.append(length) 209 elif b == op_lit_uint: 210 b = next_byte() 211 asm += str(b) # FIXME uleb 212 asm += "u" 213 elif b == op_lit_int: 214 b = next_byte() 215 asm += str(b) 216 elif b == op_lit_selector: 217 b = next_byte() 218 asm += selector[b] 219 elif b == op_lit_string: 220 length = next_byte() 221 s = "'" 222 while length: 223 s += chr(next_byte()) 224 length -= 1 225 asm += '"' + repr(s)[2:] 226 else: 227 asm += opcode[b] 228 229 while blocks and blocks[-1] == 0: 230 asm += " }" 231 blocks.pop() 232 233 if all_bytes: 234 asm += " " 235 236 if blocks: 237 asm += "ERROR" 238 return asm, tokens 239 240 241################################################################################ 242# Interpreter. 
243################################################################################ 244 245 246def count_fmt_params(fmt: str) -> int: 247 """Count the number of parameters in a format string""" 248 from string import Formatter 249 250 f = Formatter() 251 n = 0 252 for _, name, _, _ in f.parse(fmt): 253 if name > n: 254 n = name 255 return n 256 257 258def interpret(bytecode: bytearray, control: list, data: list, tracing: bool = False): 259 """Interpret bytecode""" 260 frame = [] 261 frame.append((0, len(bytecode))) 262 263 def trace(): 264 """print a trace of the execution for debugging purposes""" 265 266 def fmt(d): 267 if isinstance(d, int): 268 return str(d) 269 if isinstance(d, str): 270 return d 271 return repr(type(d)) 272 273 pc, end = frame[-1] 274 asm, tokens = disassemble(bytecode) 275 print( 276 "=== frame = {1}, data = {2}, opcode = {0}".format( 277 opcode[b], frame, [fmt(d) for d in data] 278 ) 279 ) 280 print(asm) 281 print(" " * (tokens[pc]) + "^") 282 283 def next_byte(): 284 """Fetch the next byte and update the PC""" 285 pc, end = frame[-1] 286 assert pc < len(bytecode) 287 b = bytecode[pc] 288 frame[-1] = pc + 1, end 289 # At the end of a block? 290 while pc >= end: 291 frame.pop() 292 if not frame: 293 return None 294 pc, end = frame[-1] 295 if pc >= end: 296 return None 297 b = bytecode[pc] 298 frame[-1] = pc + 1, end 299 return b 300 301 while frame[-1][0] < len(bytecode): 302 b = next_byte() 303 if b == None: 304 break 305 if tracing: 306 trace() 307 # Data stack manipulation. 308 if b == op_dup: 309 data.append(data[-1]) 310 elif b == op_drop: 311 data.pop() 312 elif b == op_pick: 313 data.append(data[data.pop()]) 314 elif b == op_over: 315 data.append(data[-2]) 316 elif b == op_swap: 317 x = data.pop() 318 y = data.pop() 319 data.append(x) 320 data.append(y) 321 elif b == op_rot: 322 z = data.pop() 323 y = data.pop() 324 x = data.pop() 325 data.append(z) 326 data.append(x) 327 data.append(y) 328 329 # Control stack manipulation. 
330 elif b == op_begin: 331 length = next_byte() 332 pc, end = frame[-1] 333 control.append((pc, pc + length)) 334 frame[-1] = pc + length, end 335 elif b == op_if: 336 if data.pop(): 337 frame.append(control.pop()) 338 elif b == op_ifelse: 339 if data.pop(): 340 control.pop() 341 frame.append(control.pop()) 342 else: 343 frame.append(control.pop()) 344 control.pop() 345 346 # Literals. 347 elif b == op_lit_uint: 348 b = next_byte() # FIXME uleb 349 data.append(int(b)) 350 elif b == op_lit_int: 351 b = next_byte() # FIXME uleb 352 data.append(int(b)) 353 elif b == op_lit_selector: 354 b = next_byte() 355 data.append(b) 356 elif b == op_lit_string: 357 length = next_byte() 358 s = "" 359 while length: 360 s += chr(next_byte()) 361 length -= 1 362 data.append(s) 363 364 elif b == op_as_uint: 365 pass 366 elif b == op_as_int: 367 pass 368 elif b == op_is_null: 369 data.append(1 if data.pop() == None else 0) 370 371 # Arithmetic, logic, etc. 372 elif b == op_plus: 373 data.append(data.pop() + data.pop()) 374 elif b == op_minus: 375 data.append(-data.pop() + data.pop()) 376 elif b == op_mul: 377 data.append(data.pop() * data.pop()) 378 elif b == op_div: 379 y = data.pop() 380 data.append(data.pop() / y) 381 elif b == op_mod: 382 y = data.pop() 383 data.append(data.pop() % y) 384 elif b == op_shl: 385 y = data.pop() 386 data.append(data.pop() << y) 387 elif b == op_shr: 388 y = data.pop() 389 data.append(data.pop() >> y) 390 elif b == op_and: 391 data.append(data.pop() & data.pop()) 392 elif b == op_or: 393 data.append(data.pop() | data.pop()) 394 elif b == op_xor: 395 data.append(data.pop() ^ data.pop()) 396 elif b == op_not: 397 data.append(not data.pop()) 398 elif b == op_eq: 399 data.append(data.pop() == data.pop()) 400 elif b == op_neq: 401 data.append(data.pop() != data.pop()) 402 elif b == op_lt: 403 data.append(data.pop() > data.pop()) 404 elif b == op_gt: 405 data.append(data.pop() < data.pop()) 406 elif b == op_le: 407 data.append(data.pop() >= data.pop()) 408 
elif b == op_ge: 409 data.append(data.pop() <= data.pop()) 410 411 # Function calls. 412 elif b == op_call: 413 sel = data.pop() 414 if sel == sel_summary: 415 data.append(data.pop().GetSummary()) 416 elif sel == sel_get_num_children: 417 data.append(data.pop().GetNumChildren()) 418 elif sel == sel_get_child_at_index: 419 index = data.pop() 420 valobj = data.pop() 421 data.append(valobj.GetChildAtIndex(index)) 422 elif sel == sel_get_child_with_name: 423 name = data.pop() 424 valobj = data.pop() 425 data.append(valobj.GetChildMemberWithName(name)) 426 elif sel == sel_get_child_index: 427 name = data.pop() 428 valobj = data.pop() 429 data.append(valobj.GetIndexOfChildWithName(name)) 430 elif sel == sel_get_type: 431 data.append(data.pop().GetType()) 432 elif sel == sel_get_template_argument_type: 433 n = data.pop() 434 valobj = data.pop() 435 data.append(valobj.GetTemplateArgumentType(n)) 436 elif sel == sel_get_value: 437 data.append(data.pop().GetValue()) 438 elif sel == sel_get_value_as_unsigned: 439 data.append(data.pop().GetValueAsUnsigned()) 440 elif sel == sel_get_value_as_signed: 441 data.append(data.pop().GetValueAsSigned()) 442 elif sel == sel_get_value_as_address: 443 data.append(data.pop().GetValueAsAddress()) 444 elif sel == sel_cast: 445 sbtype = data.pop() 446 valobj = data.pop() 447 data.append(valobj.Cast(sbtype)) 448 elif sel == sel_strlen: 449 s = data.pop() 450 data.append(len(s) if s else 0) 451 elif sel == sel_fmt: 452 fmt = data.pop() 453 n = count_fmt_params(fmt) 454 args = [] 455 for i in range(n): 456 args.append(data.pop()) 457 data.append(fmt.format(*args)) 458 else: 459 print("not implemented: " + selector[sel]) 460 assert False 461 pass 462 return data[-1] 463 464 465if __name__ == "__main__": 466 import argparse 467 468 parser = argparse.ArgumentParser( 469 description=""" 470 Compiler, disassembler, and interpreter for LLDB dataformatter bytecode. 471 See https://lldb.llvm.org/resources/formatterbytecode.html for more details. 
472 """ 473 ) 474 parser.add_argument( 475 "-c", "--compile", type=str, help="compile assembler into bytecode" 476 ) 477 parser.add_argument("-d", "--disassemble", type=str, help="disassemble bytecode") 478 parser.add_argument("-t", "--test", action="store_true", help="run unit tests") 479 args = parser.parse_args() 480 if args.compile: 481 print(compile(str(args.compile)).hex()) 482 483 if args.disassemble: 484 print(disassemble(bytearray.fromhex(str(args.disassemble)))) 485 486 ############################################################################ 487 # Tests. 488 ############################################################################ 489 if args.test: 490 # Work around the fact that one of the local files is calles 491 # types.py, which breaks some versions of python. 492 import os, sys 493 494 path = os.path.abspath(os.path.dirname(__file__)) 495 sys.path.remove(path) 496 import unittest 497 498 class TestCompiler(unittest.TestCase): 499 def test(self): 500 self.assertEqual(compile("1u dup").hex(), "200101") 501 self.assertEqual(compile('"1u dup"').hex(), "2206317520647570") 502 self.assertEqual(compile("16 < { dup } if").hex(), "21105210010111") 503 self.assertEqual(compile('{ { " } " } }').hex(), "100710052203207d20") 504 505 def roundtrip(asm): 506 self.assertEqual(disassemble(compile(asm))[0], asm) 507 508 roundtrip("1u dup") 509 roundtrip('1u dup "1u dup"') 510 roundtrip("16 < { dup } if") 511 roundtrip('{ { " } " } }') 512 513 self.assertEqual(interpret(compile("1 1 +"), [], []), 2) 514 self.assertEqual(interpret(compile("2 1 1 + *"), [], []), 4) 515 self.assertEqual( 516 interpret(compile('2 1 > { "yes" } { "no" } ifelse'), [], []), "yes" 517 ) 518 519 import sys 520 521 sys.argv.pop() 522 path = os.path.dirname(__file__) 523 sys.path.remove 524 unittest.main() 525