xref: /llvm-project/lldb/examples/python/formatter_bytecode.py (revision 60380cd27c6fa5ed6e39866c51b18a64bc4d566a)
1"""
2Specification, compiler, disassembler, and interpreter
3for LLDB dataformatter bytecode.
4
5See https://lldb.llvm.org/resources/formatterbytecode.html for more details.
6"""
7
8from __future__ import annotations
9
# Type identifiers, numbered consecutively starting at 1.
(
    type_String,
    type_Int,
    type_UInt,
    type_Object,
    type_Type,
) = range(1, 6)

# Opcode table, populated by define_opcode(): maps each mnemonic to its
# opcode number and each opcode number back to its mnemonic.
opcode = {}
19
20
def define_opcode(n, mnemonic, name):
    """Register opcode number `n` under `name` and, optionally, `mnemonic`.

    Creates the module-level constant ``op_<name>`` with value `n` and
    records the opcode in the `opcode` table in both directions. Opcodes
    without an assembler spelling pass a falsy `mnemonic`; they only get the
    number -> mnemonic entry (whose value is then that falsy mnemonic).
    """
    constant_name = "op_" + name
    globals()[constant_name] = n
    # Only opcodes with an assembler spelling can be looked up by mnemonic.
    if mnemonic:
        opcode[mnemonic] = n
    opcode[n] = mnemonic
26
27
# Each row is (opcode number, assembler mnemonic or None, constant name).
for _opcode_row in (
    # Data stack manipulation.
    (1, "dup", "dup"),
    (2, "drop", "drop"),
    (3, "pick", "pick"),
    (4, "over", "over"),
    (5, "swap", "swap"),
    (6, "rot", "rot"),
    # Control stack manipulation.
    (0x10, "{", "begin"),
    (0x11, "if", "if"),
    (0x12, "ifelse", "ifelse"),
    # Literals. These have no mnemonic of their own.
    (0x20, None, "lit_uint"),
    (0x21, None, "lit_int"),
    (0x22, None, "lit_string"),
    (0x23, None, "lit_selector"),
    (0x2A, "as_int", "as_int"),
    (0x2B, "as_uint", "as_uint"),
    (0x2C, "is_null", "is_null"),
    # Arithmetic.
    (0x30, "+", "plus"),
    (0x31, "-", "minus"),
    (0x32, "*", "mul"),
    (0x33, "/", "div"),
    (0x34, "%", "mod"),
    (0x35, "<<", "shl"),
    (0x36, ">>", "shr"),
    # Bitwise logic.
    (0x40, "&", "and"),
    (0x41, "|", "or"),
    (0x42, "^", "xor"),
    (0x43, "~", "not"),
    # Comparisons.
    (0x50, "=", "eq"),
    (0x51, "!=", "neq"),
    (0x52, "<", "lt"),
    (0x53, ">", "gt"),
    (0x54, "=<", "le"),
    (0x55, ">=", "ge"),
    # Function calls.
    (0x60, "call", "call"),
):
    define_opcode(*_opcode_row)
# Do not leave the loop variable behind in the module namespace.
del _opcode_row
69
# Function signatures, numbered consecutively starting at 0.
(
    sig_summary,
    sig_init,
    sig_get_num_children,
    sig_get_child_index,
    sig_get_child_at_index,
) = range(5)

# Selector table, populated by define_selector(): maps each "@name" to its
# selector number and each selector number back to its "@name".
selector = {}
79
80
def define_selector(n, name):
    """Register selector number `n` under `name`.

    Creates the module-level constant ``sel_<name>`` with value `n` and
    records the selector in the `selector` table in both directions, using
    the assembler spelling ``@<name>`` as the textual key.
    """
    spelling = "@" + name
    globals()["sel_" + name] = n
    selector[spelling] = n
    selector[n] = spelling
85
86
# Each row is (selector number, selector name without the leading "@").
for _selector_row in (
    (0, "summary"),
    (1, "type_summary"),
    (0x10, "get_num_children"),
    (0x11, "get_child_at_index"),
    (0x12, "get_child_with_name"),
    (0x13, "get_child_index"),
    (0x15, "get_type"),
    (0x16, "get_template_argument_type"),
    (0x17, "cast"),
    (0x20, "get_value"),
    (0x21, "get_value_as_unsigned"),
    (0x22, "get_value_as_signed"),
    (0x23, "get_value_as_address"),
    (0x40, "read_memory_byte"),
    (0x41, "read_memory_uint32"),
    (0x42, "read_memory_int32"),
    (0x43, "read_memory_unsigned"),
    (0x44, "read_memory_signed"),
    (0x45, "read_memory_address"),
    (0x46, "read_memory"),
    (0x50, "fmt"),
    (0x51, "sprintf"),
    (0x52, "strlen"),
):
    define_selector(*_selector_row)
# Do not leave the loop variable behind in the module namespace.
del _selector_row
113
114
115################################################################################
116# Compiler.
117################################################################################
118
119
def compile(assembler: str) -> bytearray:
    """Compile assembler into bytecode.

    The assembler text is split on single spaces. Tokens are handled as
    follows:

    * ``{`` / ``}`` delimit a block, emitted as ``op_begin``, a one-byte
      length, and the block's bytecode.
    * A token starting with a digit is an integer literal; a trailing ``u``
      makes it unsigned.
    * A token starting with ``@`` is a selector literal.
    * A token starting with ``"`` starts a string literal that runs up to
      the next unescaped ``"`` (possibly several tokens later). A backslash
      takes the following character literally.
    * Anything else is looked up as an opcode mnemonic.

    Args:
        assembler: The assembler source.

    Returns:
        The compiled bytecode.

    Raises:
        ValueError: On a ``}`` without a matching ``{``, on an unterminated
            string literal, or when a literal, length, or character does not
            fit in a single byte (ULEB encoding is not implemented yet).
        AssertionError: If a ``{`` is never closed.
        KeyError: On an unknown mnemonic or selector.
    """
    # This is a stack of all in-flight/unterminated blocks. The bottom entry
    # is the top-level program.
    bytecode = [bytearray()]

    def emit(byte):
        bytecode[-1].append(byte)

    # Reversed so that pop() hands out the tokens in source order.
    tokens = list(assembler.split(" "))
    tokens.reverse()
    while tokens:
        tok = tokens.pop()
        if tok == "":
            # Produced by consecutive spaces; nothing to emit.
            pass
        elif tok == "{":
            bytecode.append(bytearray())
        elif tok == "}":
            if len(bytecode) < 2:
                raise ValueError("'}' without a matching '{'")
            block = bytecode.pop()
            emit(op_begin)
            emit(len(block))  # FIXME: uleb
            bytecode[-1].extend(block)
        elif tok[0].isdigit():
            if tok[-1] == "u":
                emit(op_lit_uint)
                emit(int(tok[:-1]))  # FIXME
            else:
                emit(op_lit_int)
                emit(int(tok))  # FIXME
        elif tok[0] == "@":
            emit(op_lit_selector)
            emit(selector[tok])
        elif tok[0] == '"':
            s = bytearray()
            done = False
            chrs = tok[1:]
            while not done:
                quoted = False
                for c in chrs:
                    if quoted:
                        s.append(ord(c))  # FIXME
                        quoted = False
                    elif c == "\\":
                        quoted = True
                    elif c == '"':
                        done = True
                        break
                        # FIXME assert this is last in token
                    else:
                        s.append(ord(c))
                if not done:
                    # The literal continues in the next token; put back the
                    # space that the tokenizer split on.
                    if not tokens:
                        raise ValueError("unterminated string literal")
                    s.append(ord(" "))
                    chrs = tokens.pop()

            emit(op_lit_string)
            emit(len(s))
            bytecode[-1].extend(s)
        else:
            emit(opcode[tok])
    # Raised explicitly rather than via `assert` so that the check is not
    # stripped when Python runs with -O.
    if len(bytecode) != 1:
        raise AssertionError("unterminated {")
    return bytecode[0]
180
181
182################################################################################
183# Disassembler.
184################################################################################
185
186
def disassemble(bytecode: bytearray) -> tuple[str, list[int]]:
    """Disassemble bytecode into (assembler, token starts).

    Args:
        bytecode: The bytecode to disassemble.

    Returns:
        A tuple ``(asm, tokens)``. ``asm`` is the assembler text, with
        "ERROR" appended if a block is still open when the bytecode ends.
        ``tokens`` starts with 0 and then holds, for every byte of
        `bytecode` in order, the length of ``asm`` at the moment that byte
        was fetched.

    Raises:
        IndexError: If the bytecode ends in the middle of an instruction.
        KeyError: On an unknown opcode or selector.
    """
    asm = ""
    # Reversed so that pop() hands out the bytes in program order.
    all_bytes = list(bytecode)
    all_bytes.reverse()
    # Remaining byte count of every block that is currently open.
    blocks = []
    tokens = [0]

    def next_byte():
        """Fetch the next byte in the bytecode and keep track of all
        in-flight blocks"""
        blocks[:] = [remaining - 1 for remaining in blocks]
        tokens.append(len(asm))
        return all_bytes.pop()

    def escape(c):
        """Spell one string-literal character the way the assembler reads it."""
        # The compiler treats a backslash as "take the next character
        # literally", so these two must be escaped to survive a round trip.
        if c == '"' or c == "\\":
            return "\\" + c
        if c.isprintable():
            return c
        # Show unprintable characters as Python-style escapes (e.g. \n, \x00).
        return repr(c)[1:-1]

    while all_bytes:
        b = next_byte()
        if b == op_begin:
            asm += "{"
            length = next_byte()
            blocks.append(length)
        elif b == op_lit_uint:
            b = next_byte()
            asm += str(b)  # FIXME uleb
            asm += "u"
        elif b == op_lit_int:
            b = next_byte()
            asm += str(b)
        elif b == op_lit_selector:
            b = next_byte()
            asm += selector[b]
        elif b == op_lit_string:
            length = next_byte()
            # Fetch the whole payload before touching asm so that every
            # payload byte records the same token start.
            chars = []
            while length:
                chars.append(chr(next_byte()))
                length -= 1
            asm += '"' + "".join(escape(c) for c in chars) + '"'
        else:
            asm += opcode[b]

        # Close every block that ended with the instruction just printed.
        while blocks and blocks[-1] == 0:
            asm += " }"
            blocks.pop()

        if all_bytes:
            asm += " "

    if blocks:
        asm += "ERROR"
    return asm, tokens
239
240
241################################################################################
242# Interpreter.
243################################################################################
244
245
def count_fmt_params(fmt: str) -> int:
    """Count the number of positional parameters a format string consumes.

    This is the number of positional arguments ``fmt.format(...)`` needs:
    one past the highest explicit index (``"{0} {2}"`` needs 3), or the
    number of auto-numbered ``{}`` fields. Replacement fields nested inside
    a format spec (``"{0:{1}}"``) are counted too. Named fields such as
    ``{name}`` are not positional and are ignored.

    Args:
        fmt: A ``str.format``-style format string.

    Returns:
        The number of positional arguments required; 0 if there are none.

    Raises:
        ValueError: If `fmt` is not a well-formed format string.
    """
    from string import Formatter

    formatter = Formatter()

    def field_names(text):
        """Yield the name of every replacement field, including nested ones."""
        for _, name, spec, _ in formatter.parse(text):
            # Trailing literal text is reported with a field name of None.
            if name is None:
                continue
            yield name
            if spec:
                yield from field_names(spec)

    explicit = 0  # One past the highest explicit index seen so far.
    automatic = 0  # Number of auto-numbered "{}" fields seen so far.
    for name in field_names(fmt):
        # Strip attribute/index access: "0.real" and "0[1]" both use arg 0.
        head = name.split(".", 1)[0].split("[", 1)[0]
        if not head:
            automatic += 1
        elif head.isdecimal():
            explicit = max(explicit, int(head) + 1)
    return max(explicit, automatic)
256
257
def interpret(bytecode: bytearray, control: list, data: list, tracing: bool = False):
    """Interpret bytecode and return the top of the data stack.

    Args:
        bytecode: The program to execute.
        control: The control stack, modified in place. ``{`` pushes the
            ``(start, end)`` byte range of a block onto it; ``if`` and
            ``ifelse`` pop blocks from it.
        data: The data stack, modified in place. The ``call`` selectors pop
            objects and invoke methods such as ``GetSummary()`` on them, so
            callers presumably pre-populate this with the value being
            formatted -- TODO confirm against callers.
        tracing: If true, print the frame stack, data stack, and a
            disassembly with a position marker before each instruction.

    Returns:
        The last element of `data` once execution has finished.

    Raises:
        IndexError: On stack underflow, or if `data` is empty at the end.
        AssertionError: If a ``call`` uses a selector that is not
            implemented here.
    """
    # Stack of (pc, end) byte ranges; the top entry is the block being run.
    frame = []
    frame.append((0, len(bytecode)))

    def trace():
        """print a trace of the execution for debugging purposes"""

        def fmt(d):
            if isinstance(d, int):
                return str(d)
            if isinstance(d, str):
                return d
            return repr(type(d))

        pc, _ = frame[-1]
        asm, tokens = disassemble(bytecode)
        print(
            "=== frame = {1}, data = {2}, opcode = {0}".format(
                opcode[b], frame, [fmt(d) for d in data]
            )
        )
        print(asm)
        print(" " * (tokens[pc]) + "^")

    def next_byte():
        """Fetch the next byte and update the PC"""
        pc, end = frame[-1]
        assert pc < len(bytecode)
        b = bytecode[pc]
        frame[-1] = pc + 1, end
        # At the end of a block? Then discard what was just read, return to
        # the enclosing frame(s), and fetch from there instead.
        while pc >= end:
            frame.pop()
            if not frame:
                return None
            pc, end = frame[-1]
            if pc >= end:
                return None
            b = bytecode[pc]
            frame[-1] = pc + 1, end
        return b

    while frame[-1][0] < len(bytecode):
        b = next_byte()
        if b is None:
            break
        if tracing:
            trace()
        # Data stack manipulation.
        if b == op_dup:
            data.append(data[-1])
        elif b == op_drop:
            data.pop()
        elif b == op_pick:
            data.append(data[data.pop()])
        elif b == op_over:
            data.append(data[-2])
        elif b == op_swap:
            x = data.pop()
            y = data.pop()
            data.append(x)
            data.append(y)
        elif b == op_rot:
            z = data.pop()
            y = data.pop()
            x = data.pop()
            data.append(z)
            data.append(x)
            data.append(y)

        # Control stack manipulation.
        elif b == op_begin:
            # Remember where the block lives and skip over its body.
            length = next_byte()
            pc, end = frame[-1]
            control.append((pc, pc + length))
            frame[-1] = pc + length, end
        elif b == op_if:
            if data.pop():
                frame.append(control.pop())
        elif b == op_ifelse:
            # The else-block was pushed last, so it is on top.
            if data.pop():
                control.pop()
                frame.append(control.pop())
            else:
                frame.append(control.pop())
                control.pop()

        # Literals.
        elif b == op_lit_uint:
            b = next_byte()  # FIXME uleb
            data.append(int(b))
        elif b == op_lit_int:
            b = next_byte()  # FIXME uleb
            data.append(int(b))
        elif b == op_lit_selector:
            b = next_byte()
            data.append(b)
        elif b == op_lit_string:
            length = next_byte()
            s = ""
            while length:
                s += chr(next_byte())
                length -= 1
            data.append(s)

        elif b == op_as_uint:
            pass
        elif b == op_as_int:
            pass
        elif b == op_is_null:
            data.append(1 if data.pop() is None else 0)

        # Arithmetic, logic, etc.
        # For the non-commutative operators the right-hand operand is on top
        # of the stack and therefore popped first.
        elif b == op_plus:
            data.append(data.pop() + data.pop())
        elif b == op_minus:
            data.append(-data.pop() + data.pop())
        elif b == op_mul:
            data.append(data.pop() * data.pop())
        elif b == op_div:
            y = data.pop()
            # Integer division: the bytecode has no floating-point type, and
            # a float result would break later shifts and `pick`.
            data.append(data.pop() // y)
        elif b == op_mod:
            y = data.pop()
            data.append(data.pop() % y)
        elif b == op_shl:
            y = data.pop()
            data.append(data.pop() << y)
        elif b == op_shr:
            y = data.pop()
            data.append(data.pop() >> y)
        elif b == op_and:
            data.append(data.pop() & data.pop())
        elif b == op_or:
            data.append(data.pop() | data.pop())
        elif b == op_xor:
            data.append(data.pop() ^ data.pop())
        elif b == op_not:
            # `~` is the bitwise complement, not a logical negation.
            data.append(~data.pop())
        elif b == op_eq:
            data.append(data.pop() == data.pop())
        elif b == op_neq:
            data.append(data.pop() != data.pop())
        # The comparisons look inverted because the first pop() yields the
        # right-hand operand: `x y <` evaluates `y > x`.
        elif b == op_lt:
            data.append(data.pop() > data.pop())
        elif b == op_gt:
            data.append(data.pop() < data.pop())
        elif b == op_le:
            data.append(data.pop() >= data.pop())
        elif b == op_ge:
            data.append(data.pop() <= data.pop())

        # Function calls.
        elif b == op_call:
            sel = data.pop()
            if sel == sel_summary:
                data.append(data.pop().GetSummary())
            elif sel == sel_get_num_children:
                data.append(data.pop().GetNumChildren())
            elif sel == sel_get_child_at_index:
                index = data.pop()
                valobj = data.pop()
                data.append(valobj.GetChildAtIndex(index))
            elif sel == sel_get_child_with_name:
                name = data.pop()
                valobj = data.pop()
                data.append(valobj.GetChildMemberWithName(name))
            elif sel == sel_get_child_index:
                name = data.pop()
                valobj = data.pop()
                data.append(valobj.GetIndexOfChildWithName(name))
            elif sel == sel_get_type:
                data.append(data.pop().GetType())
            elif sel == sel_get_template_argument_type:
                n = data.pop()
                valobj = data.pop()
                data.append(valobj.GetTemplateArgumentType(n))
            elif sel == sel_get_value:
                data.append(data.pop().GetValue())
            elif sel == sel_get_value_as_unsigned:
                data.append(data.pop().GetValueAsUnsigned())
            elif sel == sel_get_value_as_signed:
                data.append(data.pop().GetValueAsSigned())
            elif sel == sel_get_value_as_address:
                data.append(data.pop().GetValueAsAddress())
            elif sel == sel_cast:
                sbtype = data.pop()
                valobj = data.pop()
                data.append(valobj.Cast(sbtype))
            elif sel == sel_strlen:
                s = data.pop()
                data.append(len(s) if s else 0)
            elif sel == sel_fmt:
                fmt = data.pop()
                n = count_fmt_params(fmt)
                # Arguments are popped top-of-stack first.
                args = []
                for i in range(n):
                    args.append(data.pop())
                data.append(fmt.format(*args))
            else:
                message = "not implemented: " + str(selector.get(sel, sel))
                print(message)
                # Raised explicitly rather than via `assert False` so that
                # the failure is not silently skipped under python -O.
                raise AssertionError(message)
    return data[-1]
463
464
# Command-line driver: compile (-c), disassemble (-d), and/or run the unit
# tests (-t).
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(
        description="""
    Compiler, disassembler, and interpreter for LLDB dataformatter bytecode.
    See https://lldb.llvm.org/resources/formatterbytecode.html for more details.
    """
    )
    parser.add_argument(
        "-c", "--compile", type=str, help="compile assembler into bytecode"
    )
    parser.add_argument("-d", "--disassemble", type=str, help="disassemble bytecode")
    parser.add_argument("-t", "--test", action="store_true", help="run unit tests")
    args = parser.parse_args()
    if args.compile:
        # Print the bytecode as a hex string.
        print(compile(str(args.compile)).hex())

    if args.disassemble:
        # The argument is a hex string; prints the (assembler, token starts)
        # tuple returned by disassemble().
        print(disassemble(bytearray.fromhex(str(args.disassemble))))

    ############################################################################
    # Tests.
    ############################################################################
    if args.test:
        # Work around the fact that one of the local files is called
        # types.py, which breaks some versions of python.
        import os
        import sys

        path = os.path.abspath(os.path.dirname(__file__))
        # The directory is not guaranteed to be on sys.path; an unguarded
        # remove() would raise ValueError in that case.
        if path in sys.path:
            sys.path.remove(path)
        import unittest

        class TestCompiler(unittest.TestCase):
            def test(self):
                self.assertEqual(compile("1u dup").hex(), "200101")
                self.assertEqual(compile('"1u dup"').hex(), "2206317520647570")
                self.assertEqual(compile("16 < { dup } if").hex(), "21105210010111")
                self.assertEqual(compile('{ { " } " } }').hex(), "100710052203207d20")

                def roundtrip(asm):
                    self.assertEqual(disassemble(compile(asm))[0], asm)

                roundtrip("1u dup")
                roundtrip('1u dup "1u dup"')
                roundtrip("16 < { dup } if")
                roundtrip('{ { " } " } }')

                self.assertEqual(interpret(compile("1 1 +"), [], []), 2)
                self.assertEqual(interpret(compile("2 1 1 + *"), [], []), 4)
                self.assertEqual(
                    interpret(compile('2 1 > { "yes" } { "no" } ifelse'), [], []), "yes"
                )

        # Run the tests from here, not from inside the test method, where the
        # call would never be reached. Pass only the program name so that
        # unittest does not try to parse this script's own flags.
        unittest.main(argv=sys.argv[:1])
525