xref: /llvm-project/lldb/examples/python/formatter_bytecode.py (revision ee1adc5aab4fb517314358ce03cfda426da9c4ce)
1"""
2Specification, compiler, disassembler, and interpreter
3for LLDB dataformatter bytecode.
4
5See https://lldb.llvm.org/resources/formatterbytecode.html for more details.
6"""
7
8from __future__ import annotations
9
# Type identifiers, numbered consecutively starting at 1.
(
    type_String,
    type_Int,
    type_UInt,
    type_Object,
    type_Type,
) = range(1, 6)
16
# Opcode table.  It is bidirectional: mnemonic -> opcode number, and
# opcode number -> mnemonic (None for opcodes that have no mnemonic).
opcode = {}


def define_opcode(n, mnemonic, name):
    """Register opcode number `n` and export it as the global `op_<name>`.

    `opcode[n]` is always set to `mnemonic`; the reverse entry
    `opcode[mnemonic]` is only added when `mnemonic` is truthy.
    """
    constant_name = "op_" + name
    globals()[constant_name] = n
    # Forward entry first (when present), then the number -> mnemonic entry.
    entries = {mnemonic: n} if mnemonic else {}
    entries[n] = mnemonic
    opcode.update(entries)
26
27
# All opcodes as (number, assembler mnemonic, name) rows, registered in
# order.  Rows with a mnemonic of None get no mnemonic -> number entry in
# the opcode table.
for _number, _mnemonic, _name in (
    (1, "dup", "dup"),
    (2, "drop", "drop"),
    (3, "pick", "pick"),
    (4, "over", "over"),
    (5, "swap", "swap"),
    (6, "rot", "rot"),
    #
    (0x10, "{", "begin"),
    (0x11, "if", "if"),
    (0x12, "ifelse", "ifelse"),
    (0x13, "return", "return"),
    #
    (0x20, None, "lit_uint"),
    (0x21, None, "lit_int"),
    (0x22, None, "lit_string"),
    (0x23, None, "lit_selector"),
    #
    (0x2A, "as_int", "as_int"),
    (0x2B, "as_uint", "as_uint"),
    (0x2C, "is_null", "is_null"),
    #
    (0x30, "+", "plus"),
    (0x31, "-", "minus"),
    (0x32, "*", "mul"),
    (0x33, "/", "div"),
    (0x34, "%", "mod"),
    (0x35, "<<", "shl"),
    (0x36, ">>", "shr"),
    #
    (0x40, "&", "and"),
    (0x41, "|", "or"),
    (0x42, "^", "xor"),
    (0x43, "~", "not"),
    #
    (0x50, "=", "eq"),
    (0x51, "!=", "neq"),
    (0x52, "<", "lt"),
    (0x53, ">", "gt"),
    (0x54, "=<", "le"),
    (0x55, ">=", "ge"),
    #
    (0x60, "call", "call"),
):
    define_opcode(_number, _mnemonic, _name)
# Do not leave the loop variables behind as module globals.
del _number, _mnemonic, _name
70
# Function signatures, numbered consecutively starting at 0.
(
    sig_summary,
    sig_init,
    sig_get_num_children,
    sig_get_child_index,
    sig_get_child_at_index,
) = range(5)
77
# Selector table.  It is bidirectional: "@name" -> selector number, and
# selector number -> "@name".
selector = {}


def define_selector(n, name):
    """Register selector number `n` and export it as the global `sel_<name>`.

    The assembler spelling of a selector is its name prefixed with "@".
    """
    spelling = "@" + name
    globals()["sel_" + name] = n
    selector.update({spelling: n, n: spelling})
86
87
# All selectors as (number, name) rows, registered in order.
for _number, _name in (
    (0, "summary"),
    (1, "type_summary"),
    #
    (0x10, "get_num_children"),
    (0x11, "get_child_at_index"),
    (0x12, "get_child_with_name"),
    (0x13, "get_child_index"),
    (0x15, "get_type"),
    (0x16, "get_template_argument_type"),
    (0x17, "cast"),
    (0x20, "get_value"),
    (0x21, "get_value_as_unsigned"),
    (0x22, "get_value_as_signed"),
    (0x23, "get_value_as_address"),
    #
    (0x40, "read_memory_byte"),
    (0x41, "read_memory_uint32"),
    (0x42, "read_memory_int32"),
    (0x43, "read_memory_unsigned"),
    (0x44, "read_memory_signed"),
    (0x45, "read_memory_address"),
    (0x46, "read_memory"),
    #
    (0x50, "fmt"),
    (0x51, "sprintf"),
    (0x52, "strlen"),
):
    define_selector(_number, _name)
# Do not leave the loop variables behind as module globals.
del _number, _name
114
115
116################################################################################
117# Compiler.
118################################################################################
119
120
def compile(assembler: str) -> bytearray:
    """Translate assembler text into bytecode.

    The text is split on single spaces.  Recognized tokens are `{` / `}`
    (block delimiters), numbers (a trailing `u` selects an unsigned
    literal), `@selector` names, double-quoted strings (which may contain
    spaces and backslash-escaped characters) and opcode mnemonics.

    Lengths and literal values are each emitted as one byte.
    An unterminated `{` trips the final assertion.
    """
    # One buffer per unterminated block; the innermost block is last and
    # the first entry is the top-level program.
    open_blocks = [bytearray()]

    def emit(byte):
        open_blocks[-1].append(byte)

    # Tokens are taken from the end of this list, hence the reversal.
    pending = assembler.split(" ")[::-1]

    def read_string(chunk):
        """Collect the bytes of a string literal whose text starts in `chunk`.

        The literal may span several tokens; each token boundary stands
        for the single space that the split removed.
        """
        text = bytearray()
        while True:
            escaped = False
            closed = False
            for ch in chunk:
                if escaped:
                    text.append(ord(ch))  # FIXME
                    escaped = False
                elif ch == "\\":
                    escaped = True
                elif ch == '"':
                    # FIXME assert this is last in token
                    closed = True
                    break
                else:
                    text.append(ord(ch))
            if closed:
                return text
            text.append(ord(" "))
            chunk = pending.pop()

    while pending:
        tok = pending.pop()
        if not tok:
            # Consecutive spaces produce empty tokens; skip them.
            continue
        head = tok[0]
        if tok == "{":
            open_blocks.append(bytearray())
        elif tok == "}":
            finished = open_blocks.pop()
            emit(op_begin)
            emit(len(finished))  # FIXME: uleb
            open_blocks[-1].extend(finished)
        elif head.isdigit():
            if tok[-1] == "u":
                emit(op_lit_uint)
                emit(int(tok[:-1]))  # FIXME
            else:
                emit(op_lit_int)
                emit(int(tok))  # FIXME
        elif head == "@":
            emit(op_lit_selector)
            emit(selector[tok])
        elif head == '"':
            literal = read_string(tok[1:])
            emit(op_lit_string)
            emit(len(literal))
            open_blocks[-1].extend(literal)
        else:
            emit(opcode[tok])
    assert len(open_blocks) == 1  # unterminated {
    return open_blocks[0]
181
182
183################################################################################
184# Disassembler.
185################################################################################
186
187
def disassemble(bytecode: bytearray) -> tuple[str, list[int]]:
    """Disassemble bytecode into (assembler, token starts).

    Returns a pair:
      * the assembler text, and
      * a list of offsets into that text.  It starts with 0 and gains one
        entry each time a byte is consumed, namely the length of the text
        produced so far.  Entry k + 1 therefore belongs to byte k.

    String literals are printed in double quotes.  A double quote inside
    a literal is printed backslash-escaped so that compile() reads it back
    as part of the string.  If the bytecode ends inside a block, "ERROR"
    is appended to the text.
    """
    asm = ""
    remaining = list(bytecode)
    remaining.reverse()
    # Number of bytes still to come for each in-flight block, innermost last.
    blocks = []
    tokens = [0]

    def next_byte():
        """Fetch the next byte in the bytecode and keep track of all
        in-flight blocks"""
        blocks[:] = [left - 1 for left in blocks]
        tokens.append(len(asm))
        return remaining.pop()

    def quote(text):
        """Render `text` as a double-quoted assembler string literal."""
        # repr() of a string that contains "'" but no '"' is delimited by
        # double quotes, so prefixing "'" and slicing off the first two
        # characters and the last one leaves just the escaped text.  The
        # text is split on '"' first so that this holds for every piece.
        pieces = [repr("'" + piece)[2:-1] for piece in text.split('"')]
        return '"' + '\\"'.join(pieces) + '"'

    while remaining:
        b = next_byte()
        if b == op_begin:
            asm += "{"
            length = next_byte()
            blocks.append(length)
        elif b == op_lit_uint:
            b = next_byte()
            asm += str(b)  # FIXME uleb
            asm += "u"
        elif b == op_lit_int:
            b = next_byte()
            asm += str(b)
        elif b == op_lit_selector:
            b = next_byte()
            asm += selector[b]
        elif b == op_lit_string:
            length = next_byte()
            text = "".join(chr(next_byte()) for _ in range(length))
            asm += quote(text)
        else:
            asm += opcode[b]

        # Close every block whose last byte has just been consumed.
        while blocks and blocks[-1] == 0:
            asm += " }"
            blocks.pop()

        if remaining:
            asm += " "

    if blocks:
        asm += "ERROR"
    return asm, tokens
240
241
242################################################################################
243# Interpreter.
244################################################################################
245
246
def count_fmt_params(fmt: str) -> int:
    """Count the positional arguments that `fmt.format(*args)` consumes.

    Explicitly numbered fields (`{0}`, `{2.attr}`, `{1[0]}`) need the
    highest index plus one; auto-numbered fields (`{}`) need one argument
    each.  Keyword fields (`{name}`) take no positional argument and are
    ignored.  Fields nested inside a format spec are not counted.
    """
    from string import Formatter

    auto_numbered = 0
    explicit = 0
    for _, field, _, _ in Formatter().parse(fmt):
        if field is None:
            # Pure literal text, no replacement field.
            continue
        # Keep only the argument part of e.g. "0.attr" or "0[1]".
        arg = field.partition(".")[0].partition("[")[0]
        if not arg:
            auto_numbered += 1
        elif arg.isdecimal():
            explicit = max(explicit, int(arg) + 1)
    return max(auto_numbered, explicit)
257
258
def interpret(bytecode: bytearray, control: list, data: list, tracing: bool = False):
    """Interpret bytecode and return the top of the data stack.

    Args:
        bytecode: the program to run.
        control: the control stack.  `{` pushes the block that follows as
            a (start, end) pair of byte offsets; `if` / `ifelse` pop
            blocks from it.  Mutated in place.
        data: the data stack; it may be pre-populated to pass arguments.
            Mutated in place.
        tracing: if true, print the interpreter state and a disassembly
            before every instruction.

    Returns:
        `data[-1]` once the program runs off its end or executes `return`.

    Raises:
        IndexError: on stack underflow, or if the data stack is empty
            when the program ends.
        AssertionError: if a selector without an implementation is called.
    """
    # Call stack of (pc, end) byte ranges; the innermost active block is last.
    frame = [(0, len(bytecode))]

    def trace():
        """print a trace of the execution for debugging purposes"""

        def fmt(d):
            if isinstance(d, int):
                return str(d)
            if isinstance(d, str):
                return d
            return repr(type(d))

        pc, end = frame[-1]
        asm, tokens = disassemble(bytecode)
        print(
            "=== frame = {1}, data = {2}, opcode = {0}".format(
                opcode[b], frame, [fmt(d) for d in data]
            )
        )
        print(asm)
        print(" " * (tokens[pc]) + "^")

    def next_byte():
        """Fetch the next byte and update the PC.

        Frames whose block has been executed completely are popped first,
        however many there are, so that execution resumes in the nearest
        enclosing frame that still has bytes left.  Returns None when no
        such frame exists.
        """
        while frame:
            pc, end = frame[-1]
            if pc < end:
                assert pc < len(bytecode)
                frame[-1] = pc + 1, end
                return bytecode[pc]
            frame.pop()
        return None

    while True:
        b = next_byte()
        if b is None:
            break
        if tracing:
            trace()
        # Data stack manipulation.
        if b == op_dup:
            data.append(data[-1])
        elif b == op_drop:
            data.pop()
        elif b == op_pick:
            data.append(data[data.pop()])
        elif b == op_over:
            data.append(data[-2])
        elif b == op_swap:
            x = data.pop()
            y = data.pop()
            data.append(x)
            data.append(y)
        elif b == op_rot:
            z = data.pop()
            y = data.pop()
            x = data.pop()
            data.append(z)
            data.append(x)
            data.append(y)

        # Control stack manipulation.
        elif b == op_begin:
            # Remember the block on the control stack and skip over it.
            length = next_byte()
            pc, end = frame[-1]
            control.append((pc, pc + length))
            frame[-1] = pc + length, end
        elif b == op_if:
            # The block is consumed whether or not it gets executed, so a
            # false condition does not leave it behind on the control stack.
            condition = data.pop()
            block = control.pop()
            if condition:
                frame.append(block)
        elif b == op_ifelse:
            # The else-block is on top of the control stack, the
            # then-block below it.
            if data.pop():
                control.pop()
                frame.append(control.pop())
            else:
                frame.append(control.pop())
                control.pop()
        elif b == op_return:
            control.clear()
            return data[-1]

        # Literals.
        elif b == op_lit_uint:
            b = next_byte()  # FIXME uleb
            data.append(int(b))
        elif b == op_lit_int:
            b = next_byte()  # FIXME uleb
            data.append(int(b))
        elif b == op_lit_selector:
            b = next_byte()
            data.append(b)
        elif b == op_lit_string:
            length = next_byte()
            s = ""
            while length:
                s += chr(next_byte())
                length -= 1
            data.append(s)

        elif b == op_as_uint:
            pass
        elif b == op_as_int:
            pass
        elif b == op_is_null:
            data.append(1 if data.pop() is None else 0)

        # Arithmetic, logic, etc.
        # For binary operators the right-hand operand is on top of the stack.
        elif b == op_plus:
            data.append(data.pop() + data.pop())
        elif b == op_minus:
            data.append(-data.pop() + data.pop())
        elif b == op_mul:
            data.append(data.pop() * data.pop())
        elif b == op_div:
            # Integer division: the stack holds no floating-point values.
            # NOTE(review): `//` rounds towards negative infinity, like
            # `%` below; confirm the intended rounding for negative operands.
            y = data.pop()
            data.append(data.pop() // y)
        elif b == op_mod:
            y = data.pop()
            data.append(data.pop() % y)
        elif b == op_shl:
            y = data.pop()
            data.append(data.pop() << y)
        elif b == op_shr:
            y = data.pop()
            data.append(data.pop() >> y)
        elif b == op_and:
            data.append(data.pop() & data.pop())
        elif b == op_or:
            data.append(data.pop() | data.pop())
        elif b == op_xor:
            data.append(data.pop() ^ data.pop())
        elif b == op_not:
            # NOTE(review): this is a logical negation although the
            # mnemonic is "~" -- confirm whether bitwise NOT is intended.
            data.append(not data.pop())
        elif b == op_eq:
            data.append(data.pop() == data.pop())
        elif b == op_neq:
            data.append(data.pop() != data.pop())
        # The first pop() yields the right-hand operand, so the Python
        # comparison is written mirrored.
        elif b == op_lt:
            data.append(data.pop() > data.pop())
        elif b == op_gt:
            data.append(data.pop() < data.pop())
        elif b == op_le:
            data.append(data.pop() >= data.pop())
        elif b == op_ge:
            data.append(data.pop() <= data.pop())

        # Function calls.
        elif b == op_call:
            sel = data.pop()
            if sel == sel_summary:
                data.append(data.pop().GetSummary())
            elif sel == sel_get_num_children:
                data.append(data.pop().GetNumChildren())
            elif sel == sel_get_child_at_index:
                index = data.pop()
                valobj = data.pop()
                data.append(valobj.GetChildAtIndex(index))
            elif sel == sel_get_child_with_name:
                name = data.pop()
                valobj = data.pop()
                data.append(valobj.GetChildMemberWithName(name))
            elif sel == sel_get_child_index:
                name = data.pop()
                valobj = data.pop()
                data.append(valobj.GetIndexOfChildWithName(name))
            elif sel == sel_get_type:
                data.append(data.pop().GetType())
            elif sel == sel_get_template_argument_type:
                n = data.pop()
                valobj = data.pop()
                data.append(valobj.GetTemplateArgumentType(n))
            elif sel == sel_get_value:
                data.append(data.pop().GetValue())
            elif sel == sel_get_value_as_unsigned:
                data.append(data.pop().GetValueAsUnsigned())
            elif sel == sel_get_value_as_signed:
                data.append(data.pop().GetValueAsSigned())
            elif sel == sel_get_value_as_address:
                data.append(data.pop().GetValueAsAddress())
            elif sel == sel_cast:
                sbtype = data.pop()
                valobj = data.pop()
                data.append(valobj.Cast(sbtype))
            elif sel == sel_strlen:
                s = data.pop()
                data.append(len(s) if s else 0)
            elif sel == sel_fmt:
                fmt = data.pop()
                n = count_fmt_params(fmt)
                # The topmost stack entry becomes format argument 0.
                args = [data.pop() for _ in range(n)]
                data.append(fmt.format(*args))
            else:
                print("not implemented: " + selector[sel])
                assert False
    return data[-1]
467
468
if __name__ == "__main__":
    # Work around the fact that one of the local files is called
    # types.py, which breaks some versions of python.
    import os
    import sys

    path = os.path.abspath(os.path.dirname(__file__))
    # Best effort: only drop the script directory if it is actually listed.
    if path in sys.path:
        sys.path.remove(path)
    import argparse

    parser = argparse.ArgumentParser(
        description="""
    Compiler, disassembler, and interpreter for LLDB dataformatter bytecode.
    See https://lldb.llvm.org/resources/formatterbytecode.html for more details.
    """
    )
    parser.add_argument(
        "-c", "--compile", type=str, help="compile assembler into bytecode"
    )
    parser.add_argument("-d", "--disassemble", type=str, help="disassemble bytecode")
    parser.add_argument("-t", "--test", action="store_true", help="run unit tests")
    args = parser.parse_args()
    if args.compile:
        print(compile(str(args.compile)).hex())

    if args.disassemble:
        print(disassemble(bytearray.fromhex(str(args.disassemble))))

    ############################################################################
    # Tests.
    ############################################################################
    if args.test:
        import unittest

        class TestCompiler(unittest.TestCase):
            def test(self):
                self.assertEqual(compile("1u dup").hex(), "200101")
                self.assertEqual(compile('"1u dup"').hex(), "2206317520647570")
                self.assertEqual(compile("16 < { dup } if").hex(), "21105210010111")
                self.assertEqual(compile('{ { " } " } }').hex(), "100710052203207d20")

                def roundtrip(asm):
                    self.assertEqual(disassemble(compile(asm))[0], asm)

                roundtrip("1u dup")
                roundtrip('1u dup "1u dup"')
                roundtrip("16 < { dup } if")
                roundtrip('{ { " } " } }')

                self.assertEqual(interpret(compile("1 1 +"), [], []), 2)
                self.assertEqual(interpret(compile("2 1 1 + *"), [], []), 4)
                self.assertEqual(
                    interpret(compile('2 1 > { "yes" } { "no" } ifelse'), [], []), "yes"
                )

        # Run the tests from module level (not from inside the test method,
        # where they would never be started).  unittest gets only the
        # program name, so it does not try to parse this script's own
        # options.
        unittest.main(argv=sys.argv[:1])
529