xref: /llvm-project/llvm/utils/extract-section.py (revision b71edfaa4ec3c998aadb35255ce2f60bba2940b0)
1#!/usr/bin/env python
2from __future__ import print_function
3
4"""
5Helper script to print out the raw content of an ELF section.
6Example usages:
7```
8# print out as bits by default
9extract-section.py .text --input-file=foo.o
10```
11```
12# read from stdin and print out in hex
13cat foo.o | extract-section.py -h .text
14```
15This is merely a wrapper around `llvm-readobj` that focuses on the binary
16content as well as providing more formatting options.
17"""
18
19# Unfortunately reading binary from stdin is not so trivial in Python...
def read_raw_stdin():
    """Read everything from standard input and return it unmodified.

    On Python 3 the data is read from ``sys.stdin.buffer`` so that no text
    decoding is applied. On Python 2 ``sys.stdin`` itself is read, after
    first being switched to binary mode when running on Windows.
    """
    import sys

    if sys.version_info >= (3, 0):
        # sys.stdin is a text stream on Python 3; its .buffer attribute is
        # the underlying binary stream.
        return sys.stdin.buffer.read()

    # Windows will always read as string so we need some
    # special handling
    if sys.platform == "win32":
        import msvcrt
        import os

        # The msvcrt function that changes a descriptor's line-end
        # translation mode is setmode(); there is no msvcrt.setformat().
        msvcrt.setmode(sys.stdin.fileno(), os.O_BINARY)
    return sys.stdin.read()
34
35
def get_raw_section_dump(readobj_path, section_name, input_file):
    """Run llvm-readobj's hex dump on one section and return its stdout as text.

    readobj_path -- executable to invoke
    section_name -- value passed to ``--hex-dump=``
    input_file   -- path handed to the tool; when it is "-", this process's
                    own stdin is read and piped to the tool's stdin

    The tool's exit status is not inspected and its stderr is not captured.
    """
    import subprocess

    hex_dump_flag = "--hex-dump={}".format(section_name)
    argv = [readobj_path, "--elf-output-style=GNU", hex_dump_flag, input_file]
    child = subprocess.Popen(argv, stdin=subprocess.PIPE, stdout=subprocess.PIPE)

    # Only forward our stdin when the tool was asked to read from "-";
    # otherwise nothing is written to the child's stdin.
    payload = read_raw_stdin() if input_file == "-" else None
    captured = child.communicate(input=payload)[0]

    # Python 3 yields bytes here, Python 2 already yields str.
    if isinstance(captured, str):
        return captured
    return captured.decode("utf-8")
54
55
def format_section_dump(
    raw_section,
    output_format="bits",
    bits_endian="big",
    byte_indicator=False,
    hex_width=4,
):
    """Convert the text of a section hex dump into a list of output tokens.

    raw_section    -- text as returned by get_raw_section_dump()
    output_format  -- "bits" (one token per bit) or "hex"
    bits_endian    -- in bits mode, "little" emits the bytes of every 32-bit
                      word lowest byte first; any other value emits them
                      highest byte first. Bits inside a byte are always
                      emitted most-significant first.
    byte_indicator -- in bits mode, append a "." token after every byte
    hex_width      -- in hex mode, number of bytes per emitted element (1-4)

    Returns the list of string tokens. Raises ValueError for an unknown
    output format or, in hex mode, a width outside 1-4.
    """
    if output_format == "hex":
        if not 0 < hex_width <= 4:
            raise ValueError("hex width must be between 1 and 4 bytes")
        # These depend only on the width, so compute them once.
        # NOTE: a width of 3 yields a single offset, so only the upper three
        # bytes of each 32-bit word are emitted.
        width_bits = hex_width * 8
        word_offsets = list(range(32 - width_bits, -1, -width_bits))
        mask = (1 << width_bits) - 1
        format_str = "{:0" + str(hex_width * 2) + "x}"
    elif output_format == "bits":
        byte_offsets = (0, 8, 16, 24) if bits_endian == "little" else (24, 16, 8, 0)
    else:
        raise ValueError("unknown output format: {}".format(output_format))

    results = []
    for line in raw_section.splitlines(False):
        if line.startswith("Hex dump"):
            continue
        # Skip the first space-separated field and look at no more than the
        # four fields after it.
        for part in line.strip().split(" ")[1:][:4]:
            try:
                val = int(part, 16)
            except ValueError:
                # exclude any non-hex dump string: the first field that is
                # not a hex number ends this line.
                break
            if output_format == "bits":
                # divided into bytes first
                for off in byte_offsets:
                    byte = (val >> off) & 0xFF
                    results.extend(str((byte >> bit) & 1) for bit in range(7, -1, -1))
                    if byte_indicator:
                        results.append(".")
            else:
                results.extend(
                    format_str.format((val >> off) & mask) for off in word_offsets
                )
    return results


if __name__ == "__main__":
    import argparse

    # The default '-h' (--help) will conflict with our '-h' (hex) format
    arg_parser = argparse.ArgumentParser(add_help=False)
    arg_parser.add_argument(
        "--readobj-path",
        # Store under the attribute that set_defaults() and the call below
        # use; with the implicit dest ("readobj_path") the option was ignored.
        dest="tool_path",
        metavar="<executable path>",
        type=str,
        help="Path to llvm-readobj",
    )
    arg_parser.add_argument(
        "--input-file",
        metavar="<file>",
        type=str,
        help="Input object file, or '-' to read from stdin",
    )
    arg_parser.add_argument(
        "section", metavar="<name>", type=str, help="Name of the section to extract"
    )
    # Output format
    format_group = arg_parser.add_mutually_exclusive_group()
    format_group.add_argument(
        "-b",
        dest="format",
        action="store_const",
        const="bits",
        help="Print out in bits",
    )
    arg_parser.add_argument(
        "--byte-indicator",
        action="store_true",
        help="Whether to print a '.' every 8 bits in bits printing mode",
    )
    arg_parser.add_argument(
        "--bits-endian",
        metavar="<little/big>",
        type=str,
        choices=["little", "big"],
        help="Print out bits in specified endianness (little or big); defaults to big",
    )
    format_group.add_argument(
        "-h",
        dest="format",
        action="store_const",
        const="hex",
        help="Print out in hexadecimal",
    )
    arg_parser.add_argument(
        "--hex-width",
        metavar="<# of bytes>",
        type=int,
        help="The width (in byte) of every element in hex printing mode",
    )

    arg_parser.add_argument("--help", action="help")
    arg_parser.set_defaults(
        format="bits",
        tool_path="llvm-readobj",
        input_file="-",
        byte_indicator=False,
        hex_width=4,
        bits_endian="big",
    )
    args = arg_parser.parse_args()

    # Report a bad width as a usage error before running the tool. The width
    # is only used in hex mode, so it is only checked there.
    if args.format == "hex" and not 0 < args.hex_width <= 4:
        arg_parser.error("--hex-width must be between 1 and 4")

    raw_section = get_raw_section_dump(args.tool_path, args.section, args.input_file)

    results = format_section_dump(
        raw_section,
        args.format,
        args.bits_endian,
        args.byte_indicator,
        args.hex_width,
    )
    print(" ".join(results), end="")
154