xref: /llvm-project/llvm/utils/extract-section.py (revision b71edfaa4ec3c998aadb35255ce2f60bba2940b0)
1c23a780cSMin-Yih Hsu#!/usr/bin/env python
2c23a780cSMin-Yih Hsufrom __future__ import print_function
3*b71edfaaSTobias Hieta
"""
Helper script to print out the raw content of an ELF section.
Example usages:
```
# print out as bits by default
extract-section.py .text --input-file=foo.o
```
```
# read from stdin and print out in hex
cat foo.o | extract-section.py -h .text
```
This is merely a wrapper around `llvm-readobj` that focuses on the binary
content as well as providing more formatting options.
"""
18c23a780cSMin-Yih Hsu
# Unfortunately reading binary from stdin is not so trivial in Python...
def read_raw_stdin():
    """Read everything from standard input without any text decoding.

    Returns:
        The full content of stdin as binary data (``bytes`` on Python 3,
        ``str`` on Python 2).
    """
    import sys

    if sys.version_info >= (3, 0):
        # Python 3 wraps stdin in a text layer; the raw byte stream lives
        # underneath it in ``.buffer``.
        return sys.stdin.buffer.read()

    # Python 2: Windows will always read as string so we need some special
    # handling to switch the descriptor into binary mode first.
    if sys.platform == "win32":
        import msvcrt
        import os

        # NOTE: the function is ``setmode``; ``msvcrt`` has no ``setformat``.
        msvcrt.setmode(sys.stdin.fileno(), os.O_BINARY)
    return sys.stdin.read()
34c23a780cSMin-Yih Hsu
35*b71edfaaSTobias Hieta
def get_raw_section_dump(readobj_path, section_name, input_file):
    """Run llvm-readobj and return its hex dump of a single section as text.

    Args:
        readobj_path: Executable to invoke (expected to be llvm-readobj).
        section_name: Section passed to ``--hex-dump=``.
        input_file: Path of the object file, or ``"-"`` to forward this
            process' own stdin to the tool.

    Returns:
        Everything the tool wrote to stdout, as a ``str``.
    """
    import subprocess

    cmd = [
        readobj_path,
        "--elf-output-style=GNU",
        "--hex-dump={}".format(section_name),
        input_file,
    ]
    proc = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)

    # Only forward our stdin when the tool is told to read from it; with
    # ``input=None`` communicate() sends nothing to the child's stdin.
    stdin_data = read_raw_stdin() if input_file == "-" else None
    out, _ = proc.communicate(input=stdin_data)

    # Python 3 hands back bytes, Python 2 already gives a str.
    return out if isinstance(out, str) else out.decode("utf-8")
54c23a780cSMin-Yih Hsu
55*b71edfaaSTobias Hieta
_HEX_DIGITS = frozenset("0123456789abcdefABCDEF")


def _decode_hex_group(part):
    """Turn one hex group such as ``"01ab"`` into a list of byte values.

    Raises:
        ValueError: if ``part`` is empty, has an odd length, or contains a
            character that is not a hexadecimal digit.
    """
    if not part or len(part) % 2 or not _HEX_DIGITS.issuperset(part):
        raise ValueError("not a hex dump group: {!r}".format(part))
    return [int(part[i : i + 2], 16) for i in range(0, len(part), 2)]


def format_section_dump(
    raw_section, fmt="bits", bits_endian="big", byte_indicator=False, hex_width=4
):
    """Convert the text of a hex dump into a list of output tokens.

    Each non-header line is split on single spaces; the first token (the
    address column) is dropped and at most the next four tokens are treated
    as data groups. Processing of a line stops at the first token that is not
    a valid hex group, which excludes padding and the trailing ASCII column.
    Groups are decoded byte by byte, so a short trailing group is emitted
    as-is instead of being zero-extended to 32 bits.

    Args:
        raw_section: Hex dump text as returned by ``get_raw_section_dump``.
        fmt: ``"bits"`` to emit one token per bit, ``"hex"`` to emit hex
            strings; any other value produces no tokens.
        bits_endian: In bits mode, ``"little"`` reverses the byte order within
            each group; any other value keeps the order of the dump.
        byte_indicator: In bits mode, append a ``"."`` token after every byte.
        hex_width: In hex mode, number of bytes per emitted token (1 to 4).

    Returns:
        A list of string tokens, meant to be joined with a single space.

    Raises:
        ValueError: if ``fmt`` is ``"hex"`` and ``hex_width`` is not in 1..4.
    """
    if fmt == "hex" and not 0 < hex_width <= 4:
        raise ValueError("hex width must be between 1 and 4 bytes")

    results = []
    for line in raw_section.splitlines(False):
        if line.startswith("Hex dump"):
            continue
        parts = line.strip().split(" ")[1:]
        for part in parts[:4]:
            try:
                group = _decode_hex_group(part)
            except ValueError:
                # exclude any non-hex dump string
                break
            if fmt == "bits":
                if bits_endian == "little":
                    group.reverse()
                for byte in group:
                    # One token per bit, most significant bit first.
                    results.extend("{:08b}".format(byte))
                    if byte_indicator:
                        results.append(".")
            elif fmt == "hex":
                for start in range(0, len(group), hex_width):
                    chunk = group[start : start + hex_width]
                    results.append("".join("{:02x}".format(b) for b in chunk))
    return results


if __name__ == "__main__":
    import argparse

    # The default '-h' (--help) will conflict with our '-h' (hex) format
    arg_parser = argparse.ArgumentParser(add_help=False)
    arg_parser.add_argument(
        "--readobj-path",
        # Store under the name the rest of the script (and set_defaults
        # below) uses; otherwise the option would be silently ignored.
        dest="tool_path",
        metavar="<executable path>",
        type=str,
        help="Path to llvm-readobj",
    )
    arg_parser.add_argument(
        "--input-file",
        metavar="<file>",
        type=str,
        help="Input object file, or '-' to read from stdin",
    )
    arg_parser.add_argument(
        "section", metavar="<name>", type=str, help="Name of the section to extract"
    )
    # Output format
    format_group = arg_parser.add_mutually_exclusive_group()
    format_group.add_argument(
        "-b",
        dest="format",
        action="store_const",
        const="bits",
        help="Print out in bits",
    )
    arg_parser.add_argument(
        "--byte-indicator",
        action="store_true",
        help="Whether to print a '.' every 8 bits in bits printing mode",
    )
    arg_parser.add_argument(
        "--bits-endian",
        metavar="<little/big>",
        type=str,
        choices=["little", "big"],
        help="Print out bits in specified endianness (little or big); defaults to big",
    )
    format_group.add_argument(
        "-h",
        dest="format",
        action="store_const",
        const="hex",
        help="Print out in hexadecimal",
    )
    arg_parser.add_argument(
        "--hex-width",
        metavar="<# of bytes>",
        type=int,
        help="The width (in byte) of every element in hex printing mode",
    )

    arg_parser.add_argument("--help", action="help")
    arg_parser.set_defaults(
        format="bits",
        tool_path="llvm-readobj",
        input_file="-",
        byte_indicator=False,
        hex_width=4,
        bits_endian="big",
    )
    args = arg_parser.parse_args()

    # Reject a bad width up front with a usage error instead of silently
    # producing empty output.
    if args.format == "hex" and not 0 < args.hex_width <= 4:
        arg_parser.error("--hex-width must be between 1 and 4")

    raw_section = get_raw_section_dump(args.tool_path, args.section, args.input_file)

    results = format_section_dump(
        raw_section,
        args.format,
        args.bits_endian,
        args.byte_indicator,
        args.hex_width,
    )
    print(" ".join(results), end="")
154