xref: /llvm-project/libcxx/utils/libcxx/sym_check/util.py (revision 7bfaa0f09d0564f315ea778023b34b8a113ec740)
1# ===----------------------------------------------------------------------===##
2#
3# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4# See https://llvm.org/LICENSE.txt for license information.
5# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6#
7# ===----------------------------------------------------------------------===##
8
9from pprint import pformat
10import ast
11import re
12import shutil
13import subprocess
14import sys
15
16
def read_syms_from_list(slist):
    """
    Read a list of symbols from a list of strings.
    Each non-blank string is one symbol, written as a Python literal
    and parsed with ast.literal_eval.

    Empty or whitespace-only strings are skipped; previously they made
    ast.literal_eval raise SyntaxError.

    Returns a list with one parsed value per non-blank input string.
    """
    return [ast.literal_eval(line) for line in slist if line.strip()]
23
24
def read_syms_from_file(filename):
    """
    Load the symbols stored in the text file `filename`.

    The file is read in full, split into lines, and the lines are handed
    to read_syms_from_list for parsing.
    """
    with open(filename, "r") as f:
        symbol_lines = f.read().splitlines()
    return read_syms_from_list(symbol_lines)
32
33
def read_exclusions(filename):
    """
    Read the exclusion entries stored in the text file `filename`.

    Each line is stripped of surrounding whitespace. Lines that are empty
    after stripping, or that start with "#", are dropped. The remaining
    stripped lines are returned in file order.
    """
    with open(filename, "r") as f:
        contents = f.read()
    exclusions = []
    for raw_line in contents.splitlines():
        entry = raw_line.strip()
        if entry and not entry.startswith("#"):
            exclusions.append(entry)
    return exclusions
40
41
def write_syms(sym_list, out=None, names_only=False, filter=None):
    """
    Write a list of symbols, sorted by name, to the file named by out.

    sym_list:   iterable of symbol dicts; each must have a "name" key.
                It is not modified.
    out:        path of the file to write; when None the text is written
                to sys.stdout instead.
    names_only: when true, only each symbol's "name" value is written.
    filter:     optional callable applied to the sorted list; whatever it
                returns is what gets written.

    One pformat()-ed entry is emitted per line.
    """
    # sorted() rather than list.sort(): the original sorted sym_list in
    # place, silently reordering the caller's list.
    out_list = sorted(sym_list, key=lambda sym: sym["name"])
    if filter is not None:
        out_list = filter(out_list)
    if names_only:
        out_list = [sym["name"] for sym in out_list]
    # Use pformat for consistent ordering of keys; the huge width keeps
    # every entry on a single line.
    out_str = "".join(pformat(sym, width=100000) + "\n" for sym in out_list)
    if out is None:
        sys.stdout.write(out_str)
    else:
        with open(out, "w") as f:
            f.write(out_str)
61
62
# Path of the c++filt executable, looked up once at import time; None when
# shutil.which cannot find it.
_cppfilt_exe = shutil.which("c++filt")


def demangle_symbol(symbol):
    """
    Demangle `symbol` by piping it through c++filt.

    Returns the decoded standard output of c++filt. If c++filt was not
    found, or it exits with a non-zero status, `symbol` is returned
    unchanged.
    """
    if _cppfilt_exe is not None:
        proc = subprocess.run(
            [_cppfilt_exe],
            input=symbol.encode(),
            capture_output=True,
        )
        if proc.returncode == 0:
            return proc.stdout.decode()
    return symbol
73
74
def is_elf(filename):
    """
    Return True if the first four bytes of `filename` are the ELF magic
    number, False otherwise.
    """
    elf_magic = b"\x7fELF"
    with open(filename, "rb") as f:
        header = f.read(len(elf_magic))
    return header == elf_magic
79
80
def is_mach_o(filename):
    """
    Return True if the first four bytes of `filename` equal one of the
    Mach-O / fat-binary magic numbers listed below, False otherwise.
    """
    known_magics = (
        b"\xfe\xed\xfa\xce",  # MH_MAGIC
        b"\xce\xfa\xed\xfe",  # MH_CIGAM
        b"\xfe\xed\xfa\xcf",  # MH_MAGIC_64
        b"\xcf\xfa\xed\xfe",  # MH_CIGAM_64
        b"\xca\xfe\xba\xbe",  # FAT_MAGIC
        b"\xbe\xba\xfe\xca",  # FAT_CIGAM
    )
    with open(filename, "rb") as f:
        header = f.read(4)
    return any(header == magic for magic in known_magics)
92
93
def is_xcoff_or_big_ar(filename):
    """
    Return True if `filename` starts with an XCOFF object magic number
    (32- or 64-bit) or with the AIX big-archive magic string "<bigaf>",
    False otherwise.
    """
    with open(filename, "rb") as f:
        magic_bytes = f.read(7)
    # The XCOFF magic is the 2-byte number 0x01DF (32-bit) or 0x01F7
    # (64-bit). The previous literals b"\x01DF" / b"\x01F7" were three
    # bytes long (0x01 followed by two ASCII letters) and were compared
    # with a four-byte slice, so they could never match a real object.
    xcoff_magics = (
        b"\x01\xdf",  # XCOFF32
        b"\x01\xf7",  # XCOFF64
    )
    return magic_bytes[:2] in xcoff_magics or magic_bytes == b"<bigaf>"
101
102
def is_library_file(filename):
    """
    Return whether `filename` looks like a binary in the format checked
    for the running platform.

    The check is selected from sys.platform: is_mach_o on "darwin",
    is_xcoff_or_big_ar on platforms whose name starts with "aix", and
    is_elf everywhere else.
    """
    platform = sys.platform
    if platform == "darwin":
        detector = is_mach_o
    elif platform.startswith("aix"):
        detector = is_xcoff_or_big_ar
    else:
        detector = is_elf
    return detector(filename)
110
111
def extract_or_load(filename):
    """
    Return the symbols for `filename`.

    If is_library_file() reports a library, the result of
    libcxx.sym_check.extract.extract_symbols(filename) is returned.
    Otherwise the file is parsed as a symbol list with
    read_syms_from_file().
    """
    # Imported inside the function, as the original does.
    # NOTE(review): presumably this avoids a circular import; confirm.
    import libcxx.sym_check.extract

    if not is_library_file(filename):
        return read_syms_from_file(filename)
    return libcxx.sym_check.extract.extract_symbols(filename)
118
119
def adjust_mangled_name(name):
    """
    Drop the first character of `name` when it starts with "__Z", so the
    result starts with "_Z". Any other name is returned unchanged.
    """
    return name[1:] if name.startswith("__Z") else name
124
125
# Exact symbol names that is_stdlib_symbol_name() accepts as standard-library
# symbols. NOTE(review): going by the variable name these are the mangled
# global operator new / operator delete overloads -- confirm against the
# Itanium C++ ABI mangling rules.
new_delete_std_symbols = ["_Znam", "_Znwm", "_ZdaPv", "_ZdaPvm", "_ZdlPv", "_ZdlPvm"]
127
# Exact symbol names that is_stdlib_symbol_name() accepts as standard-library
# symbols. The list holds two triple-underscore runtime entry points followed
# by a "_ZTI..." group and a parallel "_ZTS..." group with the same suffixes.
# NOTE(review): "_ZTI" / "_ZTS" look like the Itanium C++ ABI manglings for
# typeinfo objects / typeinfo names of builtin types and pointers to them --
# confirm against the ABI specification.
cxxabi_symbols = [
    "___dynamic_cast",
    "___gxx_personality_v0",
    # "_ZTI..." entries.
    "_ZTIDi",
    "_ZTIDn",
    "_ZTIDs",
    "_ZTIPDi",
    "_ZTIPDn",
    "_ZTIPDs",
    "_ZTIPKDi",
    "_ZTIPKDn",
    "_ZTIPKDs",
    "_ZTIPKa",
    "_ZTIPKb",
    "_ZTIPKc",
    "_ZTIPKd",
    "_ZTIPKe",
    "_ZTIPKf",
    "_ZTIPKh",
    "_ZTIPKi",
    "_ZTIPKj",
    "_ZTIPKl",
    "_ZTIPKm",
    "_ZTIPKs",
    "_ZTIPKt",
    "_ZTIPKv",
    "_ZTIPKw",
    "_ZTIPKx",
    "_ZTIPKy",
    "_ZTIPa",
    "_ZTIPb",
    "_ZTIPc",
    "_ZTIPd",
    "_ZTIPe",
    "_ZTIPf",
    "_ZTIPh",
    "_ZTIPi",
    "_ZTIPj",
    "_ZTIPl",
    "_ZTIPm",
    "_ZTIPs",
    "_ZTIPt",
    "_ZTIPv",
    "_ZTIPw",
    "_ZTIPx",
    "_ZTIPy",
    "_ZTIa",
    "_ZTIb",
    "_ZTIc",
    "_ZTId",
    "_ZTIe",
    "_ZTIf",
    "_ZTIh",
    "_ZTIi",
    "_ZTIj",
    "_ZTIl",
    "_ZTIm",
    "_ZTIs",
    "_ZTIt",
    "_ZTIv",
    "_ZTIw",
    "_ZTIx",
    "_ZTIy",
    # "_ZTS..." entries, same suffixes as the "_ZTI..." group above.
    "_ZTSDi",
    "_ZTSDn",
    "_ZTSDs",
    "_ZTSPDi",
    "_ZTSPDn",
    "_ZTSPDs",
    "_ZTSPKDi",
    "_ZTSPKDn",
    "_ZTSPKDs",
    "_ZTSPKa",
    "_ZTSPKb",
    "_ZTSPKc",
    "_ZTSPKd",
    "_ZTSPKe",
    "_ZTSPKf",
    "_ZTSPKh",
    "_ZTSPKi",
    "_ZTSPKj",
    "_ZTSPKl",
    "_ZTSPKm",
    "_ZTSPKs",
    "_ZTSPKt",
    "_ZTSPKv",
    "_ZTSPKw",
    "_ZTSPKx",
    "_ZTSPKy",
    "_ZTSPa",
    "_ZTSPb",
    "_ZTSPc",
    "_ZTSPd",
    "_ZTSPe",
    "_ZTSPf",
    "_ZTSPh",
    "_ZTSPi",
    "_ZTSPj",
    "_ZTSPl",
    "_ZTSPm",
    "_ZTSPs",
    "_ZTSPt",
    "_ZTSPv",
    "_ZTSPw",
    "_ZTSPx",
    "_ZTSPy",
    "_ZTSa",
    "_ZTSb",
    "_ZTSc",
    "_ZTSd",
    "_ZTSe",
    "_ZTSf",
    "_ZTSh",
    "_ZTSi",
    "_ZTSj",
    "_ZTSl",
    "_ZTSm",
    "_ZTSs",
    "_ZTSt",
    "_ZTSv",
    "_ZTSw",
    "_ZTSx",
    "_ZTSy",
]
252
253
def is_stdlib_symbol_name(name, sym):
    """
    Decide whether `name` counts as a standard-library symbol.

    name: symbol name; it is passed through adjust_mangled_name first.
    sym:  the symbol's dict; only its "is_defined" entry is read, and only
          for names containing "@GLIBC" or "@GCC".

    For "@GLIBC" / "@GCC" names the value of sym["is_defined"] is returned.
    Otherwise the result is True when the name contains "St<digit>",
    "__cxa" or "__cxxabi", is listed in new_delete_std_symbols or
    cxxabi_symbols, or starts with "_Z"; and False in every other case.
    """
    canonical = adjust_mangled_name(name)
    if re.search("@GLIBC|@GCC", canonical) is not None:
        # Only when symbol is defined do we consider it ours
        return sym["is_defined"]
    if re.search("(St[0-9])|(__cxa)|(__cxxabi)", canonical) is not None:
        return True
    return (
        canonical in new_delete_std_symbols
        or canonical in cxxabi_symbols
        or canonical.startswith("_Z")
    )
268
269
def filter_stdlib_symbols(syms):
    """
    Partition `syms` into standard-library symbols and everything else.

    Each symbol's "name" is passed through adjust_mangled_name and then
    classified with is_stdlib_symbol_name. Returns a tuple
    (stdlib_symbols, other_symbols); both lists keep the input order.
    """
    stdlib_symbols, other_symbols = [], []
    for sym in syms:
        canonical = adjust_mangled_name(sym["name"])
        if is_stdlib_symbol_name(canonical, sym):
            bucket = stdlib_symbols
        else:
            bucket = other_symbols
        bucket.append(sym)
    return stdlib_symbols, other_symbols
280