xref: /openbsd-src/gnu/llvm/libcxx/utils/libcxx/sym_check/util.py (revision 4bdff4bed0e3d54e55670334c7d0077db4170f86)
1#===----------------------------------------------------------------------===##
2#
3# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4# See https://llvm.org/LICENSE.txt for license information.
5# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6#
7#===----------------------------------------------------------------------===##
8
from pprint import pformat
import ast
import distutils.spawn
import re
import shutil
import subprocess
import sys
15
16
def read_syms_from_list(slist):
    """
    Turn a sequence of strings into a list of symbols.
    Every string is parsed as one Python literal with ast.literal_eval,
    and the results are returned in the order the strings were given.
    """
    return list(map(ast.literal_eval, slist))
23
24
def read_syms_from_file(filename):
    """
    Load a symbol list from the text file named by filename.
    The file is read in full, split into lines, and each line is handed to
    read_syms_from_list as one symbol.
    """
    with open(filename, 'r') as sym_file:
        contents = sym_file.read()
    sym_lines = contents.splitlines()
    return read_syms_from_list(sym_lines)
32
33
def read_exclusions(filename):
    """
    Return the meaningful lines of the file named by filename.
    Each line is stripped of surrounding whitespace; lines that are then
    empty, or that start with '#', are left out.
    """
    with open(filename, 'r') as exclusions_file:
        contents = exclusions_file.read()
    entries = []
    for raw_line in contents.splitlines():
        entry = raw_line.strip()
        # Skip blank lines and comment lines.
        if not entry or entry.startswith('#'):
            continue
        entries.append(entry)
    return entries
40
41
def write_syms(sym_list, out=None, names_only=False, filter=None):
    """
    Write a list of symbols to the file named by out.

    sym_list is any iterable of symbol dictionaries, each with a 'name'
    key. It is not modified: a sorted copy (ordered by 'name') is written.
    out is the path of the file to (over)write; when it is None the text
    goes to sys.stdout instead.
    names_only, when true, writes only each symbol's 'name' value.
    filter, when given, is called with the sorted list and whatever it
    returns is written in place of that list.

    Each entry is written on its own line in pformat() form.
    """
    # sorted() works on a copy, so the caller's list keeps its order and
    # non-list iterables (tuples, generators) are accepted too.
    out_list = sorted(sym_list, key=lambda sym: sym['name'])
    if filter is not None:
        out_list = filter(out_list)
    if names_only:
        out_list = [sym['name'] for sym in out_list]
    # Use pformat for consistent ordering of keys.
    out_str = ''.join(pformat(sym, width=100000) + '\n' for sym in out_list)
    if out is None:
        sys.stdout.write(out_str)
    else:
        with open(out, 'w') as f:
            f.write(out_str)
61
62
# Path of the c++filt demangler found on PATH, or None when there is none.
# shutil.which is used because distutils (and with it
# distutils.spawn.find_executable) was deprecated by PEP 632 and removed from
# the standard library in Python 3.12; unlike find_executable it also checks
# that the file is actually executable.
_cppfilt_exe = shutil.which('c++filt')


def demangle_symbol(symbol):
    """
    Return symbol as rewritten by c++filt.

    The symbol is fed to c++filt on standard input and the tool's standard
    output is returned decoded, exactly as printed. When c++filt was not
    found, or exits with a non-zero status, symbol is returned unchanged.
    """
    if _cppfilt_exe is None:
        return symbol
    result = subprocess.run([_cppfilt_exe], input=symbol.encode(), capture_output=True)
    if result.returncode != 0:
        return symbol
    return result.stdout.decode()
73
74
def is_elf(filename):
    """
    Report whether the file named by filename starts with the four-byte
    ELF magic number.
    """
    elf_magic = b'\x7fELF'
    with open(filename, 'rb') as binary:
        return binary.read(len(elf_magic)) == elf_magic
79
80
def is_mach_o(filename):
    """
    Report whether the first four bytes of the file named by filename are
    one of the Mach-O or fat-binary magic numbers, in either byte order.
    """
    known_magics = (
        b'\xfe\xed\xfa\xce',  # MH_MAGIC
        b'\xce\xfa\xed\xfe',  # MH_CIGAM
        b'\xfe\xed\xfa\xcf',  # MH_MAGIC_64
        b'\xcf\xfa\xed\xfe',  # MH_CIGAM_64
        b'\xca\xfe\xba\xbe',  # FAT_MAGIC
        b'\xbe\xba\xfe\xca',  # FAT_CIGAM
    )
    with open(filename, 'rb') as binary:
        header = binary.read(4)
    return header in known_magics
92
def is_xcoff_or_big_ar(filename):
    """
    Report whether the file named by filename is an XCOFF object or an AIX
    big-format archive, judging by its leading bytes.

    An XCOFF object starts with a two-byte big-endian magic number
    (0x01DF for 32-bit, 0x01F7 for 64-bit); a big archive starts with the
    text '<bigaf>'.
    """
    with open(filename, 'rb') as f:
        magic_bytes = f.read(7)
    # The magic is two raw bytes, so it has to be spelled with \x escapes:
    # b'\x01DF' would be the three bytes 0x01, 'D', 'F' and never match.
    return magic_bytes[:2] in (
        b'\x01\xdf',  # XCOFF32
        b'\x01\xf7',  # XCOFF64
    ) or magic_bytes == b'<bigaf>'
100
def is_library_file(filename):
    """
    Report whether the file named by filename has the object-file format
    checked for on the running platform: Mach-O when sys.platform is
    'darwin', XCOFF or big archive when it starts with 'aix', ELF otherwise.
    """
    platform = sys.platform
    if platform == 'darwin':
        detector = is_mach_o
    elif platform.startswith('aix'):
        detector = is_xcoff_or_big_ar
    else:
        detector = is_elf
    return detector(filename)
108
109
def extract_or_load(filename):
    """
    Produce the symbol list for filename.
    A file recognised by is_library_file has its symbols extracted with
    libcxx.sym_check.extract.extract_symbols; any other file is read as a
    symbol list with read_syms_from_file.
    """
    # Imported here rather than at the top of the module, as in the
    # original code.
    import libcxx.sym_check.extract
    if not is_library_file(filename):
        return read_syms_from_file(filename)
    return libcxx.sym_check.extract.extract_symbols(filename)
115
def adjust_mangled_name(name):
    """
    Drop the first character of a name that starts with '__Z', turning it
    into a '_Z...' name; every other name is returned unchanged.
    """
    # NOTE(review): presumably this removes the extra leading underscore
    # some platforms put in front of mangled names -- confirm.
    return name[1:] if name.startswith('__Z') else name
120
# Mangled names of the global allocation and deallocation operators that
# is_stdlib_symbol_name treats as standard library symbols.
new_delete_std_symbols = [
    # operator new[] / operator new
    '_Znam', '_Znwm',
    # operator delete[], without and with a size argument
    '_ZdaPv', '_ZdaPvm',
    # operator delete, without and with a size argument
    '_ZdlPv', '_ZdlPvm',
]
129
# Names that is_stdlib_symbol_name accepts as C++ ABI library symbols: two
# runtime entry points followed by the '_ZTI' (typeinfo) and '_ZTS'
# (typeinfo name) symbols of a fixed set of type codes, each in plain,
# pointer ('P') and pointer-to-const ('PK') form.
# The comprehension yields the entries in the order they have always been
# listed: per prefix, the 'D*' codes as plain/P/PK, then the single-letter
# codes as PK/P/plain.
cxxabi_symbols = ['___dynamic_cast', '___gxx_personality_v0'] + [
    prefix + qualifier + code
    for prefix in ('_ZTI', '_ZTS')
    for qualifier, codes in (
        ('', ('Di', 'Dn', 'Ds')),
        ('P', ('Di', 'Dn', 'Ds')),
        ('PK', ('Di', 'Dn', 'Ds')),
        ('PK', 'abcdefhijlmstvwxy'),
        ('P', 'abcdefhijlmstvwxy'),
        ('', 'abcdefhijlmstvwxy'),
    )
    for code in codes
]
254
def is_stdlib_symbol_name(name, sym):
    """
    Decide whether name denotes a standard library symbol.

    name is first passed through adjust_mangled_name. A name containing
    '@GLIBC' or '@GCC' is judged solely by sym['is_defined'], which is
    returned as is. Any other name counts when it contains 'St<digit>',
    '__cxa' or '__cxxabi', is listed in new_delete_std_symbols or
    cxxabi_symbols, or starts with '_Z'.
    """
    mangled = adjust_mangled_name(name)
    if re.search("@GLIBC|@GCC", mangled) is not None:
        # Only when symbol is defined do we consider it ours
        return sym['is_defined']
    return (
        re.search('(St[0-9])|(__cxa)|(__cxxabi)', mangled) is not None
        or mangled in new_delete_std_symbols
        or mangled in cxxabi_symbols
        or mangled.startswith('_Z')
    )
269
def filter_stdlib_symbols(syms):
    """
    Split syms into two lists and return them as a pair:
    (symbols accepted by is_stdlib_symbol_name, all remaining symbols).
    Both lists keep the order the symbols had in syms.
    """
    stdlib_symbols, other_symbols = [], []
    for sym in syms:
        normalized = adjust_mangled_name(sym['name'])
        if is_stdlib_symbol_name(normalized, sym):
            bucket = stdlib_symbols
        else:
            bucket = other_symbols
        bucket.append(sym)
    return stdlib_symbols, other_symbols
280