xref: /openbsd-src/lib/check_sym (revision d7922f91b633bd649d004aed2641da227c6f102b)
1#!/bin/ksh
2#  $OpenBSD: check_sym,v 1.14 2024/12/24 18:14:49 tb Exp $
3#
4# Copyright (c) 2016,2019,2022 Philip Guenther <guenther@openbsd.org>
5#
6# Permission to use, copy, modify, and distribute this software for any
7# purpose with or without fee is hereby granted, provided that the above
8# copyright notice and this permission notice appear in all copies.
9#
10# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17#
18#
19#  check_sym -- compare the symbols and external function references in two
20#	versions of a library
21#
22#  SYNOPSIS
23#	check_sym [-chkSv] [old [new]]
24#
25#  DESCRIPTION
26#	Library developers need to be aware when they have changed the
27#	ABI of a library.  To assist them, check_sym examines two versions
28#	of a shared library and reports changes to the following:
29#	 * the set of exported symbols and their strengths
30#	 * the set of undefined symbols referenced
31#	 * the set of lazily-resolved functions (PLT)
32#
33#	In each case, additions and removals are reported; for exported
34#	symbols it also reports when a symbol is weakened or strengthened.
35#
36#	With the -S option, a similar analysis is done but for the static lib.
37#
38#	The shared libraries to compare can be specified on the
39#	command-line.  Otherwise, check_sym expects to be run from the
40#	source directory of a library with a shlib_version file specifying
41#	the version being built and the new library in the obj subdirectory.
42#	If the old library to compare against wasn't specified either then
43#	check_sym will take the highest version of that library in the
44#	*current* directory, or the highest version of that library in
45#	/usr/lib if it wasn't present in the current directory.
46#
47#	By default, check_sym places all its intermediate files in a
48#	temporary directory and removes it on exit.  They contain useful
49#	details for understanding what changed, so if the -k option is used
50#	they will instead be placed in /tmp/ and left behind.  If any of
51#	them cannot be created by the user, the command will fail.  The
52#	files left behind by the -k option can be cleaned up by invoking
#	check_sym with the -c option.
54#
55#	The -v option enables verbose output, showing relocation counts.
56#
57#	The *basic* rules of thumb for library versions are: if you
58#	 * stop exporting a symbol, or
59#	 * change the size of a data symbol
60#	 * start exporting a symbol that an inter-dependent library needs
61#	then you need to bump the MAJOR version of the library.
62#
63#	Otherwise, if you:
64#	 * start exporting a symbol
65#	then you need to bump the MINOR version of the library.
66#
67#  SEE ALSO
68#	readelf(1), elf(5)
69#
70#  AUTHORS
71#	Philip Guenther <guenther@openbsd.org>
72#
73#  CAVEATS
74#	The elf format is infinitely extendable, but check_sym only
75#	handles a few weirdnesses.  Running it on or against new archs
76#	may result in meaningless results.
77#
78#  BUGS
#	While the author still finds the intermediate files useful,
80#	most people won't.  By default they should be placed in a
81#	temp directory and removed.
82#
83
# get_lib_name [file ...]
#	Print the first blank-delimited word that follows the "=" of the
#	first "LIB = ..." line in the given file(s), or in stdin when no
#	file is named.  Reading stops at that first matching line.
get_lib_name()
{
	sed -n \
	    -e '/^[ 	]*LIB[ 	]*=/{' \
	    -e 's/^[^=]*=[ 	]*\([^ 	]*\).*/\1/p' \
	    -e 'q' \
	    -e '}' \
	    "$@"
}
88
# pick_highest file ...
#	Choose, among the arguments that are existing regular files, the
#	one whose name ends in the highest MAJOR.MINOR pair (the last two
#	dot-separated components).  The winner is left in the global $old,
#	with its version in $omaj and $omin; i, maj and min are scratch
#	globals.  Returns 0 if a file was picked, non-zero otherwise (this
#	includes the case where the caller's glob matched nothing and was
#	passed through literally).
pick_highest()
{
	old=
	omaj=-1
	omin=0
	for i
	do
		[[ -f $i ]] || continue
		maj=${i%.*}; maj=${maj##*.}
		min=${i##*.}
		# Ignore names such as libfoo.so.3.bak: a non-numeric
		# component would be evaluated as a variable name by the
		# arithmetic comparison below and could win by accident.
		case $maj in
		''|*[!0-9]*)	continue;;
		esac
		case $min in
		''|*[!0-9]*)	continue;;
		esac
		if [[ $maj -gt $omaj || ( $maj -eq $omaj && $min -gt $omin ) ]]
		then
			old=$i
			omaj=$maj
			omin=$min
		fi
	done
	[[ $old != "" ]]
}
108
# fail message ...
#	Print the arguments, joined by spaces, on stderr and exit 1.
#	printf is used rather than echo so that the message is emitted
#	verbatim even if it looks like an echo option or has backslashes.
fail() { printf '%s\n' "$*" >&2; exit 1; }
110
# usage [message ...]
#	Without arguments: print the usage line on stdout and exit 0.
#	With arguments: hand "check_sym: <message>" followed by the usage
#	line to fail, which reports it on stderr and exits non-zero.
usage()
{
	usage="usage: check_sym [-chkSv] [old [new]]"
	if [[ $# -ne 0 ]]
	then
		fail "check_sym: $*
$usage"
	fi
	echo "$usage"
	exit 0
}
119
120
121#
122#  Output helpers
123#
# data_sym_changes file1 file2
#	join(1) the two sorted "name value" files on the name and print
#	"name old --> new" for every name whose value differs.
data_sym_changes()
{
	join "$@" |
	    awk '$2 != $3 { printf "%s %s --> %s\n", $1, $2, $3 }'
}
128
# output_if_not_empty leader command [arg ...]
#	Run the command; if it produced at least one non-empty line, print
#	the leader, then the command's output with every line indented by
#	a tab, then a blank line.  Otherwise print nothing.
#
#	The command is run only once and its output is reused for both the
#	emptiness check and the display.  $leader and $oine_out are
#	scratch globals.
output_if_not_empty()
{
	leader=$1
	shift
	# The trailing "x" keeps $(...) from stripping trailing newlines;
	# "|| :" keeps a failing command from being treated as an error
	# inside the substitution.
	oine_out=$("$@" || :; echo x)
	oine_out=${oine_out%x}
	if printf '%s' "$oine_out" | grep -q .
	then
		echo "$leader"
		printf '%s' "$oine_out" | sed 's:^:	:'
		echo
	fi
}
140
141
142#
143#  Dynamic library routines
144#
145
# dynamic_collect
#	Dump the raw readelf data for the shared libraries $old and $new
#	into the work directory $odir:
#	  Ds1, Ds2	symbol tables (readelf -sW, passed through filt_symtab)
#	  r1,  r2	relocations (readelf -rW)
#	Also sets $cpu from the "Machine:" line of $new's ELF header
#	(mips64, hppa, or dontcare); for MIPS it additionally records the
#	MIPS_GOTSYM dynamic entries of both libraries in $gotsym1/$gotsym2.
#
#	All path expansions are quoted so that library or work-directory
#	names containing blanks or glob characters are handled.
dynamic_collect()
{
	readelf -sW "$old" | filt_symtab > "$odir/Ds1"
	readelf -sW "$new" | filt_symtab > "$odir/Ds2"

	readelf -rW "$old" > "$odir/r1"
	readelf -rW "$new" > "$odir/r2"

	case $(readelf -h "$new" | grep '^ *Machine:') in
	*MIPS*)	cpu=mips64
		gotsym1=$(readelf -d "$old" | awk '$2 ~ /MIPS_GOTSYM/{print $3}')
		gotsym2=$(readelf -d "$new" | awk '$2 ~ /MIPS_GOTSYM/{print $3}')
		;;
	*HPPA*)	cpu=hppa;;
	*)	cpu=dontcare;;
	esac
}
163
# jump_slots n
#	Write the sorted list of lazily-bound function names of library
#	n (1 = old, 2 = new) to j<n>.  How they are found depends on $cpu:
#	  hppa		field 5 of the IPLT lines in r<n>
#	  mips64	FUNC symbols in Ds<n>, before the .symtab section,
#			that are not PROTECTED and whose index is at least
#			gotsym<n>
#	  otherwise	field 5 of the JMP_SLOT/JUMP_SLOT lines in r<n>
jump_slots()
{
	{
		if [[ $cpu = hppa ]]
		then
			awk '/IPLT/ && $5 != "" { print $5 }' "r$1"
		elif [[ $cpu = mips64 ]]
		then
			# the $((gotsym$1)) converts hex to decimal
			awk -v g=$((gotsym$1)) '
				/^Symbol table ..symtab/ { exit }
				$6 == "PROTECTED" { next }
				$1+0 >= g && $4 == "FUNC" { print $8 }
			' "Ds$1"
		else
			awk '/JU*MP_SL/ && $5 != "" { print $5 }' "r$1"
		fi
	} | sort -o "j$1"
}
179
# dynamic_sym n
#	Split the filtered symbol dump Ds<n> (n is 1 for the old library,
#	2 for the new one) into sorted per-category files in the current
#	directory.  Only lines before the ".symtab" section header are
#	considered, and only those that start with a non-zero digit after
#	optional blanks; symbols whose field 5 is LOCAL are dropped.
#
#	Fields are used as laid out in the dump: $3 next to the name for
#	OBJECT symbols, $4/$5/$6 as the descriptive triple, $7 compared
#	with "UND" and $8 as the symbol name (presumably readelf's size,
#	type, bind, visibility, section index and name columns).
#
#	Files written:
#	  DU<n>	names of undefined (UND) symbols; these appear in no
#		other file because the rule ends with "next"
#	  DS<n>	names of GLOBAL symbols
#	  DW<n>	names of WEAK symbols
#	  DO<n>	"name $3" for OBJECT symbols
#	  D<n>	names of all remaining (defined, non-local) symbols
#	  d<n>	"$4 $5 $6 name" for those same symbols
#
#	Each awk output pipe runs its own sort(1); a category without any
#	symbols never starts its sort and so leaves its file untouched.
dynamic_sym()
{
	awk -v s=$1 '/^Symbol table ..symtab/{exit}
		! /^ *[1-9]/   {next}
		$5 == "LOCAL"  {next}
		$7 == "UND"    {print $8     | ("sort -o DU" s); next }
		$5 == "GLOBAL" {print $8     | ("sort -o DS" s) }
		$5 == "WEAK"   {print $8     | ("sort -o DW" s) }
		$4 == "OBJECT" {print $8, $3 | ("sort -o DO" s) }
		{print $8 | ("sort -o D" s)
		 print $4, $5, $6, $8}' Ds$1 | sort -o d$1
}
192
# static_sym n
#	From the ".symtab" section header of Ds<n> onwards, write the
#	sorted "$4 $5 $6 $8" fields of every numbered line that does not
#	contain "LOCAL" to s<n>.
static_sym()
{
	awk '
		/^Symbol table ..symtab/ { in_symtab = 1 }
		/LOCAL/ { next }
		in_symtab && /^ *[1-9]/ { print $4, $5, $6, $8 }
	' "Ds$1" | sort -o "s$1"
}
199
# dynamic_analysis n
#	Build the derived files for library n: the jump-slot list j<n>,
#	the symbol category files, and finally J<n>, which holds the
#	entries of j<n> that are not listed in DU<n>.
#	Always returns 0.
dynamic_analysis()
{
	jump_slots "$1"
	dynamic_sym "$1"
	#static_sym "$1"
	comm -23 "j$1" "DU$1" > "J$1"
	return 0
}
208
# dynamic_output
#	Report, from the files in the current directory, how the dynamic
#	symbols changed between library 1 (old) and library 2 (new):
#	export additions/removals/weakenings/strengthenings and data
#	object size changes, changes in the undefined references, the
#	relocation summary lines when $verbose is true, and finally the
#	additions to and removals from the PLT lists.
dynamic_output()
{
	if ! { cmp -s d[12] && cmp -s DO[12]; }
	then
		printf "Dynamic export changes:\n"
		output_if_not_empty "added:" comm -13 D[12]
		output_if_not_empty "removed:" comm -23 D[12]
		output_if_not_empty "weakened:" comm -12 DS1 DW2
		output_if_not_empty "strengthened:" comm -12 DW1 DS2
		output_if_not_empty "data object sizes changes:" \
		    data_sym_changes DO[12]
	else
		printf "No dynamic export changes\n"
	fi

	cmp -s DU[12] || {
		printf "External reference changes:\n"
		output_if_not_empty "added:" comm -13 DU[12]
		output_if_not_empty "removed:" comm -23 DU[12]
	}

	if $verbose
	then
		printf "\nReloc counts:\nbefore:\n"
		grep ^R r1
		printf "\nafter:\n"
		grep ^R r2
	fi

	output_if_not_empty "PLT added:" comm -13 J[12]
	output_if_not_empty "PLT removed:" comm -23 J[12]
}
240
241
242#
243#  Static library routines
244#
# static_collect
#	Dump the symbol tables of the static libraries $old and $new
#	(readelf -sW) into $odir/Ss1 and $odir/Ss2, after dropping the
#	lines rejected by filt_ret and normalizing with filt_symtab.
#
#	All path expansions are quoted so that library or work-directory
#	names containing blanks or glob characters are handled.
static_collect()
{
	readelf -sW "$old" | filt_ret | filt_symtab > "$odir/Ss1"
	readelf -sW "$new" | filt_ret | filt_symtab > "$odir/Ss2"
}
250
# static_analysis n
#	Split the filtered symbol dump Ss<n> of a static library (n is 1
#	for the old library, 2 for the new one) into sorted per-category
#	files in the current directory.  Only lines that start with a
#	non-zero digit after optional blanks are considered, and symbols
#	whose field 5 is LOCAL are dropped.  Unlike dynamic_sym, the whole
#	dump is read: there is no early exit at a section header.
#
#	Files written:
#	  SU<n>	names of undefined (UND) symbols, duplicates removed;
#		these appear in no other file because the rule ends
#		with "next"
#	  SH<n>	names of symbols whose field 6 is HIDDEN, duplicates
#		removed
#	  SS<n>	names of GLOBAL symbols
#	  SW<n>	names of WEAK symbols
#	  SO<n>	"name $3" for OBJECT symbols
#	  S<n>	names of all remaining (defined, non-local) symbols
#	  s<n>	"$4 $5 $6 name" for those same symbols
#	  Sh<n>	the entries of SH<n> that do not start with "_"
#
#	The "|| :" after grep keeps an empty result (grep exit status 1)
#	from counting as a command failure.
static_analysis()
{
	awk -v s=$1 '!/^ *[1-9]/{next}
		$5 == "LOCAL"  {next}
		$7 == "UND"    {print $8     | ("sort -uo SU" s); next }
		$6 == "HIDDEN" {print $8     | ("sort -uo SH" s) }
		$5 == "GLOBAL" {print $8     | ("sort -o SS" s) }
		$5 == "WEAK"   {print $8     | ("sort -o SW" s) }
		$4 == "OBJECT" {print $8, $3 | ("sort -o SO" s) }
		{print $8 | ("sort -o S" s)
		 print $4, $5, $6, $8}' Ss$1 | sort -o s$1
	grep -v '^_' SH$1 >Sh$1 || :
}
264
# static_output
#	Report, from the files in the current directory, how the static
#	library symbols changed between library 1 (old) and library 2
#	(new): hidden symbols without a leading underscore that are new
#	in library 2, export additions/removals/weakenings/strengthenings
#	and data object size changes, and changes in the undefined
#	references.
static_output()
{
	output_if_not_empty "hidden but not reserved:" comm -13 Sh[12]

	if ! { cmp -s s[12] && cmp -s SO[12]; }
	then
		printf "Static export changes:\n"
		output_if_not_empty "added:" comm -13 S[12]
		output_if_not_empty "removed:" comm -23 S[12]
		output_if_not_empty "weakened:" comm -12 SS1 SW2
		output_if_not_empty "strengthened:" comm -12 SW1 SS2
		output_if_not_empty "data object sizes changes:" \
		    data_sym_changes SO[12]
	else
		printf "No static export changes\n"
	fi

	cmp -s SU[12] || {
		printf "External reference changes:\n"
		output_if_not_empty "added:" comm -13 SU[12]
		output_if_not_empty "removed:" comm -23 SU[12]
	}
}
287
288
unset odir

# Brace patterns naming every intermediate file of each mode.  They are
# stored unexpanded; the shell expands them where $file_list is used
# unquoted (pre-creation of the files and the -c cleanup).
#
# static_file_list includes "s" inside the S{...} group so that Ss1 and
# Ss2, the files static_collect writes, are pre-created and cleaned up
# like all the others.
file_list={D{,O,S,s,W,U},J,d,j,r}{1,2}
static_file_list={S{,H,h,O,S,s,U,W},U,s}{1,2}

# Mode flags; each holds the name of the command (true/false) to run
keep_temp=false
dynamic=true
static=false
verbose=false
297
# do_static
#	Switch to static-library mode: enable the static pass, disable
#	the dynamic one, and make $file_list name the static work files.
do_static()
{
	static=true
	dynamic=false
	file_list=$static_file_list
}
299
# Process the command-line options, then drop them so that only the
# optional "old" and "new" library arguments remain.
while getopts :chkSv opt "$@"
do
	case $opt in
	h)	usage;;
	k)	keep_temp=true;;
	S)	do_static;;
	v)	verbose=true;;
	c)	# remove the files left in /tmp by a previous -k run
		rm -f /tmp/$file_list
		exit 0;;
	\?)	usage "unknown option -- $OPTARG";;
	esac
done
shift $((OPTIND - 1))
if [[ $# -gt 2 ]]
then
	usage "too many arguments"
fi
314
# Old library?
# Sets $old to the library to compare against and $lib to its base
# name ("libfoo"), consuming $1 when it names that library.
if ! $static && [[ $1 = ?(*/)lib*.so* ]]
then
	# an explicit shared library
	if [[ ! -f $1 ]]
	then
		fail "$1 doesn't exist"
	fi
	old=$1
	lib=${old##*/}
	lib=${lib%%.so.*}
	shift
elif [[ $1 = ?(*/)lib*.a ]]
then
	# woo hoo, static library mode
	do_static
	if [[ -f $1 ]]
	then
		old=$1
		lib=${old##*/}
	elif [[ $1 = lib*.a && -f /usr/lib/$1 ]]
	then
		# a bare name that only exists in /usr/lib
		old=/usr/lib/$1
		lib=$1
	else
		fail "$1 doesn't exist"
	fi
	lib=${lib%%.a}
	shift
else
	# try determining it from the current directory
	if [[ -f Makefile ]] && lib=$(get_lib_name Makefile) &&
	   [[ -n $lib ]]
	then
		lib=lib$lib
	else
		lib=libc
	fi

	if $static
	then
		old=/usr/lib/${lib}.a
		if [[ ! -f $old ]]
		then
			fail "$old doesn't exist"
		fi
	elif ! pick_highest $lib.so.* &&
	     ! pick_highest /usr/lib/$lib.so.*
	then
		# Neither the current directory nor /usr/lib has a copy;
		# otherwise pick_highest left the highest numbered one,
		# preferring the current directory, in $old
		fail "unable to find $lib.so.*"
	fi
fi
363
# New library?
# Sets $new, consuming $1 when it names the library explicitly.
if [[ $1 != ?(*/)lib*.so* ]] &&
   ! { $static && [[ $1 = ?(*/)lib*.a ]]; }
then
	# not given on the command line: use the one in the obj directory
	if $static
	then
		new=obj/${lib}.a
	else
		# Dig info out of the just built library
		. ./shlib_version
		new=obj/${lib}.so.${major}.${minor}
	fi
else
	new=$1
	shift
fi
if [[ ! -f $new ]]
then
	fail "$new doesn't exist"
fi
379
# Make the output of readelf -s easier to parse by deleting a field
# that is only present on some symbols: [<other>: 88]
# Not really arch-specific, but I've only seen it on alpha
filt_symtab()
{
	sed -e 's/\[<other>: [0-9a-f]*\]//'
}
# Drop the lines whose last field is a compiler-generated
# __retguard_<digits> or __llvm_retpoline_<letters>[<digits>] symbol.
# grep -E is the standard spelling of the obsolescent egrep.
filt_ret() { grep -E -v ' (__retguard_[0-9]+|__llvm_retpoline_[a-z]+[0-9]*)$'; }
385
386if $keep_temp
387then
388	# precreate all the files we'll use, but with noclobber set to avoid
389	# symlink attacks
390	odir=/tmp
391	files=
392	trap 'ret=$?; rm -f $files; exit $ret' 1 2 15 ERR
393else
394	trap 'ret=$?; rm -rf "$odir"; exit $ret' 0 1 2 15 ERR
395	odir=$(mktemp -dt check_sym.XXXXXXXXXX)
396fi
397set -C
398for i in $odir/$file_list
399do
400	rm -f $i
401	3>$i
402	files="$files $i"
403done
404set +C
405
406
#
#  Collect data
#
# $dynamic and $static each hold "true" or "false" and are run as
# commands to select the passes to perform.
$dynamic && dynamic_collect
$static	 && static_collect

# Now that we're done accessing $old and $new (which could be
# relative paths), chdir into our work directory, whatever it is.
# Quoted so that a work directory with blanks in its name is usable.
cd "$odir"

#
#  Do The Job
#
# 1 is the old library, 2 the new one
for i in 1 2
do
	$dynamic && dynamic_analysis $i
	$static	 && static_analysis $i
done

{
	echo "$old --> $new"
	# "! cond || cmd" rather than "cond && cmd" so that a skipped pass
	# leaves a zero status
	! $dynamic || dynamic_output
	! $static  || static_output
}
431
432