xref: /openbsd-src/lib/check_sym (revision 505ee9ea3b177e2387d907a91ca7da069f3f14d8)
1#!/bin/ksh
2#  $OpenBSD: check_sym,v 1.10 2019/10/05 01:01:23 guenther Exp $
3#
4# Copyright (c) 2016,2019 Philip Guenther <guenther@openbsd.org>
5#
6# Permission to use, copy, modify, and distribute this software for any
7# purpose with or without fee is hereby granted, provided that the above
8# copyright notice and this permission notice appear in all copies.
9#
10# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17#
18#
19#  check_sym -- compare the symbols and external function references in two
20#	versions of a shared library
21#
22#  SYNOPSIS
#	check_sym [-chv] [old [new]]
24#
25#  DESCRIPTION
26#	Library developers need to be aware when they have changed the
27#	ABI of a library.  To assist them, check_sym examines two versions
28#	of a shared library and reports changes to the following:
29#	 * the set of exported symbols and their strengths
30#	 * the set of undefined symbols referenced
31#	 * the set of lazily-resolved functions (PLT)
32#
33#	In each case, additions and removals are reported; for exported
34#	symbols it also reports when a symbol is weakened or strengthened.
35#
36#	The shared libraries to compare can be specified on the
37#	command-line.  Otherwise, check_sym expects to be run from the
38#	source directory of a library with a shlib_version file specifying
39#	the version being built and the new library in the obj subdirectory.
40#	If the old library to compare against wasn't specified either then
41#	check_sym will take the highest version of that library in the
42#	*current* directory, or the highest version of that library in
43#	/usr/lib if it wasn't present in the current directory.
44#
45#	check_sym uses fixed names in /tmp for its intermediate files,
46#	as they contain useful details for those trying to understand
47#	what changed.  If any of them cannot be created by the user,
48#	the command will fail.  The files can be cleaned up using
49#	the -c option.
50#
51#
52#	The *basic* rules of thumb for library versions are: if you
53#	 * stop exporting a symbol, or
#	 * change the size of a data symbol, or
55#	 * start exporting a symbol that an inter-dependent library needs
56#	then you need to bump the MAJOR version of the library.
57#
58#	Otherwise, if you:
59#	 * start exporting a symbol
60#	then you need to bump the MINOR version of the library.
61#
62#  SEE ALSO
63#	readelf(1), elf(5)
64#
65#  AUTHORS
66#	Philip Guenther <guenther@openbsd.org>
67#
68#  CAVEATS
69#	The elf format is infinitely extendable, but check_sym only
70#	handles a few weirdnesses.  Running it on or against new archs
71#	may result in meaningless results.
72#
73#  BUGS
#	While the author still finds the intermediate files useful,
75#	most people won't.  By default they should be placed in a
76#	temp directory and removed.
77#
78
# get_lib_name [file ...]
#	Print the first blank-delimited word assigned to LIB on every
#	"LIB = value" line of the given files (or of stdin when no file
#	is given).  Blanks around LIB and around the "=" are tolerated.
get_lib_name()
{
	# optional run of spaces/tabs, shared by the three places it occurs
	typeset blanks='[ 	]*'

	sed -n "s/^${blanks}LIB${blanks}=${blanks}\([^ 	]*\).*/\1/p" "$@"
}
83
# pick_highest file ...
#	Among the arguments that are existing regular files named
#	<something>.<major>.<minor>, find the one with the highest
#	version and store its path in the global variable "old".
#	Returns success if such a file was found; otherwise "old" is
#	left empty and the function returns failure.
#
#	Candidates whose last two dot-separated components are not
#	plain decimal numbers (e.g. libfoo.so.3.4.orig) are ignored:
#	feeding them to the numeric comparison would evaluate the text
#	as an arithmetic expression and could select a bogus file.
pick_highest()
{
	# scratch variables stay local; only "old" is exported to the caller
	typeset i maj min omaj omin

	old=
	omaj=-1
	omin=0
	for i
	do
		# an unmatched glob is passed through literally; skip it
		[[ -f $i ]] || continue
		maj=${i%.*}; maj=${maj##*.}
		min=${i##*.}
		[[ -n $maj && $maj != *[!0-9]* ]] || continue
		[[ -n $min && $min != *[!0-9]* ]] || continue
		if [[ $maj -gt $omaj || ( $maj -eq $omaj && $min -gt $omin ) ]]
		then
			old=$i
			omaj=$maj
			omin=$min
		fi
	done
	[[ $old != "" ]]
}
103
# usage [message ...]
#	Without arguments: print the usage line on stdout and exit 0.
#	With arguments: print "check_sym: <message>" followed by the
#	usage line on stderr and exit 1.  Never returns.
usage()
{
	usage="usage: check_sym [-chv] [old [new]]"

	# plain request for help
	if [[ $# -eq 0 ]]
	then
		echo "$usage"
		exit 0
	fi

	# called to report a command-line error
	echo "check_sym: $@
$usage" >&2
	exit 1
}
116
# Every intermediate file has a fixed name in /tmp: one file per kind
# (D, DS, DW, DO, J, S, U, d, j, r, s) and per library (suffix 1 is the
# old library, suffix 2 the new one).
# NOTE(review): the brace pattern is stored unexpanded; the unquoted
# uses of $file_list apparently rely on ksh brace-expanding the result
# of a parameter expansion -- confirm before porting to another shell.
file_list=/tmp/{D{,S,W,O},J,S,U,d,j,r,s}{1,2}

# Command-line options:
#	-h	print the usage line and exit
#	-c	remove the intermediate files and exit
#	-v	also print the relocation summary lines at the end
# The leading ':' in the option string makes getopts report unknown
# options through the '?' case with the offending letter in OPTARG.
verbose=false
while getopts :chv opt "$@"
do
	case $opt in
	h)	usage;;
	c)	rm -f $file_list
		exit 0;;
	v)	verbose=true;;
	\?)	usage "unknown option -- $OPTARG";;
	esac
done
shift $((OPTIND - 1))
# at most the old and the new library may remain
[[ $# -gt 2 ]] && usage "too many arguments"
132
# Old library?
# If the first remaining argument looks like a shared library (an
# optional directory part, then lib*.so*), it is taken as the old
# library and must exist.  The library name "lib" is derived from it by
# dropping the directory part and everything from ".so." onwards.
if [[ $1 = ?(*/)lib*.so* ]]
then
	if [[ ! -f $1 ]]
	then
		echo "$1 doesn't exist" >&2
		exit 1
	fi
	old=$1
	lib=${old##*/}
	lib=${lib%%.so.*}
	shift
else
	# try determining it from the current directory:
	# use the LIB value of ./Makefile if there is one, else assume libc
	if [[ -f Makefile ]] && lib=$(get_lib_name Makefile) &&
	   [[ $lib != "" ]]
	then
		lib=lib$lib
	else
		lib=libc
	fi

	# Is there a copy of that lib in the current directory?
	# If so, use the highest numbered one
	# (pick_highest stores its choice in $old; /usr/lib is only
	# searched when nothing was found in the current directory)
	if ! pick_highest $lib.so.* && ! pick_highest /usr/lib/$lib.so.*
	then
		echo "unable to find $lib.so.*" >&2
		exit 1
	fi
fi
163
# New library?
# If the next argument looks like a shared library it is the new
# library.  Otherwise the new library is expected in ./obj, named from
# $lib and the "major" and "minor" variables that sourcing
# ./shlib_version is expected to set.
if [[ $1 = ?(*/)lib*.so* ]]
then
	new=$1
	shift
else
	# Dig info out of the just built library
	. ./shlib_version
	new=obj/${lib}.so.${major}.${minor}
fi
# whichever way it was chosen, the new library has to exist
if [[ ! -f $new ]]
then
	echo "$new doesn't exist" >&2
	exit 1
fi
179
# filt_symtab
#	Filter for "readelf -s" output (stdin to stdout): drop the
#	optional "[<other>: NN]" annotation that only some symbols
#	carry, so that the remaining fields line up for later parsing.
#	The annotation is not really arch-specific, but the original
#	author had only seen it on alpha.
filt_symtab()
{
	sed -e 's/\[<other>: [0-9a-f]*\]//'
}
186
# precreate all the files we'll use, but with noclobber set to avoid
# symlink attacks
#
# If one of them cannot be created, or we are interrupted, remove
# whatever we did create and *exit*: merely cleaning up and carrying on
# would, once noclobber is switched off again, write through a file we
# did not create -- exactly what the precreation is meant to prevent.
# As before, the ERR trap stays installed for the rest of the script, so
# a later command that fails outside a condition also cleans up and
# stops instead of reporting on missing or partial data.
set -C
files=
# HUP, INT, TERM: clean up and report failure
trap 'rm -f $files; exit 1' 1 2 15
# failed command: clean up and pass its exit status on
trap 'ret=$?; rm -f $files; exit $ret' ERR
for i in $file_list
do
	rm -f $i
	# a redirection without a command just creates the file (O_EXCL
	# semantics thanks to noclobber)
	3>$i
	files="$files $i"
done
set +C
199
# Dump the relocations (/tmp/r1, /tmp/r2) and the filtered symbol tables
# (/tmp/s1, /tmp/s2) of the old and the new library.
# The library paths are quoted everywhere: they can come straight from
# the command line, and a path containing blanks or glob characters
# passes the existence checks above but would otherwise be split into
# several readelf arguments here.
readelf -rW "$old" > /tmp/r1
readelf -rW "$new" > /tmp/r2

readelf -sW "$old" | filt_symtab > /tmp/s1
readelf -sW "$new" | filt_symtab > /tmp/s2


# jump_slots needs to know about two architectures; the "Machine:" line
# of the new library's ELF header decides, everything else is "dontcare".
case $(readelf -h "$new" | grep '^ *Machine:') in
*MIPS*)	cpu=mips64;;
*HPPA*)	cpu=hppa;;
*)	cpu=dontcare;;
esac

if [[ $cpu = mips64 ]]
then
	# third field of the MIPS_GOTSYM line of the dynamic section;
	# jump_slots uses it as the lowest symbol index to consider
	gotsym1=$(readelf -d "$old" | awk '$2 ~ /MIPS_GOTSYM/{print $3}')
	gotsym2=$(readelf -d "$new" | awk '$2 ~ /MIPS_GOTSYM/{print $3}')
fi
218
# jump_slots N
#	Write the sorted list of lazily-bound function names of library
#	N (1 or 2) to /tmp/jN.  How they are found depends on $cpu:
#	  hppa    5th field of the IPLT lines of /tmp/rN
#	  mips64  8th field of the FUNC, non-PROTECTED entries of
#	          /tmp/sN whose index is at least gotsymN, looking only
#	          at what precedes the .symtab section
#	  other   5th field of the JUMP_SLOT/JMP_SLOT lines of /tmp/rN
jump_slots()
{
	if [[ $cpu = hppa ]]
	then
		awk '/IPLT/ && $5 != ""{print $5}' /tmp/r$1
	elif [[ $cpu = mips64 ]]
	then
		# the $((gotsym$1)) converts hex to decimal
		awk -v g=$((gotsym$1)) \
			'/^Symbol table ..symtab/{exit}
			$6 == "PROTECTED" { next }
			$1+0 >= g && $4 == "FUNC" {print $8}' /tmp/s$1
	else
		awk '/JU*MP_SL/ && $5 != ""{print $5}' /tmp/r$1
	fi | sort -o /tmp/j$1
}
233
# dynamic_sym N
#	Split the first symbol table found in /tmp/sN (everything before
#	the "Symbol table '.symtab'" header) into sorted lists.  Only
#	lines that start with a non-zero digit after optional blanks are
#	considered, which skips headers and entry 0.
#	  /tmp/UN   8th field of entries whose 7th field is UND; these
#	            entries take no part in any of the lists below
#	  /tmp/DSN  8th field of entries whose 5th field is GLOBAL
#	  /tmp/DWN  8th field of entries whose 5th field is WEAK
#	  /tmp/DN   8th field of entries whose 5th field is not LOCAL
#	  /tmp/DON  8th and 3rd fields of non-LOCAL entries whose 4th
#	            field is OBJECT
#	  /tmp/dN   4th, 5th, 6th and 8th fields of every remaining entry
#	The per-list sorts are started by awk itself, so a list with no
#	matching entry is never written by this function.
#	NOTE(review): the field numbers presumably correspond to the
#	Num, Value, Size, Type, Bind, Vis, Ndx, Name columns of
#	"readelf -sW" -- confirm against the readelf in use.
dynamic_sym() {
	awk -v s=$1 '/^Symbol table ..symtab/{exit}
		! /^ *[1-9]/   {next}
		$7 == "UND"    {print $8 | ("sort -o /tmp/U" s); next }
		$5 == "GLOBAL" {print $8 | ("sort -o /tmp/DS" s) }
		$5 == "WEAK"   {print $8 | ("sort -o /tmp/DW" s) }
		$5 != "LOCAL"  {print $8 | ("sort -o /tmp/D" s) }
		$5 != "LOCAL" && $4 == "OBJECT" {
				print $8, $3 | ("sort -o /tmp/DO" s) }
		{print $4, $5, $6, $8}' /tmp/s$1 | sort -o /tmp/d$1
}
245
# static_sym N
#	From the "Symbol table '.symtab'" header of /tmp/sN onwards,
#	write the 4th, 5th, 6th and 8th fields of every numbered entry
#	(entry 0 excepted) whose line does not contain LOCAL to
#	/tmp/SN, sorted.
static_sym()
{
	awk '
		/^Symbol table ..symtab/ { in_symtab = 1 }
		/LOCAL/ { next }
		in_symtab && /^ *[1-9]/ { print $4, $5, $6, $8 }
	' /tmp/s$1 |
	    sort -o /tmp/S$1
}
251
# data_sym_changes file1 file2
#	Both files hold sorted "name value" lines.  For every name
#	present in both files whose values differ, print
#	"name value1 --> value2".
data_sym_changes()
{
	join "$@" |
	    awk '{ if ($2 != $3) printf "%s %s --> %s\n", $1, $2, $3 }'
}
255
# output_if_not_empty leader command [arg ...]
#	Run the command; if it produces at least one non-empty line,
#	print the leader, then the command's output (the command is run
#	a second time) with each line indented by a tab, then an empty
#	line.  Prints nothing otherwise.
output_if_not_empty()
{
	leader=$1
	shift

	# nothing to report: stay silent
	"$@" | grep -q . || return 0

	echo "$leader"
	"$@" | sed 's:^:	:'
	echo
}
266
267
# Build the per-library lists (1 = old, 2 = new).  /tmp/JN ends up with
# the jump slot names of /tmp/jN that are not also listed in /tmp/UN.
for i in 1 2
do
	jump_slots $i
	dynamic_sym $i
	static_sym $i
	comm -23 /tmp/j$i /tmp/U$i >/tmp/J$i
done

echo "$old --> $new"
# Dynamic exports: unchanged only if both the type/binding/visibility
# listings (d1, d2) and the data object listings (DO1, DO2) are identical.
if cmp -s /tmp/d[12] && cmp -s /tmp/DO[12]
then
	printf "No dynamic export changes\n"
else
	printf "Dynamic export changes:\n"
	output_if_not_empty "added:" comm -13 /tmp/D[12]
	output_if_not_empty "removed:" comm -23 /tmp/D[12]
	# weakened: GLOBAL in the old library, WEAK in the new one
	output_if_not_empty "weakened:" comm -12 /tmp/DS1 /tmp/DW2
	# strengthened: WEAK in the old library, GLOBAL in the new one
	output_if_not_empty "strengthened:" comm -12 /tmp/DW1 /tmp/DS2
	output_if_not_empty "data object sizes changes:" \
					data_sym_changes /tmp/DO[12]
fi
# Undefined symbols referenced by the library
if ! cmp -s /tmp/U[12]
then
	printf "External reference changes:\n"
	output_if_not_empty "added:" comm -13 /tmp/U[12]
	output_if_not_empty "removed:" comm -23 /tmp/U[12]
fi

# -v: also show the lines of the relocation dumps that start with "R"
if $verbose; then
	printf "\nReloc counts:\nbefore:\n"
	grep ^R /tmp/r1
	printf "\nafter:\n"
	grep ^R /tmp/r2
fi

# Lazily-resolved functions that appeared or disappeared
output_if_not_empty "PLT added:" comm -13 /tmp/J1 /tmp/J2
output_if_not_empty "PLT removed:" comm -23 /tmp/J1 /tmp/J2
305