xref: /openbsd-src/lib/check_sym (revision 9f11ffb7133c203312a01e4b986886bc88c7d74b)
1#!/bin/ksh
2#  $OpenBSD: check_sym,v 1.5 2017/08/11 17:58:21 guenther Exp $
3#
4# Copyright (c) 2016 Philip Guenther <guenther@openbsd.org>
5#
6# Permission to use, copy, modify, and distribute this software for any
7# purpose with or without fee is hereby granted, provided that the above
8# copyright notice and this permission notice appear in all copies.
9#
10# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17#
18#
19#  check_sym -- compare the symbols and external function references in two
20#	versions of a shared library
21#
22#  SYNOPSIS
23#	check_sym [-chv] [old [new]]
24#
25#  DESCRIPTION
26#	Library developers need to be aware when they have changed the
27#	ABI of a library.  To assist them, check_sym examines two versions
28#	of a shared library and reports changes to the following:
29#	 * the set of exported symbols and their strengths
30#	 * the set of undefined symbols referenced
31#	 * the set of lazily-resolved functions (PLT)
32#
33#	In each case, additions and removals are reported; for exported
34#	symbols it also reports when a symbol is weakened or strengthened.
35#
36#	The shared libraries to compare can be specified on the
37#	command-line.  Otherwise, check_sym expects to be run from the
38#	source directory of a library, with a shlib_version file specifying
39#	the version being built and the new library in the obj subdirectory.
40#	If the old library to compare against wasn't specified either, then
41#	check_sym will take the highest version of that library in the
42#	*current* directory, or the highest version of that library in
43#	/usr/lib if it wasn't present in the current directory.
44#
45#	check_sym uses fixed names in /tmp for its intermediate files,
46#	as they contain useful details for those trying to understand
47#	what changed.  If any of them cannot be created by the user,
48#	the command will fail.  The files can be cleaned up using
49#	the -c option.
50#
51#
52#	The *basic* rules of thumb for library versions are: if you
53#	 * stop exporting a symbol, or
54#	 * change the size of a data symbol (not reported by check_sym)
55#	 * start exporting a symbol that an inter-dependent library needs
56#	then you need to bump the MAJOR version of the library.
57#
58#	Otherwise, if you:
59#	 * start exporting a symbol
60#	then you need to bump the MINOR version of the library.
61#
62#  SEE ALSO
63#	readelf(1), elf(5)
64#
65#  AUTHORS
66#	Philip Guenther <guenther@openbsd.org>
67#
68#  CAVEATS
69#	The elf format is infinitely extendable, but check_sym only
70#	handles a few weirdnesses.  Running it on or against new archs
71#	may result in meaningless results.
72#
73#  BUGS
74#	Should report changes in the size of exported data objects.
75#
76#	While the author still finds the intermediate files useful,
77#	most people won't.  By default they should be placed in a
78#	temp directory and removed.
79#
80
# get_lib_name [file ...]
#	Print the value assigned to LIB in the named makefile(s), or in
#	standard input when no file is given.  For every line of the form
#	"LIB = value" (optional blanks/tabs around LIB and the '='), the
#	first blank-delimited word after the '=' is printed on its own
#	line.  Nothing is printed when there is no such assignment.
get_lib_name()
{
	sed -n -e 's/^[ 	]*LIB[ 	]*=[ 	]*\([^ 	]*\).*/\1/p' "$@"
}
85
# pick_highest [path ...]
#	Among the arguments that are regular files and are named
#	<anything>.<major>.<minor>, find the one with the highest
#	(major, minor) pair, comparing both parts numerically.
#	Sets the global "old" to that path (empty if no argument was a
#	regular file); omaj/omin hold the winning version numbers.
#	Returns 0 if a file was picked, non-zero otherwise.
pick_highest()
{
	old=
	omaj=-1
	omin=0
	for i in "$@"
	do
		# an unmatched glob arrives here as a literal pattern
		if [[ ! -f $i ]]
		then
			continue
		fi

		# last dot-separated component is the minor number,
		# the one before it the major number
		min=${i##*.}
		maj=${i%.*}
		maj=${maj##*.}

		if [[ $maj -gt $omaj ]] ||
		   [[ $maj -eq $omaj && $min -gt $omin ]]
		then
			old=$i omaj=$maj omin=$min
		fi
	done
	[[ -n $old ]]
}
105
# usage [message ...]
#	With no arguments: print the usage line on stdout and exit 0.
#	With arguments: print "check_sym: <message>" followed by the
#	usage line on stderr and exit 1.  Never returns.
usage()
{
	usage="usage: check_sym [-chv] [old [new]]"
	if [[ $# -eq 0 ]]
	then
		echo "$usage"
		exit 0
	fi
	echo "check_sym: $@
$usage" >&2
	exit 1
}
118
# Names of all the intermediate files, kept as a brace pattern.  It is
# deliberately left unquoted wherever it is used so that the shell
# expands it there into the individual /tmp paths.
file_list=/tmp/{D{,S,W},J,S,U,d,j,r,s}{1,2}

# Options:
#	-c	remove the intermediate files and exit
#	-h	print the usage line and exit
#	-v	also print the relocation summary lines
verbose=false
while getopts :chv opt
do
	case $opt in
	v)	verbose=true
		;;
	c)	rm -f $file_list
		exit 0
		;;
	h)	usage
		;;
	\?)	usage "unknown option -- $OPTARG"
		;;
	esac
done
shift $((OPTIND - 1))
if [[ $# -gt 2 ]]
then
	usage "too many arguments"
fi
134
# Old library: taken from the first argument when that looks like a
# shared library name (optionally with a directory part), otherwise
# searched for.  Either way "lib" ends up holding the library's base
# name and "old" the path to compare against.
if [[ $1 != ?(*/)lib*.so* ]]
then
	# Not given: take the library name from the LIB assignment in
	# the Makefile of the current directory, defaulting to libc.
	if [[ -f Makefile ]] && lib=$(get_lib_name Makefile) && [[ -n $lib ]]
	then
		lib=lib$lib
	else
		lib=libc
	fi

	# Prefer the highest numbered copy in the current directory and
	# only then fall back to /usr/lib; pick_highest sets $old.
	pick_highest $lib.so.* || pick_highest /usr/lib/$lib.so.* || {
		echo "unable to find $lib.so.*" >&2
		exit 1
	}
else
	[[ -f $1 ]] || {
		echo "$1 doesn't exist" >&2
		exit 1
	}
	old=$1
	# strip the directory and the ".so.<version>" suffix
	lib=${old##*/}
	lib=${lib%%.so.*}
	shift
fi
165
# New library: the next argument when it looks like a shared library
# name, otherwise the just-built library in the obj subdirectory, whose
# version comes from the major/minor values that ./shlib_version is
# expected to set when sourced.
if [[ $1 != ?(*/)lib*.so* ]]
then
	. ./shlib_version
	new=obj/${lib}.so.${major}.${minor}
else
	new=$1
	shift
fi
[[ -f $new ]] || {
	echo "$new doesn't exist" >&2
	exit 1
}
181
# filt_symtab
#	stdin -> stdout filter applied to the output of readelf -s.
#	Deletes the first "[<other>: NN]" annotation (NN being lowercase
#	hex digits) from each line.  That field only shows up on some
#	symbols, and dropping it keeps the remaining fields in fixed
#	positions for the awk scripts that parse them.
#	Not really arch-specific, but it has only been seen on alpha.
filt_symtab()
{
	sed -e 's/\[<other>: [0-9a-f]*\]//'
}
188
# Precreate all the files we'll use, with noclobber set so that each one
# is created exclusively and a symlink planted in /tmp is never followed.
#
# The traps must *exit* after cleaning up.  If a leftover file cannot be
# removed or a new one cannot be created, carrying on would let the plain
# '>' redirections further down (run after noclobber is switched back
# off) write through whatever is sitting at that name.  Likewise a
# HUP/INT/TERM has to stop the script rather than just delete the files
# and continue.  The ERR trap stays armed afterwards, so any later
# command that fails also removes the files and ends the script.
set -C
files=
trap 'rm -f $files; exit 1' 1 2 15
trap 'ret=$?; rm -f $files; exit $ret' ERR
for i in $file_list
do
	rm -f $i
	# opens (and thereby creates) the file on an otherwise unused
	# descriptor; fails under noclobber if the name still exists
	3>$i
	files="$files $i"
done
set +C
201
# Dump the relocations (r1/r2) and the filtered symbol tables (s1/s2) of
# the old (1) and new (2) library.  The library paths are quoted so that
# names containing blanks or glob characters reach readelf intact.
readelf -rW "$old" > /tmp/r1
readelf -rW "$new" > /tmp/r2

readelf -sW "$old" | filt_symtab > /tmp/s1
readelf -sW "$new" | filt_symtab > /tmp/s2


# The processor type decides how jump_slots finds the lazily-resolved
# functions.  On mips64 that needs the MIPS_GOTSYM value from each
# library's dynamic section.
cpu=$(uname -p)
if [[ $cpu = mips64* ]]
then
	gotsym1=$(readelf -d "$old" | awk '$2 ~ /MIPS_GOTSYM/{print $3}')
	gotsym2=$(readelf -d "$new" | awk '$2 ~ /MIPS_GOTSYM/{print $3}')
fi
215
# jump_slots N
#	Write the sorted names of the lazily-resolved functions of
#	library N to /tmp/jN.  Where they are found depends on $cpu:
#	 * hppa*: field 5 of the IPLT lines in /tmp/rN
#	 * mips*: FUNC symbols in /tmp/sN, before the .symtab table,
#	   whose index is at least the value of $gotsymN
#	 * anything else: field 5 of the JMP_SLOT/JUMP_SLOT lines in
#	   /tmp/rN
jump_slots()
{
	if [[ $cpu = hppa* ]]
	then
		awk '/IPLT/ && $5 != ""{print $5}' /tmp/r$1
	elif [[ $cpu = mips* ]]
	then
		# the $((gotsym$1)) converts hex to decimal
		awk -v g=$((gotsym$1)) \
			'/^Symbol table ..symtab/{exit}
			$1+0 >= g && $4 == "FUNC" {print $8}' /tmp/s$1
	else
		awk '/JU*MP_SL/ && $5 != ""{print $5}' /tmp/r$1
	fi | sort -o /tmp/j$1
}
229
# dynamic_sym N
#	Split the symbols that /tmp/sN lists before its .symtab table
#	(entry 0 and header lines are skipped) into sorted name lists:
#	 * /tmp/UN	symbols whose section field is UND
#	 * /tmp/DSN	defined symbols with GLOBAL binding
#	 * /tmp/DWN	defined symbols with WEAK binding
#	 * /tmp/DN	defined symbols with any binding but LOCAL
#	and write fields 4, 5, 6 and 8 of every defined symbol, sorted,
#	to /tmp/dN.  A list that receives no names is left untouched,
#	since its sort is never started.
#
#	(The D* lists could instead be derived from /tmp/dN in a
#	second awk pass; doing it in one pass is what is used here.)
dynamic_sym()
{
	awk -v s=$1 '
		BEGIN {
			undef  = "sort -o /tmp/U" s
			strong = "sort -o /tmp/DS" s
			weak   = "sort -o /tmp/DW" s
			any    = "sort -o /tmp/D" s
		}
		/^Symbol table ..symtab/ { exit }
		/^ *[1-9]/ {
			if ($7 == "UND") {
				print $8 | undef
				next
			}
			if ($5 == "GLOBAL")
				print $8 | strong
			if ($5 == "WEAK")
				print $8 | weak
			if ($5 != "LOCAL")
				print $8 | any
			print $4, $5, $6, $8
		}' /tmp/s$1 | sort -o /tmp/d$1
}
242
# static_sym N
#	Write fields 4, 5, 6 and 8 (type, binding, presumably visibility,
#	and name) of every non-LOCAL symbol in the .symtab table of
#	/tmp/sN, sorted, to /tmp/SN.  Entry 0 and header lines are
#	skipped.
#
#	LOCAL symbols are recognised by their binding field alone.
#	Matching the text "LOCAL" anywhere on the line would also throw
#	away global symbols whose *name* contains it, e.g. FOO_LOCALE.
static_sym()
{
	awk '/^Symbol table ..symtab/{s=1}
	     $5 == "LOCAL"{next}
	     s&&/^ *[1-9]/{print $4, $5, $6, $8}' "/tmp/s$1" |
		sort -o "/tmp/S$1"
}
248
# output_if_not_empty leader command [arg ...]
#	Run the command; if it prints at least one non-empty line, print
#	the leader, then the command's output with every line indented by
#	a tab, then a blank line.  Prints nothing otherwise.
#	The command is run twice (once to test, once to print), so it
#	must produce the same output both times.
output_if_not_empty()
{
	leader=$1
	shift
	if ! "$@" | grep -q .
	then
		return 0
	fi
	echo "$leader"
	"$@" | sed 's:^:	:'
	echo
}
259
260
# Build the per-library lists for the old (1) and new (2) library.
# The three extraction steps only read the r/s dumps and write distinct
# files, so their relative order does not matter.  J is then what is in
# the jump-slot list j but not in the undefined-symbol list U.
for i in 1 2
do
	static_sym $i
	dynamic_sym $i
	jump_slots $i
	comm -23 /tmp/j$i /tmp/U$i >/tmp/J$i
done
268
# Report, old versus new.
echo "$old --> $new"

# Exported symbols: d1/d2 hold the full per-symbol records, so any
# difference there is broken down using the name lists.
if ! cmp -s /tmp/d[12]
then
	printf "Dynamic export changes:\n"
	output_if_not_empty "added:" comm -13 /tmp/D[12]
	output_if_not_empty "removed:" comm -23 /tmp/D[12]
	# strong before and weak now, and the other way around
	output_if_not_empty "weakened:" comm -12 /tmp/DS1 /tmp/DW2
	output_if_not_empty "strengthened:" comm -12 /tmp/DW1 /tmp/DS2
else
	printf "No dynamic export changes\n"
fi

# Undefined (external) references: silent when nothing changed.
cmp -s /tmp/U[12] || {
	printf "External reference changes:\n"
	output_if_not_empty "added:" comm -13 /tmp/U[12]
	output_if_not_empty "removed:" comm -23 /tmp/U[12]
}

# -v: show the lines of each relocation dump that start with "R"
if $verbose
then
	printf "\nReloc counts:\nbefore:\n"
	grep ^R /tmp/r1
	printf "\nafter:\n"
	grep ^R /tmp/r2
fi

# Lazily-resolved functions, with undefined references already taken out
output_if_not_empty "PLT added:" comm -13 /tmp/J1 /tmp/J2
output_if_not_empty "PLT removed:" comm -23 /tmp/J1 /tmp/J2
296