#!/bin/ksh
#	$OpenBSD: check_sym,v 1.14 2024/12/24 18:14:49 tb Exp $
#
# Copyright (c) 2016,2019,2022 Philip Guenther <guenther@openbsd.org>
#
# Permission to use, copy, modify, and distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#
#
#  check_sym -- compare the symbols and external function references in two
#	versions of a library
#
#  SYNOPSIS
#	check_sym [-chkSv] [old [new]]
#
#  DESCRIPTION
#	Library developers need to be aware when they have changed the
#	ABI of a library.  To assist them, check_sym examines two versions
#	of a shared library and reports changes to the following:
#	 * the set of exported symbols and their strengths
#	 * the set of undefined symbols referenced
#	 * the set of lazily-resolved functions (PLT)
#
#	In each case, additions and removals are reported; for exported
#	symbols it also reports when a symbol is weakened or strengthened.
#
#	With the -S option, a similar analysis is done but for the static lib.
#
#	The shared libraries to compare can be specified on the
#	command-line.  Otherwise, check_sym expects to be run from the
#	source directory of a library with a shlib_version file specifying
#	the version being built and the new library in the obj subdirectory.
#	If the old library to compare against wasn't specified either then
#	check_sym will take the highest version of that library in the
#	*current* directory, or the highest version of that library in
#	/usr/lib if it wasn't present in the current directory.
#
#	By default, check_sym places all its intermediate files in a
#	temporary directory and removes it on exit.  They contain useful
#	details for understanding what changed, so if the -k option is used
#	they will instead be placed in /tmp/ and left behind.  If any of
#	them cannot be created by the user, the command will fail.  The
#	files left behind by the -k option can be cleaned up by invoking
#	check_sym with the -c option.
#
#	The -v option enables verbose output, showing relocation counts.
#
#	The *basic* rules of thumb for library versions are: if you
#	 * stop exporting a symbol, or
#	 * change the size of a data symbol, or
#	 * start exporting a symbol that an inter-dependent library needs
#	then you need to bump the MAJOR version of the library.
#
#	Otherwise, if you:
#	 * start exporting a symbol
#	then you need to bump the MINOR version of the library.
#
#  SEE ALSO
#	readelf(1), elf(5)
#
#  AUTHORS
#	Philip Guenther <guenther@openbsd.org>
#
#  CAVEATS
#	The ELF format is infinitely extendable, but check_sym only
#	handles a few weirdnesses.  Running it on or against new archs
#	may result in meaningless results.
#
#  BUGS
#	While the author still finds the intermediate files useful,
#	most people won't.  By default they should be placed in a
#	temp directory and removed.
#

# get_lib_name file ...
#	Print the value of the first "LIB = value" assignment found in the
#	given makefile(s).  Prints nothing if there is no such assignment.
get_lib_name()
{
	# [[:space:]] instead of a literal blank so that assignments that
	# use tabs around the '=' are recognized too
	sed -n '/^[[:space:]]*LIB[[:space:]]*=/{ s/^[^=]*=[[:space:]]*\([^[:space:]]*\).*/\1/p; q;}' "$@"
}

# pick_highest file ...
#	Among the arguments that are existing regular files, pick the one
#	whose last two dot-separated components (major.minor) are highest
#	and store its name in the global $old.  The globals omaj, omin,
#	maj, min and i are used as scratch variables.
#	Returns non-zero, with $old empty, if no argument is a regular file
#	(which includes the case of a glob pattern that matched nothing).
pick_highest()
{
	old=
	omaj=-1
	omin=0
	for i
	do
		[[ -f $i ]] || continue
		# name.so.MAJ.MIN: strip the minor, then keep the last component
		maj=${i%.*}; maj=${maj##*.}
		min=${i##*.}
		if [[ $maj -gt $omaj || ( $maj -eq $omaj && $min -gt $omin ) ]]
		then
			old=$i
			omaj=$maj
			omin=$min
		fi
	done
	[[ $old != "" ]]
}

# fail message ...
#	Print the message on stderr and exit with status 1.
fail() { printf '%s\n' "$*" >&2; exit 1; }

# usage [message ...]
#	Without arguments: print the usage line on stdout and exit 0.
#	With arguments: print them as an error followed by the usage line
#	on stderr and exit 1 (via fail).
usage()
{
	usage="usage: check_sym [-chkSv] [old [new]]"
	[[ $# -eq 0 ]] || fail "check_sym: $*
$usage"
	echo "$usage"
	exit 0
}


#
#  Output helpers
#

# data_sym_changes file1 file2
#	Join the two files on their first field and print
#	"key old --> new" for every key whose second field differs
#	between them.
data_sym_changes()
{
	join "$@" | awk '$2 != $3 { print $1 " " $2 " --> " $3 }'
}

# output_if_not_empty leader command [arg ...]
#	Run the command.  If it prints at least one non-empty line, print
#	the leader, then the command's output with every line indented,
#	then a blank line.  Otherwise print nothing.
#	Uses the globals leader and out as scratch variables.
output_if_not_empty()
{
	leader=$1
	shift
	# Run the command only once and keep what it printed.  The "|| :"
	# keeps a non-zero exit status from firing the script's ERR trap,
	# just as running it in an "if" condition would.
	out=$("$@") || :
	if [[ -n $out ]]
	then
		echo "$leader"
		printf '%s\n' "$out" | sed 's:^: :'
		echo
	fi
}


#
#  Dynamic library routines
#

# dynamic_collect
#	Dump the symbol tables (Ds1, Ds2) and relocations (r1, r2) of $old
#	and $new into $odir, and set $cpu from the ELF header of $new.
#	For MIPS, also record DT_MIPS_GOTSYM of both libraries in
#	$gotsym1 and $gotsym2 for use by jump_slots.
dynamic_collect()
{
	# $old, $new and $odir are quoted so that paths containing blanks
	# or glob characters are passed through intact
	readelf -sW "$old" | filt_symtab > "$odir/Ds1"
	readelf -sW "$new" | filt_symtab > "$odir/Ds2"

	readelf -rW "$old" > "$odir/r1"
	readelf -rW "$new" > "$odir/r2"

	case $(readelf -h "$new" | grep '^ *Machine:') in
	*MIPS*)	cpu=mips64
		gotsym1=$(readelf -d "$old" | awk '$2 ~ /MIPS_GOTSYM/{print $3}')
		gotsym2=$(readelf -d "$new" | awk '$2 ~ /MIPS_GOTSYM/{print $3}')
		;;
	*HPPA*)	cpu=hppa;;
	*)	cpu=dontcare;;
	esac
}

# jump_slots n
#	Write the sorted names of the lazily-resolved functions of
#	library n (1 = old, 2 = new) to j<n>.  Where they are found
#	depends on $cpu: IPLT relocations on hppa, function symbols at or
#	above the GOTSYM index on mips64, JUMP_SLOT (or JMP_SLOT)
#	relocations everywhere else.
jump_slots()
{
	case $cpu in
	hppa)	awk '/IPLT/ && $5 != ""{print $5}' r$1
		;;
	mips64)	# the $((gotsym$1)) converts hex to decimal
		awk -v g=$((gotsym$1)) \
		    '/^Symbol table ..symtab/{exit}
			$6 == "PROTECTED" { next }
			$1+0 >= g && $4 == "FUNC" {print $8}' Ds$1
		;;
	*)	awk '/JU*MP_SL/ && $5 != ""{print $5}' r$1
		;;
	esac | sort -o j$1
}

# dynamic_sym n
#	Split the dynamic symbol table dump Ds<n> into sorted lists:
#	  DU<n>	undefined symbols
#	  DS<n>	GLOBAL symbols
#	  DW<n>	WEAK symbols
#	  DO<n>	OBJECT symbols with their sizes
#	  D<n>	all defined, non-LOCAL symbols
#	  d<n>	type, binding, visibility and name of each of those
#	Reading stops at the .symtab section so only the first table in
#	the dump is considered.
dynamic_sym()
{
	awk -v s=$1 '/^Symbol table ..symtab/{exit}
		! /^ *[1-9]/	{next}
		$5 == "LOCAL"	{next}
		$7 == "UND"	{print $8 | ("sort -o DU" s); next }
		$5 == "GLOBAL"	{print $8 | ("sort -o DS" s) }
		$5 == "WEAK"	{print $8 | ("sort -o DW" s) }
		$4 == "OBJECT"	{print $8, $3 | ("sort -o DO" s) }
				{print $8 | ("sort -o D" s)
				 print $4, $5, $6, $8}' Ds$1 | sort -o d$1
}

# static_sym n
#	Write type, binding, visibility and name of the non-LOCAL symbols
#	that follow the .symtab header in Ds<n> to s<n>.
#	Currently unused: the only call, in dynamic_analysis, is commented
#	out.
static_sym()
{
	awk '/^Symbol table ..symtab/{s=1}
		/LOCAL/{next}
		s&&/^ *[1-9]/{print $4, $5, $6, $8}' Ds$1 | sort -o s$1
}

# dynamic_analysis n
#	Build all the per-library lists for library n, including J<n>:
#	the jump slots that are not also undefined symbols.
#	Always returns 0.
dynamic_analysis()
{
	jump_slots $1
	dynamic_sym $1
	#static_sym $1
	comm -23 j$1 DU$1 >J$1
	return 0
}

# dynamic_output
#	Report the differences between the lists built for library 1 and
#	library 2: exported symbols, external references, relocation
#	counts (only if $verbose) and PLT entries.
dynamic_output()
{
	if cmp -s d[12] && cmp -s DO[12]
	then
		printf "No dynamic export changes\n"
	else
		printf "Dynamic export changes:\n"
		output_if_not_empty "added:" comm -13 D[12]
		output_if_not_empty "removed:" comm -23 D[12]
		output_if_not_empty "weakened:" comm -12 DS1 DW2
		output_if_not_empty "strengthened:" comm -12 DW1 DS2
		output_if_not_empty "data object sizes changes:" \
		    data_sym_changes DO[12]
	fi
	if ! cmp -s DU[12]
	then
		printf "External reference changes:\n"
		output_if_not_empty "added:" comm -13 DU[12]
		output_if_not_empty "removed:" comm -23 DU[12]
	fi

	if $verbose; then
		printf "\nReloc counts:\nbefore:\n"
		grep ^R r1
		printf "\nafter:\n"
		grep ^R r2
	fi

	output_if_not_empty "PLT added:" comm -13 J[12]
	output_if_not_empty "PLT removed:" comm -23 J[12]
}


#
#  Static library routines
#

# static_collect
#	Dump the symbol tables of $old and $new, with the symbols matched
#	by filt_ret removed, into $odir/Ss1 and $odir/Ss2.
static_collect()
{
	readelf -sW "$old" | filt_ret | filt_symtab > "$odir/Ss1"
	readelf -sW "$new" | filt_ret | filt_symtab > "$odir/Ss2"
}

# static_analysis n
#	Split the symbol table dump Ss<n> into sorted lists:
#	  SU<n>	undefined symbols (duplicates removed)
#	  SH<n>	HIDDEN symbols (duplicates removed)
#	  Sh<n>	the HIDDEN symbols whose name doesn't start with '_'
#	  SS<n>	GLOBAL symbols
#	  SW<n>	WEAK symbols
#	  SO<n>	OBJECT symbols with their sizes
#	  S<n>	all defined, non-LOCAL symbols
#	  s<n>	type, binding, visibility and name of each of those
static_analysis()
{
	awk -v s=$1 '!/^ *[1-9]/{next}
		$5 == "LOCAL"	{next}
		$7 == "UND"	{print $8 | ("sort -uo SU" s); next }
		$6 == "HIDDEN"	{print $8 | ("sort -uo SH" s) }
		$5 == "GLOBAL"	{print $8 | ("sort -o SS" s) }
		$5 == "WEAK"	{print $8 | ("sort -o SW" s) }
		$4 == "OBJECT"	{print $8, $3 | ("sort -o SO" s) }
				{print $8 | ("sort -o S" s)
				 print $4, $5, $6, $8}' Ss$1 | sort -o s$1
	# grep exits non-zero when it selects nothing; that is not an error
	grep -v '^_' SH$1 >Sh$1 || :
}

# static_output
#	Report the differences between the lists built for library 1 and
#	library 2: newly hidden symbols outside the '_' namespace,
#	exported symbols and external references.
static_output()
{
	output_if_not_empty "hidden but not reserved:" comm -13 Sh[12]
	if cmp -s s[12] && cmp -s SO[12]
	then
		printf "No static export changes\n"
	else
		printf "Static export changes:\n"
		output_if_not_empty "added:" comm -13 S[12]
		output_if_not_empty "removed:" comm -23 S[12]
		output_if_not_empty "weakened:" comm -12 SS1 SW2
		output_if_not_empty "strengthened:" comm -12 SW1 SS2
		output_if_not_empty "data object sizes changes:" \
		    data_sym_changes SO[12]
	fi
	if ! cmp -s SU[12]
	then
		printf "External reference changes:\n"
		output_if_not_empty "added:" comm -13 SU[12]
		output_if_not_empty "removed:" comm -23 SU[12]
	fi
}


unset odir
# Brace patterns naming every intermediate file of each mode.  They are
# stored unexpanded and are only turned into file names where the
# variables are later used unquoted.
file_list={D{,O,S,s,W,U},J,d,j,r}{1,2}
# Ss1 and Ss2 are written by static_collect, so they have to be listed
# here to be pre-created safely under -k and to be removed by -c.
static_file_list={S{,H,h,O,S,s,U,W},U,s}{1,2}

keep_temp=false
dynamic=true
static=false
verbose=false

# Switch to static library mode, including the list of work files
do_static() { static=true dynamic=false file_list=$static_file_list; }

while getopts :chkSv opt "$@"
do
	case $opt in
	c)	# Removes the files of the mode selected so far, so -S has
		# to precede -c to clean up after a static run.
		# $file_list must stay unquoted to be expanded into names.
		rm -f /tmp/$file_list
		exit 0;;
	h)	usage;;
	k)	keep_temp=true;;
	S)	do_static;;
	v)	verbose=true;;
	\?)	usage "unknown option -- $OPTARG";;
	esac
done
shift $((OPTIND - 1))
[[ $# -gt 2 ]] && usage "too many arguments"

# Old library?
if ! $static && [[ $1 = ?(*/)lib*.so* ]]
then
	[[ -f $1 ]] || fail "$1 doesn't exist"
	old=$1
	lib=${old##*/}
	lib=${lib%%.so.*}
	# the pattern above also accepts an unversioned "libfoo.so"
	lib=${lib%.so}
	shift
elif [[ $1 = ?(*/)lib*.a ]]
then
	# woo hoo, static library mode
	do_static
	if [[ -f $1 ]]
	then
		old=$1
		lib=${old##*/}
	elif [[ $1 = lib*.a && -f /usr/lib/$1 ]]
	then
		# a bare file name that isn't here: use the installed one
		old=/usr/lib/$1
		lib=$1
	else
		fail "$1 doesn't exist"
	fi
	lib=${lib%%.a}
	shift
else
	# try determining it from the current directory
	if [[ -f Makefile ]] && lib=$(get_lib_name Makefile) &&
	    [[ $lib != "" ]]
	then
		lib=lib$lib
	else
		lib=libc
	fi

	# Is there a copy of that lib in the current directory?
	# If so, use the highest numbered one
	if ! $static &&
	    ! pick_highest $lib.so.* &&
	    ! pick_highest /usr/lib/$lib.so.*
	then
		fail "unable to find $lib.so.*"
	elif $static
	then
		old=/usr/lib/${lib}.a
		[[ -f $old ]] || fail "$old doesn't exist"
	fi
fi

# New library?
# Take the new library from the command line if one was given, else
# default to the freshly built one in the obj subdirectory.
if [[ $1 = ?(*/)lib*.so* ]] ||
   { $static && [[ $1 = ?(*/)lib*.a ]]; }
then
	new=$1
	shift
elif $static
then
	new=obj/${lib}.a
else
	# Dig info out of the just built library
	# (check first: sourcing a missing file would abort the shell
	# without a useful explanation)
	[[ -f shlib_version ]] || fail "shlib_version doesn't exist"
	. ./shlib_version
	new=obj/${lib}.so.${major}.${minor}
fi
[[ -f $new ]] || fail "$new doesn't exist"

# Filter the output of readelf -s to be easier to parse by removing a
# field that only appears on some symbols: [<other>: 88]
# Not really arch-specific, but I've only seen it on alpha
filt_symtab() { sed 's/\[<other>: [0-9a-f]*\]//'; }
# Drop the lines for __retguard_<n> and __llvm_retpoline_<reg> symbols
filt_ret() { grep -Ev ' (__retguard_[0-9]+|__llvm_retpoline_[a-z]+[0-9]*)$'; }

if $keep_temp
then
	# precreate all the files we'll use, but with noclobber set to avoid
	# symlink attacks
	odir=/tmp
	files=
	# on a signal or error, remove only the files created so far
	trap 'ret=$?; rm -f $files; exit $ret' 1 2 15 ERR
else
	# The trap is installed before mktemp runs so that there is no
	# window in which the directory exists without a cleanup handler;
	# it also fires on normal exit (0).
	trap 'ret=$?; rm -rf "$odir"; exit $ret' 0 1 2 15 ERR
	odir=$(mktemp -dt check_sym.XXXXXXXXXX)
fi
set -C
# $odir/$file_list has to stay unquoted: the brace pattern stored in
# $file_list is what gets expanded into the individual file names.
for i in $odir/$file_list
do
	rm -f "$i"
	# with noclobber set this only succeeds by creating a new file
	3>"$i"
	files="$files $i"
done
set +C


#
# Collect data
#
$dynamic && dynamic_collect
$static && static_collect

# Now that we're done accessing $old and $new (which could be
# relative paths), chdir into our work directory, whatever it is
cd "$odir"

#
# Do The Job
#
for i in 1 2
do
	$dynamic && dynamic_analysis $i
	$static && static_analysis $i
done

{
	echo "$old --> $new"
	! $dynamic || dynamic_output
	! $static || static_output
}
