xref: /onnv-gate/usr/src/cmd/sgs/tools/common/leb128.c (revision 1618:8c9a4f31d225)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*1618Srie  * Common Development and Distribution License (the "License").
6*1618Srie  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
21*1618Srie 
220Sstevel@tonic-gate /*
23*1618Srie  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
240Sstevel@tonic-gate  * Use is subject to license terms.
250Sstevel@tonic-gate  */
26*1618Srie 
270Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
280Sstevel@tonic-gate 
290Sstevel@tonic-gate #include <stdio.h>
300Sstevel@tonic-gate #include <dwarf.h>
310Sstevel@tonic-gate #include <sys/types.h>
320Sstevel@tonic-gate #include <sys/elf.h>
330Sstevel@tonic-gate 
340Sstevel@tonic-gate /*
350Sstevel@tonic-gate  * Little Endian Base 128 (LEB128) numbers.
360Sstevel@tonic-gate  * ----------------------------------------
370Sstevel@tonic-gate  *
380Sstevel@tonic-gate  * LEB128 is a scheme for encoding integers densely that exploits the
390Sstevel@tonic-gate  * assumption that most integers are small in magnitude. (This encoding
400Sstevel@tonic-gate  * is equally suitable whether the target machine architecture represents
410Sstevel@tonic-gate  * data in big-endian or little-endian order.)
420Sstevel@tonic-gate  *
430Sstevel@tonic-gate  * Unsigned LEB128 numbers are encoded as follows: start at the low order
440Sstevel@tonic-gate  * end of an unsigned integer and chop it into 7-bit chunks. Place each
450Sstevel@tonic-gate  * chunk into the low order 7 bits of a byte. Typically, several of the
460Sstevel@tonic-gate  * high order bytes will be zero; discard them. Emit the remaining bytes in
470Sstevel@tonic-gate  * a stream, starting with the low order byte; set the high order bit on
480Sstevel@tonic-gate  * each byte except the last emitted byte. The high bit of zero on the last
490Sstevel@tonic-gate  * byte indicates to the decoder that it has encountered the last byte.
500Sstevel@tonic-gate  * The integer zero is a special case, consisting of a single zero byte.
510Sstevel@tonic-gate  *
520Sstevel@tonic-gate  * Signed, 2s complement LEB128 numbers are encoded in a similar manner,
530Sstevel@tonic-gate  * except that the criterion for discarding high order bytes is not whether
540Sstevel@tonic-gate  * they are zero, but whether they consist entirely of sign extension bits.
550Sstevel@tonic-gate  * Consider the 32-bit integer -2. The three high order bytes of the number
560Sstevel@tonic-gate  * are sign extension, thus LEB128 would represent it as a single byte
570Sstevel@tonic-gate  * containing the low order 7 bits, with the high order bit cleared to
580Sstevel@tonic-gate  * indicate the end of the byte stream.
590Sstevel@tonic-gate  *
600Sstevel@tonic-gate  * Note that there is nothing within the LEB128 representation that
610Sstevel@tonic-gate  * indicates whether an encoded number is signed or unsigned. The decoder
620Sstevel@tonic-gate  * must know what type of number to expect.
630Sstevel@tonic-gate  *
640Sstevel@tonic-gate  * DWARF Exception Header Encoding
650Sstevel@tonic-gate  * -------------------------------
660Sstevel@tonic-gate  *
670Sstevel@tonic-gate  * The DWARF Exception Header Encoding is used to describe the type of data
680Sstevel@tonic-gate  * used in the .eh_frame_hdr section. The upper 4 bits indicate how the
690Sstevel@tonic-gate  * value is to be applied. The lower 4 bits indicate the format of the data.
700Sstevel@tonic-gate  *
710Sstevel@tonic-gate  * DWARF Exception Header value format
720Sstevel@tonic-gate  *
730Sstevel@tonic-gate  * Name		Value Meaning
740Sstevel@tonic-gate  * DW_EH_PE_omit	    0xff No value is present.
750Sstevel@tonic-gate  * DW_EH_PE_absptr	    0x00 Value is a void*
760Sstevel@tonic-gate  * DW_EH_PE_uleb128	    0x01 Unsigned value is encoded using the
770Sstevel@tonic-gate  *				 Little Endian Base 128 (LEB128)
780Sstevel@tonic-gate  * DW_EH_PE_udata2	    0x02 A 2 bytes unsigned value.
790Sstevel@tonic-gate  * DW_EH_PE_udata4	    0x03 A 4 bytes unsigned value.
800Sstevel@tonic-gate  * DW_EH_PE_udata8	    0x04 An 8 bytes unsigned value.
810Sstevel@tonic-gate  * DW_EH_PE_signed          0x08 bit on for all signed encodings
820Sstevel@tonic-gate  * DW_EH_PE_sleb128	    0x09 Signed value is encoded using the
830Sstevel@tonic-gate  *				 Little Endian Base 128 (LEB128)
840Sstevel@tonic-gate  * DW_EH_PE_sdata2	    0x0A A 2 bytes signed value.
850Sstevel@tonic-gate  * DW_EH_PE_sdata4	    0x0B A 4 bytes signed value.
860Sstevel@tonic-gate  * DW_EH_PE_sdata8	    0x0C An 8 bytes signed value.
870Sstevel@tonic-gate  *
880Sstevel@tonic-gate  * DWARF Exception Header application
890Sstevel@tonic-gate  *
900Sstevel@tonic-gate  * Name	    Value Meaning
910Sstevel@tonic-gate  * DW_EH_PE_absptr	   0x00 Value is used with no modification.
920Sstevel@tonic-gate  * DW_EH_PE_pcrel	   0x10 Value is relative to the location of itself
930Sstevel@tonic-gate  * DW_EH_PE_textrel	   0x20
940Sstevel@tonic-gate  * DW_EH_PE_datarel	   0x30 Value is relative to the beginning of the
950Sstevel@tonic-gate  *				eh_frame_hdr segment ( segment type
960Sstevel@tonic-gate  *			        PT_GNU_EH_FRAME )
970Sstevel@tonic-gate  * DW_EH_PE_funcrel        0x40
980Sstevel@tonic-gate  * DW_EH_PE_aligned        0x50 value is an aligned void*
990Sstevel@tonic-gate  * DW_EH_PE_indirect       0x80 bit to signal indirection after relocation
1000Sstevel@tonic-gate  * DW_EH_PE_omit	   0xff No value is present.
1010Sstevel@tonic-gate  *
1020Sstevel@tonic-gate  */
1030Sstevel@tonic-gate 
/*
 * Decode one unsigned LEB128 number.
 *
 * data	base of the buffer holding the encoded bytes
 * dotp	in: offset within data of the first encoded byte
 *		out: offset of the first byte following the encoded number
 *
 * Returns the decoded value.  Bytes are consumed until one with the high
 * (continuation) bit clear is found; no bound is placed on how far the
 * scan may run, so the caller must supply a properly terminated encoding.
 * Payload bits beyond the 64th are consumed but discarded.
 */
uint64_t
uleb_extract(unsigned char *data, uint64_t *dotp)
{
	uint64_t	dot = *dotp;
	uint64_t	res = 0;
	int		shift = 0;
	unsigned char	byte;

	do {
		byte = data[dot++];

		/*
		 * Merge the low 7 bits in above those already collected.
		 * The chunk is widened to 64 bits before it is shifted:
		 * shifting it as an int overflows into the sign bit at a
		 * shift of 28 and is undefined from 32 up.  Once 64 bits
		 * have been filled nothing further can be represented, so
		 * stop accumulating (this also keeps the shift count
		 * bounded) but keep consuming bytes until the terminator.
		 */
		if (shift < 64) {
			res |= (uint64_t)(byte & 0x7f) << shift;
			shift += 7;
		}

		/*
		 * A set high bit means another byte follows.
		 */
	} while (byte & 0x80);

	*dotp = dot;
	return (res);
}
1410Sstevel@tonic-gate 
/*
 * Decode one signed (2s complement) LEB128 number.
 *
 * data	base of the buffer holding the encoded bytes
 * dotp	in: offset within data of the first encoded byte
 *		out: offset of the first byte following the encoded number
 *
 * Returns the decoded, sign extended value.  Bytes are consumed until one
 * with the high (continuation) bit clear is found; no bound is placed on
 * how far the scan may run, so the caller must supply a properly
 * terminated encoding.  Payload bits beyond the 64th are consumed but
 * discarded.
 */
int64_t
sleb_extract(unsigned char *data, uint64_t *dotp)
{
	uint64_t	dot = *dotp;
	uint64_t	res = 0;
	int		shift = 0;
	unsigned char	byte;

	do {
		byte = data[dot++];

		/*
		 * Merge the low 7 bits in above those already collected.
		 * Accumulation is done in unsigned 64-bit arithmetic:
		 * shifting the chunk as an int overflows into the sign
		 * bit at a shift of 28 and is undefined from 32 up.  Once
		 * 64 bits have been filled stop accumulating, but keep
		 * consuming bytes until the terminator.
		 */
		if (shift < 64) {
			res |= (uint64_t)(byte & 0x7f) << shift;
			shift += 7;
		}

		/*
		 * A set high bit means another byte follows.
		 */
	} while (byte & 0x80);

	*dotp = dot;

	/*
	 * Sign extend.  Bit 6 of the final byte is the sign of the encoded
	 * number; if it is set and the payload did not already fill all
	 * 64 bits, set every bit above the payload.  When 64 bits were
	 * filled, bit 63 came from the payload itself and nothing remains
	 * to extend.
	 */
	if ((shift < 64) && (byte & 0x40))
		res |= ~(uint64_t)0 << shift;

	/*
	 * The conversion reinterprets the bit pattern as a 2s complement
	 * value.
	 */
	return ((int64_t)res);
}
1850Sstevel@tonic-gate 
1860Sstevel@tonic-gate uint64_t
1870Sstevel@tonic-gate dwarf_ehe_extract(unsigned char *data, uint64_t *dotp, uint_t ehe_flags,
1880Sstevel@tonic-gate     unsigned char *eident, uint64_t pcaddr)
1890Sstevel@tonic-gate {
1900Sstevel@tonic-gate 	uint64_t    dot = *dotp;
1910Sstevel@tonic-gate 	uint_t	    lsb;
1920Sstevel@tonic-gate 	uint_t	    wordsize;
1930Sstevel@tonic-gate 	uint_t	    fsize;
1940Sstevel@tonic-gate 	uint64_t    result;
1950Sstevel@tonic-gate 
1960Sstevel@tonic-gate 	if (eident[EI_DATA] == ELFDATA2LSB)
1970Sstevel@tonic-gate 		lsb = 1;
1980Sstevel@tonic-gate 	else
1990Sstevel@tonic-gate 		lsb = 0;
2000Sstevel@tonic-gate 
2010Sstevel@tonic-gate 	if (eident[EI_CLASS] == ELFCLASS64)
2020Sstevel@tonic-gate 		wordsize = 8;
2030Sstevel@tonic-gate 	else
2040Sstevel@tonic-gate 		wordsize = 4;
2050Sstevel@tonic-gate 
2060Sstevel@tonic-gate 	switch (ehe_flags & 0x0f) {
2070Sstevel@tonic-gate 	case DW_EH_PE_omit:
2080Sstevel@tonic-gate 		return (0);
2090Sstevel@tonic-gate 	case DW_EH_PE_absptr:
2100Sstevel@tonic-gate 		fsize = wordsize;
2110Sstevel@tonic-gate 		break;
2120Sstevel@tonic-gate 	case DW_EH_PE_udata8:
2130Sstevel@tonic-gate 	case DW_EH_PE_sdata8:
2140Sstevel@tonic-gate 		fsize = 8;
2150Sstevel@tonic-gate 		break;
2160Sstevel@tonic-gate 	case DW_EH_PE_udata4:
2170Sstevel@tonic-gate 	case DW_EH_PE_sdata4:
2180Sstevel@tonic-gate 		fsize = 4;
2190Sstevel@tonic-gate 		break;
2200Sstevel@tonic-gate 	case DW_EH_PE_udata2:
2210Sstevel@tonic-gate 	case DW_EH_PE_sdata2:
2220Sstevel@tonic-gate 		fsize = 2;
2230Sstevel@tonic-gate 		break;
2240Sstevel@tonic-gate 	case DW_EH_PE_uleb128:
2250Sstevel@tonic-gate 		return (uleb_extract(data, dotp));
2260Sstevel@tonic-gate 	case DW_EH_PE_sleb128:
2270Sstevel@tonic-gate 		return ((uint64_t)sleb_extract(data, dotp));
2280Sstevel@tonic-gate 	default:
2290Sstevel@tonic-gate 		return (0);
2300Sstevel@tonic-gate 	}
2310Sstevel@tonic-gate 
2320Sstevel@tonic-gate 	if (lsb) {
2330Sstevel@tonic-gate 		/*
2340Sstevel@tonic-gate 		 * Extract unaligned LSB formated data
2350Sstevel@tonic-gate 		 */
2360Sstevel@tonic-gate 		uint_t	cnt;
2370Sstevel@tonic-gate 
2380Sstevel@tonic-gate 		result = 0;
2390Sstevel@tonic-gate 		for (cnt = 0; cnt < fsize;
2400Sstevel@tonic-gate 		    cnt++, dot++) {
2410Sstevel@tonic-gate 			uint64_t val;
2420Sstevel@tonic-gate 			val = data[dot];
2430Sstevel@tonic-gate 			result |= val << (cnt * 8);
2440Sstevel@tonic-gate 		}
2450Sstevel@tonic-gate 	} else {
2460Sstevel@tonic-gate 		/*
2470Sstevel@tonic-gate 		 * Extract unaligned MSB formated data
2480Sstevel@tonic-gate 		 */
2490Sstevel@tonic-gate 		uint_t	cnt;
2500Sstevel@tonic-gate 		result = 0;
2510Sstevel@tonic-gate 		for (cnt = 0; cnt < fsize;
2520Sstevel@tonic-gate 		    cnt++, dot++) {
2530Sstevel@tonic-gate 			uint64_t	val;
2540Sstevel@tonic-gate 			val = data[dot];
2550Sstevel@tonic-gate 			result |= val << ((fsize - cnt - 1) * 8);
2560Sstevel@tonic-gate 		}
2570Sstevel@tonic-gate 	}
2580Sstevel@tonic-gate 	/*
2590Sstevel@tonic-gate 	 * perform sign extension
2600Sstevel@tonic-gate 	 */
2610Sstevel@tonic-gate 	if ((ehe_flags & DW_EH_PE_signed) &&
2620Sstevel@tonic-gate 	    (fsize < sizeof (uint64_t))) {
2630Sstevel@tonic-gate 		int64_t	sresult;
2640Sstevel@tonic-gate 		uint_t	bitshift;
2650Sstevel@tonic-gate 		sresult = result;
2660Sstevel@tonic-gate 		bitshift = (sizeof (uint64_t) - fsize) * 8;
2670Sstevel@tonic-gate 		sresult = (sresult << bitshift) >> bitshift;
2680Sstevel@tonic-gate 		result = sresult;
2690Sstevel@tonic-gate 	}
2700Sstevel@tonic-gate 
2710Sstevel@tonic-gate 	/*
2720Sstevel@tonic-gate 	 * If pcrel and we have a value (ie: we've been
2730Sstevel@tonic-gate 	 * relocated), then adjust the value.
2740Sstevel@tonic-gate 	 */
2750Sstevel@tonic-gate 	if (result && (ehe_flags & DW_EH_PE_pcrel)) {
2760Sstevel@tonic-gate 		result = pcaddr + result;
2770Sstevel@tonic-gate 	}
2780Sstevel@tonic-gate 	*dotp = dot;
2790Sstevel@tonic-gate 	return (result);
2800Sstevel@tonic-gate }
281