xref: /minix3/external/public-domain/xz/dist/src/common/tuklib_integer.h (revision 0a6a1f1d05b60e214de2f05a7310ddd1f0e590e7)
15a645f22SBen Gras ///////////////////////////////////////////////////////////////////////////////
25a645f22SBen Gras //
35a645f22SBen Gras /// \file       tuklib_integer.h
45a645f22SBen Gras /// \brief      Various integer and bit operations
55a645f22SBen Gras ///
65a645f22SBen Gras /// This file provides macros or functions to do some basic integer and bit
75a645f22SBen Gras /// operations.
85a645f22SBen Gras ///
95a645f22SBen Gras /// Endianness related integer operations (XX = 16, 32, or 64; Y = b or l):
105a645f22SBen Gras ///   - Byte swapping: bswapXX(num)
115a645f22SBen Gras ///   - Byte order conversions to/from native: convXXYe(num)
125a645f22SBen Gras ///   - Aligned reads: readXXYe(ptr)
135a645f22SBen Gras ///   - Aligned writes: writeXXYe(ptr, num)
145a645f22SBen Gras ///   - Unaligned reads (16/32-bit only): unaligned_readXXYe(ptr)
155a645f22SBen Gras ///   - Unaligned writes (16/32-bit only): unaligned_writeXXYe(ptr, num)
165a645f22SBen Gras ///
175a645f22SBen Gras /// Since they can be macros, the arguments should have no side effects
185a645f22SBen Gras /// because they may be evaluated more than once.
195a645f22SBen Gras ///
205a645f22SBen Gras /// \todo       PowerPC and possibly some other architectures support
215a645f22SBen Gras ///             byte swapping load and store instructions. This file
225a645f22SBen Gras ///             doesn't take advantage of those instructions.
235a645f22SBen Gras ///
245a645f22SBen Gras /// Bit scan operations for non-zero 32-bit integers:
255a645f22SBen Gras ///   - Bit scan reverse (find highest non-zero bit): bsr32(num)
265a645f22SBen Gras ///   - Count leading zeros: clz32(num)
275a645f22SBen Gras ///   - Count trailing zeros: ctz32(num)
285a645f22SBen Gras ///   - Bit scan forward (simply an alias for ctz32()): bsf32(num)
295a645f22SBen Gras ///
305a645f22SBen Gras /// The above bit scan operations return 0-31. If num is zero,
315a645f22SBen Gras /// the result is undefined.
325a645f22SBen Gras //
335a645f22SBen Gras //  Authors:    Lasse Collin
345a645f22SBen Gras //              Joachim Henke
355a645f22SBen Gras //
365a645f22SBen Gras //  This file has been put into the public domain.
375a645f22SBen Gras //  You can do whatever you want with this file.
385a645f22SBen Gras //
395a645f22SBen Gras ///////////////////////////////////////////////////////////////////////////////
405a645f22SBen Gras 
415a645f22SBen Gras #ifndef TUKLIB_INTEGER_H
425a645f22SBen Gras #define TUKLIB_INTEGER_H
435a645f22SBen Gras 
#include "tuklib_common.h"
#include <string.h>
455a645f22SBen Gras 
465a645f22SBen Gras 
475a645f22SBen Gras ////////////////////////////////////////
485a645f22SBen Gras // Operating system specific features //
495a645f22SBen Gras ////////////////////////////////////////
505a645f22SBen Gras 
// Use the byte swapping helpers of the operating system when the build
// configuration says they exist. Whatever is still undefined after this
// block (bswapXX or convXXYe) gets a generic definition further below.
#if defined(HAVE_BYTESWAP_H)
	// glibc, uClibc, dietlibc
#	include <byteswap.h>
#	ifdef HAVE_BSWAP_16
#		define bswap16(num) bswap_16(num)
#	endif
#	ifdef HAVE_BSWAP_32
#		define bswap32(num) bswap_32(num)
#	endif
#	ifdef HAVE_BSWAP_64
#		define bswap64(num) bswap_64(num)
#	endif

#elif defined(HAVE_SYS_ENDIAN_H)
	// *BSDs and Darwin
#	include <sys/endian.h>

#elif defined(HAVE_SYS_BYTEORDER_H)
	// Solaris
#	include <sys/byteorder.h>
#	ifdef BSWAP_16
#		define bswap16(num) BSWAP_16(num)
#	endif
#	ifdef BSWAP_32
#		define bswap32(num) BSWAP_32(num)
#	endif
#	ifdef BSWAP_64
#		define bswap64(num) BSWAP_64(num)
#	endif
#	ifdef BE_16
#		define conv16be(num) BE_16(num)
#	endif
#	ifdef BE_32
#		define conv32be(num) BE_32(num)
#	endif
#	ifdef BE_64
#		define conv64be(num) BE_64(num)
#	endif
#	ifdef LE_16
#		define conv16le(num) LE_16(num)
#	endif
#	ifdef LE_32
#		define conv32le(num) LE_32(num)
#	endif
#	ifdef LE_64
#		define conv64le(num) LE_64(num)
#	endif
#endif
995a645f22SBen Gras 
1005a645f22SBen Gras 
1015a645f22SBen Gras ///////////////////
1025a645f22SBen Gras // Byte swapping //
1035a645f22SBen Gras ///////////////////
1045a645f22SBen Gras 
// Generic 16-bit byte swap, used only when the platform did not provide one.
//
// The argument is evaluated twice, so it must not have side effects.
// The operands are promoted to int before shifting, so the combined value
// still carries the original high byte in bits 16-23. The outer cast
// narrows the result back to 16 bits so that the macro yields a proper
// uint16_t value in every context.
#ifndef bswap16
#	define bswap16(num) \
		((uint16_t)(((uint16_t)(num) << 8) | ((uint16_t)(num) >> 8)))
#endif
1095a645f22SBen Gras 
// Generic 32-bit byte swap, used only when the platform did not provide one.
//
// The argument is evaluated four times, so it must not have side effects.
// The outermost bytes need no mask because the shift already discards
// everything else; the two middle bytes are masked into place.
#ifndef bswap32
#	define bswap32(num) \
		( (((uint32_t)(num) << 24)                       ) \
		| (((uint32_t)(num) <<  8) & UINT32_C(0x00FF0000)) \
		| (((uint32_t)(num) >>  8) & UINT32_C(0x0000FF00)) \
		| (((uint32_t)(num) >> 24)                       ) )
#endif
1175a645f22SBen Gras 
// Generic 64-bit byte swap, used only when the platform did not provide one.
//
// The argument is evaluated eight times, so it must not have side effects.
// Each term moves one source byte to its mirrored position; the two
// outermost terms need no mask because the shift discards the other bytes.
#ifndef bswap64
#	define bswap64(num) \
		( (((uint64_t)(num) << 56)                               ) \
		| (((uint64_t)(num) << 40) & UINT64_C(0x00FF000000000000)) \
		| (((uint64_t)(num) << 24) & UINT64_C(0x0000FF0000000000)) \
		| (((uint64_t)(num) <<  8) & UINT64_C(0x000000FF00000000)) \
		| (((uint64_t)(num) >>  8) & UINT64_C(0x00000000FF000000)) \
		| (((uint64_t)(num) >> 24) & UINT64_C(0x0000000000FF0000)) \
		| (((uint64_t)(num) >> 40) & UINT64_C(0x000000000000FF00)) \
		| (((uint64_t)(num) >> 56)                               ) )
#endif
1295a645f22SBen Gras 
// Define conversion macros using the basic byte swapping macros.
//
// convXXbe() and convXXle() convert between native byte order and big or
// little endian. WORDS_BIGENDIAN decides which of the two is a plain cast
// and which one swaps bytes. A macro that the platform header already
// provided is left untouched.
#ifdef WORDS_BIGENDIAN
#	ifndef conv16be
#		define conv16be(num) ((uint16_t)(num))
#	endif
#	ifndef conv32be
#		define conv32be(num) ((uint32_t)(num))
#	endif
#	ifndef conv64be
#		define conv64be(num) ((uint64_t)(num))
#	endif
#	ifndef conv16le
#		define conv16le(num) bswap16(num)
#	endif
#	ifndef conv32le
#		define conv32le(num) bswap32(num)
#	endif
#	ifndef conv64le
#		define conv64le(num) bswap64(num)
#	endif
#else
#	ifndef conv16be
#		define conv16be(num) bswap16(num)
#	endif
#	ifndef conv32be
#		define conv32be(num) bswap32(num)
#	endif
#	ifndef conv64be
#		define conv64be(num) bswap64(num)
#	endif
#	ifndef conv16le
#		define conv16le(num) ((uint16_t)(num))
#	endif
#	ifndef conv32le
#		define conv32le(num) ((uint32_t)(num))
#	endif
#	ifndef conv64le
#		define conv64le(num) ((uint64_t)(num))
#	endif
#endif
1705a645f22SBen Gras 
1715a645f22SBen Gras 
1725a645f22SBen Gras //////////////////////////////
1735a645f22SBen Gras // Aligned reads and writes //
1745a645f22SBen Gras //////////////////////////////
1755a645f22SBen Gras 
/// \brief      Read a 16-bit big endian integer
///
/// \param      buf     Pointer to at least two readable bytes
///
/// \return     The integer converted to native byte order
static inline uint16_t
read16be(const uint8_t *buf)
{
	// Copy the bytes instead of dereferencing a casted pointer.
	// Reading a uint8_t buffer through a uint16_t lvalue breaks the
	// strict aliasing rules and requires buf to be suitably aligned.
	uint16_t num;
	memcpy(&num, buf, sizeof(num));
	return conv16be(num);
}
1825a645f22SBen Gras 
1835a645f22SBen Gras 
/// \brief      Read a 16-bit little endian integer
///
/// \param      buf     Pointer to at least two readable bytes
///
/// \return     The integer converted to native byte order
static inline uint16_t
read16le(const uint8_t *buf)
{
	// Copy the bytes instead of dereferencing a casted pointer.
	// Reading a uint8_t buffer through a uint16_t lvalue breaks the
	// strict aliasing rules and requires buf to be suitably aligned.
	uint16_t num;
	memcpy(&num, buf, sizeof(num));
	return conv16le(num);
}
1905a645f22SBen Gras 
1915a645f22SBen Gras 
/// \brief      Read a 32-bit big endian integer
///
/// \param      buf     Pointer to at least four readable bytes
///
/// \return     The integer converted to native byte order
static inline uint32_t
read32be(const uint8_t *buf)
{
	// Copy the bytes instead of dereferencing a casted pointer.
	// Reading a uint8_t buffer through a uint32_t lvalue breaks the
	// strict aliasing rules and requires buf to be suitably aligned.
	uint32_t num;
	memcpy(&num, buf, sizeof(num));
	return conv32be(num);
}
1985a645f22SBen Gras 
1995a645f22SBen Gras 
/// \brief      Read a 32-bit little endian integer
///
/// \param      buf     Pointer to at least four readable bytes
///
/// \return     The integer converted to native byte order
static inline uint32_t
read32le(const uint8_t *buf)
{
	// Copy the bytes instead of dereferencing a casted pointer.
	// Reading a uint8_t buffer through a uint32_t lvalue breaks the
	// strict aliasing rules and requires buf to be suitably aligned.
	uint32_t num;
	memcpy(&num, buf, sizeof(num));
	return conv32le(num);
}
2065a645f22SBen Gras 
2075a645f22SBen Gras 
/// \brief      Read a 64-bit big endian integer
///
/// \param      buf     Pointer to at least eight readable bytes
///
/// \return     The integer converted to native byte order
static inline uint64_t
read64be(const uint8_t *buf)
{
	// Copy the bytes instead of dereferencing a casted pointer.
	// Reading a uint8_t buffer through a uint64_t lvalue breaks the
	// strict aliasing rules and requires buf to be suitably aligned.
	uint64_t num;
	memcpy(&num, buf, sizeof(num));
	return conv64be(num);
}
2145a645f22SBen Gras 
2155a645f22SBen Gras 
/// \brief      Read a 64-bit little endian integer
///
/// \param      buf     Pointer to at least eight readable bytes
///
/// \return     The integer converted to native byte order
static inline uint64_t
read64le(const uint8_t *buf)
{
	// Copy the bytes instead of dereferencing a casted pointer.
	// Reading a uint8_t buffer through a uint64_t lvalue breaks the
	// strict aliasing rules and requires buf to be suitably aligned.
	uint64_t num;
	memcpy(&num, buf, sizeof(num));
	return conv64le(num);
}
2225a645f22SBen Gras 
2235a645f22SBen Gras 
// NOTE: Possible byte swapping must be done in a macro to allow GCC
// to optimize byte swapping of constants when using glibc's or *BSD's
// byte swapping macros. The actual write is done in an inline function
// to make type checking of the buf pointer possible similarly to readXXYe()
// functions.
//
// Because the conversion is a macro, num may be evaluated more than once
// and therefore must not have side effects.

#define write16be(buf, num) write16ne((buf), conv16be(num))
#define write16le(buf, num) write16ne((buf), conv16le(num))
#define write32be(buf, num) write32ne((buf), conv32be(num))
#define write32le(buf, num) write32ne((buf), conv32le(num))
#define write64be(buf, num) write64ne((buf), conv64be(num))
#define write64le(buf, num) write64ne((buf), conv64le(num))
2365a645f22SBen Gras 
2375a645f22SBen Gras 
/// \brief      Write a 16-bit integer in native byte order
///
/// \param      buf     Pointer to at least two writable bytes
/// \param      num     Value to store
static inline void
write16ne(uint8_t *buf, uint16_t num)
{
	// Store with memcpy() instead of through a casted pointer: writing
	// a uint8_t buffer via a uint16_t lvalue breaks the strict aliasing
	// rules and requires buf to be suitably aligned.
	memcpy(buf, &num, sizeof(num));
}
2445a645f22SBen Gras 
2455a645f22SBen Gras 
/// \brief      Write a 32-bit integer in native byte order
///
/// \param      buf     Pointer to at least four writable bytes
/// \param      num     Value to store
static inline void
write32ne(uint8_t *buf, uint32_t num)
{
	// Store with memcpy() instead of through a casted pointer: writing
	// a uint8_t buffer via a uint32_t lvalue breaks the strict aliasing
	// rules and requires buf to be suitably aligned.
	memcpy(buf, &num, sizeof(num));
}
2525a645f22SBen Gras 
2535a645f22SBen Gras 
/// \brief      Write a 64-bit integer in native byte order
///
/// \param      buf     Pointer to at least eight writable bytes
/// \param      num     Value to store
static inline void
write64ne(uint8_t *buf, uint64_t num)
{
	// Store with memcpy() instead of through a casted pointer: writing
	// a uint8_t buffer via a uint64_t lvalue breaks the strict aliasing
	// rules and requires buf to be suitably aligned.
	memcpy(buf, &num, sizeof(num));
}
2605a645f22SBen Gras 
2615a645f22SBen Gras 
2625a645f22SBen Gras ////////////////////////////////
2635a645f22SBen Gras // Unaligned reads and writes //
2645a645f22SBen Gras ////////////////////////////////
2655a645f22SBen Gras 
2665a645f22SBen Gras // NOTE: TUKLIB_FAST_UNALIGNED_ACCESS indicates only support for 16-bit and
2675a645f22SBen Gras // 32-bit unaligned integer loads and stores. It's possible that 64-bit
2685a645f22SBen Gras // unaligned access doesn't work or is slower than byte-by-byte access.
2695a645f22SBen Gras // Since unaligned 64-bit is probably not needed as often as 16-bit or
2705a645f22SBen Gras // 32-bit, we simply don't support 64-bit unaligned access for now.
2715a645f22SBen Gras #ifdef TUKLIB_FAST_UNALIGNED_ACCESS
2725a645f22SBen Gras #	define unaligned_read16be read16be
2735a645f22SBen Gras #	define unaligned_read16le read16le
2745a645f22SBen Gras #	define unaligned_read32be read32be
2755a645f22SBen Gras #	define unaligned_read32le read32le
2765a645f22SBen Gras #	define unaligned_write16be write16be
2775a645f22SBen Gras #	define unaligned_write16le write16le
2785a645f22SBen Gras #	define unaligned_write32be write32be
2795a645f22SBen Gras #	define unaligned_write32le write32le
2805a645f22SBen Gras 
2815a645f22SBen Gras #else
2825a645f22SBen Gras 
/// \brief      Read a 16-bit big endian integer one byte at a time
///
/// \param      buf     Pointer to at least two readable bytes; no
///                     alignment is required
///
/// \return     The integer in native byte order
static inline uint16_t
unaligned_read16be(const uint8_t *buf)
{
	// buf[0] is the most significant byte.
	uint16_t num = ((uint16_t)buf[0] << 8) | (uint16_t)buf[1];
	return num;
}
2895a645f22SBen Gras 
2905a645f22SBen Gras 
/// \brief      Read a 16-bit little endian integer one byte at a time
///
/// \param      buf     Pointer to at least two readable bytes; no
///                     alignment is required
///
/// \return     The integer in native byte order
static inline uint16_t
unaligned_read16le(const uint8_t *buf)
{
	// buf[0] is the least significant byte.
	uint16_t num = ((uint16_t)buf[0]) | ((uint16_t)buf[1] << 8);
	return num;
}
2975a645f22SBen Gras 
2985a645f22SBen Gras 
/// \brief      Read a 32-bit big endian integer one byte at a time
///
/// \param      buf     Pointer to at least four readable bytes; no
///                     alignment is required
///
/// \return     The integer in native byte order
static inline uint32_t
unaligned_read32be(const uint8_t *buf)
{
	// Assemble from the most significant byte downwards. Each byte is
	// widened to uint32_t before shifting so the shift never happens
	// in a signed int.
	uint32_t num = (uint32_t)buf[0] << 24;
	num |= (uint32_t)buf[1] << 16;
	num |= (uint32_t)buf[2] << 8;
	num |= (uint32_t)buf[3];
	return num;
}
3085a645f22SBen Gras 
3095a645f22SBen Gras 
/// \brief      Read a 32-bit little endian integer one byte at a time
///
/// \param      buf     Pointer to at least four readable bytes; no
///                     alignment is required
///
/// \return     The integer in native byte order
static inline uint32_t
unaligned_read32le(const uint8_t *buf)
{
	// Assemble from the least significant byte upwards. Each byte is
	// widened to uint32_t before shifting so the shift never happens
	// in a signed int.
	uint32_t num = (uint32_t)buf[0];
	num |= (uint32_t)buf[1] << 8;
	num |= (uint32_t)buf[2] << 16;
	num |= (uint32_t)buf[3] << 24;
	return num;
}
3195a645f22SBen Gras 
3205a645f22SBen Gras 
/// \brief      Write a 16-bit integer as big endian one byte at a time
///
/// \param      buf     Pointer to at least two writable bytes; no
///                     alignment is required
/// \param      num     Value to store
static inline void
unaligned_write16be(uint8_t *buf, uint16_t num)
{
	// Most significant byte goes first.
	buf[0] = (uint8_t)(num >> 8);
	buf[1] = (uint8_t)num;
}
3285a645f22SBen Gras 
3295a645f22SBen Gras 
/// \brief      Write a 16-bit integer as little endian one byte at a time
///
/// \param      buf     Pointer to at least two writable bytes; no
///                     alignment is required
/// \param      num     Value to store
static inline void
unaligned_write16le(uint8_t *buf, uint16_t num)
{
	// Least significant byte goes first.
	buf[0] = (uint8_t)num;
	buf[1] = (uint8_t)(num >> 8);
}
3375a645f22SBen Gras 
3385a645f22SBen Gras 
/// \brief      Write a 32-bit integer as big endian one byte at a time
///
/// \param      buf     Pointer to at least four writable bytes; no
///                     alignment is required
/// \param      num     Value to store
static inline void
unaligned_write32be(uint8_t *buf, uint32_t num)
{
	// Most significant byte goes first.
	buf[0] = (uint8_t)(num >> 24);
	buf[1] = (uint8_t)(num >> 16);
	buf[2] = (uint8_t)(num >> 8);
	buf[3] = (uint8_t)num;
}
3485a645f22SBen Gras 
3495a645f22SBen Gras 
/// \brief      Write a 32-bit integer as little endian one byte at a time
///
/// \param      buf     Pointer to at least four writable bytes; no
///                     alignment is required
/// \param      num     Value to store
static inline void
unaligned_write32le(uint8_t *buf, uint32_t num)
{
	// Least significant byte goes first.
	buf[0] = (uint8_t)num;
	buf[1] = (uint8_t)(num >> 8);
	buf[2] = (uint8_t)(num >> 16);
	buf[3] = (uint8_t)(num >> 24);
}
3595a645f22SBen Gras 
3605a645f22SBen Gras #endif
3615a645f22SBen Gras 
3625a645f22SBen Gras 
3635a645f22SBen Gras static inline uint32_t
bsr32(uint32_t n)3645a645f22SBen Gras bsr32(uint32_t n)
3655a645f22SBen Gras {
3665a645f22SBen Gras 	// Check for ICC first, since it tends to define __GNUC__ too.
3675a645f22SBen Gras #if defined(__INTEL_COMPILER)
3685a645f22SBen Gras 	return _bit_scan_reverse(n);
3695a645f22SBen Gras 
3705a645f22SBen Gras #elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX == UINT32_MAX
3715a645f22SBen Gras 	// GCC >= 3.4 has __builtin_clz(), which gives good results on
3725a645f22SBen Gras 	// multiple architectures. On x86, __builtin_clz() ^ 31U becomes
3735a645f22SBen Gras 	// either plain BSR (so the XOR gets optimized away) or LZCNT and
3745a645f22SBen Gras 	// XOR (if -march indicates that SSE4a instructions are supported).
3755a645f22SBen Gras 	return __builtin_clz(n) ^ 31U;
3765a645f22SBen Gras 
3775a645f22SBen Gras #elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
3785a645f22SBen Gras 	uint32_t i;
3795a645f22SBen Gras 	__asm__("bsrl %1, %0" : "=r" (i) : "rm" (n));
3805a645f22SBen Gras 	return i;
3815a645f22SBen Gras 
3825a645f22SBen Gras #elif defined(_MSC_VER) && _MSC_VER >= 1400
3835a645f22SBen Gras 	// MSVC isn't supported by tuklib, but since this code exists,
3845a645f22SBen Gras 	// it doesn't hurt to have it here anyway.
3855a645f22SBen Gras 	uint32_t i;
3865a645f22SBen Gras 	_BitScanReverse((DWORD *)&i, n);
3875a645f22SBen Gras 	return i;
3885a645f22SBen Gras 
3895a645f22SBen Gras #else
3905a645f22SBen Gras 	uint32_t i = 31;
3915a645f22SBen Gras 
3925a645f22SBen Gras 	if ((n & UINT32_C(0xFFFF0000)) == 0) {
3935a645f22SBen Gras 		n <<= 16;
3945a645f22SBen Gras 		i = 15;
3955a645f22SBen Gras 	}
3965a645f22SBen Gras 
3975a645f22SBen Gras 	if ((n & UINT32_C(0xFF000000)) == 0) {
3985a645f22SBen Gras 		n <<= 8;
3995a645f22SBen Gras 		i -= 8;
4005a645f22SBen Gras 	}
4015a645f22SBen Gras 
4025a645f22SBen Gras 	if ((n & UINT32_C(0xF0000000)) == 0) {
4035a645f22SBen Gras 		n <<= 4;
4045a645f22SBen Gras 		i -= 4;
4055a645f22SBen Gras 	}
4065a645f22SBen Gras 
4075a645f22SBen Gras 	if ((n & UINT32_C(0xC0000000)) == 0) {
4085a645f22SBen Gras 		n <<= 2;
4095a645f22SBen Gras 		i -= 2;
4105a645f22SBen Gras 	}
4115a645f22SBen Gras 
4125a645f22SBen Gras 	if ((n & UINT32_C(0x80000000)) == 0)
4135a645f22SBen Gras 		--i;
4145a645f22SBen Gras 
4155a645f22SBen Gras 	return i;
4165a645f22SBen Gras #endif
4175a645f22SBen Gras }
4185a645f22SBen Gras 
4195a645f22SBen Gras 
4205a645f22SBen Gras static inline uint32_t
clz32(uint32_t n)4215a645f22SBen Gras clz32(uint32_t n)
4225a645f22SBen Gras {
4235a645f22SBen Gras #if defined(__INTEL_COMPILER)
4245a645f22SBen Gras 	return _bit_scan_reverse(n) ^ 31U;
4255a645f22SBen Gras 
4265a645f22SBen Gras #elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX == UINT32_MAX
4275a645f22SBen Gras 	return __builtin_clz(n);
4285a645f22SBen Gras 
4295a645f22SBen Gras #elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
4305a645f22SBen Gras 	uint32_t i;
4315a645f22SBen Gras 	__asm__("bsrl %1, %0\n\t"
4325a645f22SBen Gras 		"xorl $31, %0"
4335a645f22SBen Gras 		: "=r" (i) : "rm" (n));
4345a645f22SBen Gras 	return i;
4355a645f22SBen Gras 
4365a645f22SBen Gras #elif defined(_MSC_VER) && _MSC_VER >= 1400
4375a645f22SBen Gras 	uint32_t i;
4385a645f22SBen Gras 	_BitScanReverse((DWORD *)&i, n);
4395a645f22SBen Gras 	return i ^ 31U;
4405a645f22SBen Gras 
4415a645f22SBen Gras #else
4425a645f22SBen Gras 	uint32_t i = 0;
4435a645f22SBen Gras 
4445a645f22SBen Gras 	if ((n & UINT32_C(0xFFFF0000)) == 0) {
4455a645f22SBen Gras 		n <<= 16;
4465a645f22SBen Gras 		i = 16;
4475a645f22SBen Gras 	}
4485a645f22SBen Gras 
4495a645f22SBen Gras 	if ((n & UINT32_C(0xFF000000)) == 0) {
4505a645f22SBen Gras 		n <<= 8;
4515a645f22SBen Gras 		i += 8;
4525a645f22SBen Gras 	}
4535a645f22SBen Gras 
4545a645f22SBen Gras 	if ((n & UINT32_C(0xF0000000)) == 0) {
4555a645f22SBen Gras 		n <<= 4;
4565a645f22SBen Gras 		i += 4;
4575a645f22SBen Gras 	}
4585a645f22SBen Gras 
4595a645f22SBen Gras 	if ((n & UINT32_C(0xC0000000)) == 0) {
4605a645f22SBen Gras 		n <<= 2;
4615a645f22SBen Gras 		i += 2;
4625a645f22SBen Gras 	}
4635a645f22SBen Gras 
4645a645f22SBen Gras 	if ((n & UINT32_C(0x80000000)) == 0)
4655a645f22SBen Gras 		++i;
4665a645f22SBen Gras 
4675a645f22SBen Gras 	return i;
4685a645f22SBen Gras #endif
4695a645f22SBen Gras }
4705a645f22SBen Gras 
4715a645f22SBen Gras 
4725a645f22SBen Gras static inline uint32_t
ctz32(uint32_t n)4735a645f22SBen Gras ctz32(uint32_t n)
4745a645f22SBen Gras {
4755a645f22SBen Gras #if defined(__INTEL_COMPILER)
4765a645f22SBen Gras 	return _bit_scan_forward(n);
4775a645f22SBen Gras 
4785a645f22SBen Gras #elif TUKLIB_GNUC_REQ(3, 4) && UINT_MAX >= UINT32_MAX
4795a645f22SBen Gras 	return __builtin_ctz(n);
4805a645f22SBen Gras 
4815a645f22SBen Gras #elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
4825a645f22SBen Gras 	uint32_t i;
4835a645f22SBen Gras 	__asm__("bsfl %1, %0" : "=r" (i) : "rm" (n));
4845a645f22SBen Gras 	return i;
4855a645f22SBen Gras 
4865a645f22SBen Gras #elif defined(_MSC_VER) && _MSC_VER >= 1400
4875a645f22SBen Gras 	uint32_t i;
4885a645f22SBen Gras 	_BitScanForward((DWORD *)&i, n);
4895a645f22SBen Gras 	return i;
4905a645f22SBen Gras 
4915a645f22SBen Gras #else
4925a645f22SBen Gras 	uint32_t i = 0;
4935a645f22SBen Gras 
4945a645f22SBen Gras 	if ((n & UINT32_C(0x0000FFFF)) == 0) {
4955a645f22SBen Gras 		n >>= 16;
4965a645f22SBen Gras 		i = 16;
4975a645f22SBen Gras 	}
4985a645f22SBen Gras 
4995a645f22SBen Gras 	if ((n & UINT32_C(0x000000FF)) == 0) {
5005a645f22SBen Gras 		n >>= 8;
5015a645f22SBen Gras 		i += 8;
5025a645f22SBen Gras 	}
5035a645f22SBen Gras 
5045a645f22SBen Gras 	if ((n & UINT32_C(0x0000000F)) == 0) {
5055a645f22SBen Gras 		n >>= 4;
5065a645f22SBen Gras 		i += 4;
5075a645f22SBen Gras 	}
5085a645f22SBen Gras 
5095a645f22SBen Gras 	if ((n & UINT32_C(0x00000003)) == 0) {
5105a645f22SBen Gras 		n >>= 2;
5115a645f22SBen Gras 		i += 2;
5125a645f22SBen Gras 	}
5135a645f22SBen Gras 
5145a645f22SBen Gras 	if ((n & UINT32_C(0x00000001)) == 0)
5155a645f22SBen Gras 		++i;
5165a645f22SBen Gras 
5175a645f22SBen Gras 	return i;
5185a645f22SBen Gras #endif
5195a645f22SBen Gras }
5205a645f22SBen Gras 
5215a645f22SBen Gras #define bsf32 ctz32
5225a645f22SBen Gras 
5235a645f22SBen Gras #endif
524