/* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
   Copyright (C) 1991-2024 Free Software Foundation, Inc.

   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   In addition to the permissions in the GNU Lesser General Public
   License, the Free Software Foundation gives you unlimited
   permission to link the compiled version of this file into
   combinations with other programs, and to distribute those
   combinations without any restriction coming from the use of this
   file.  (The Lesser General Public License restrictions do apply in
   other respects; for example, they cover modification of the file,
   and distribution when not linked into a combine executable.)

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.
*/ 2803467a24Schristos 2903467a24Schristos /* You have to define the following before including this file: 3003467a24Schristos 3103467a24Schristos UWtype -- An unsigned type, default type for operations (typically a "word") 3203467a24Schristos UHWtype -- An unsigned type, at least half the size of UWtype. 3303467a24Schristos UDWtype -- An unsigned type, at least twice as large a UWtype 3403467a24Schristos W_TYPE_SIZE -- size in bits of UWtype 3503467a24Schristos 3603467a24Schristos UQItype -- Unsigned 8 bit type. 3703467a24Schristos SItype, USItype -- Signed and unsigned 32 bit types. 3803467a24Schristos DItype, UDItype -- Signed and unsigned 64 bit types. 3903467a24Schristos 4003467a24Schristos On a 32 bit machine UWtype should typically be USItype; 4103467a24Schristos on a 64 bit machine, UWtype should typically be UDItype. */ 4203467a24Schristos 4303467a24Schristos #define __BITS4 (W_TYPE_SIZE / 4) 4403467a24Schristos #define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2)) 4503467a24Schristos #define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1)) 4603467a24Schristos #define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2)) 4703467a24Schristos 4803467a24Schristos #ifndef W_TYPE_SIZE 4903467a24Schristos #define W_TYPE_SIZE 32 5003467a24Schristos #define UWtype USItype 5103467a24Schristos #define UHWtype USItype 5203467a24Schristos #define UDWtype UDItype 5303467a24Schristos #endif 5403467a24Schristos 5503467a24Schristos /* Used in glibc only. */ 5603467a24Schristos #ifndef attribute_hidden 5703467a24Schristos #define attribute_hidden 5803467a24Schristos #endif 5903467a24Schristos 6003467a24Schristos extern const UQItype __clz_tab[256] attribute_hidden; 6103467a24Schristos 6203467a24Schristos /* Define auxiliary asm macros. 
6303467a24Schristos 6403467a24Schristos 1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two 6503467a24Schristos UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype 6603467a24Schristos word product in HIGH_PROD and LOW_PROD. 6703467a24Schristos 6803467a24Schristos 2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a 6903467a24Schristos UDWtype product. This is just a variant of umul_ppmm. 7003467a24Schristos 7103467a24Schristos 3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator, 7203467a24Schristos denominator) divides a UDWtype, composed by the UWtype integers 7303467a24Schristos HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient 7403467a24Schristos in QUOTIENT and the remainder in REMAINDER. HIGH_NUMERATOR must be less 7503467a24Schristos than DENOMINATOR for correct operation. If, in addition, the most 7603467a24Schristos significant bit of DENOMINATOR must be 1, then the pre-processor symbol 7703467a24Schristos UDIV_NEEDS_NORMALIZATION is defined to 1. 7803467a24Schristos 7903467a24Schristos 4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator, 8003467a24Schristos denominator). Like udiv_qrnnd but the numbers are signed. The quotient 8103467a24Schristos is rounded towards 0. 8203467a24Schristos 8303467a24Schristos 5) count_leading_zeros(count, x) counts the number of zero-bits from the 8403467a24Schristos msb to the first nonzero bit in the UWtype X. This is the number of 8503467a24Schristos steps X needs to be shifted left to set the msb. Undefined for X == 0, 8603467a24Schristos unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value. 8703467a24Schristos 8803467a24Schristos 6) count_trailing_zeros(count, x) like count_leading_zeros, but counts 8903467a24Schristos from the least significant end. 
9003467a24Schristos 9103467a24Schristos 7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1, 9203467a24Schristos high_addend_2, low_addend_2) adds two UWtype integers, composed by 9303467a24Schristos HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2 9403467a24Schristos respectively. The result is placed in HIGH_SUM and LOW_SUM. Overflow 9503467a24Schristos (i.e. carry out) is not stored anywhere, and is lost. 9603467a24Schristos 9703467a24Schristos 8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend, 9803467a24Schristos high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers, 9903467a24Schristos composed by HIGH_MINUEND_1 and LOW_MINUEND_1, and HIGH_SUBTRAHEND_2 and 10003467a24Schristos LOW_SUBTRAHEND_2 respectively. The result is placed in HIGH_DIFFERENCE 10103467a24Schristos and LOW_DIFFERENCE. Overflow (i.e. carry out) is not stored anywhere, 10203467a24Schristos and is lost. 10303467a24Schristos 10403467a24Schristos If any of these macros are left undefined for a particular CPU, 10503467a24Schristos C macros are used. */ 10603467a24Schristos 10703467a24Schristos /* The CPUs come in alphabetical order below. 10803467a24Schristos 10903467a24Schristos Please add support for more CPUs here, or improve the current support 11003467a24Schristos for the CPUs below! 11103467a24Schristos (E.g. WE32100, IBM360.) */ 11203467a24Schristos 11303467a24Schristos #if defined (__GNUC__) && !defined (NO_ASM) 11403467a24Schristos 11503467a24Schristos /* We sometimes need to clobber "cc" with gcc2, but that would not be 11603467a24Schristos understood by gcc1. Use cpp to avoid major code duplication. 
*/ 11703467a24Schristos #if __GNUC__ < 2 11803467a24Schristos #define __CLOBBER_CC 11903467a24Schristos #define __AND_CLOBBER_CC 12003467a24Schristos #else /* __GNUC__ >= 2 */ 12103467a24Schristos #define __CLOBBER_CC : "cc" 12203467a24Schristos #define __AND_CLOBBER_CC , "cc" 12303467a24Schristos #endif /* __GNUC__ < 2 */ 12403467a24Schristos 125968cf8f2Schristos #if defined (__aarch64__) 126968cf8f2Schristos 127968cf8f2Schristos #if W_TYPE_SIZE == 32 128968cf8f2Schristos #define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X)) 129968cf8f2Schristos #define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X)) 130968cf8f2Schristos #define COUNT_LEADING_ZEROS_0 32 131968cf8f2Schristos #endif /* W_TYPE_SIZE == 32 */ 132968cf8f2Schristos 133968cf8f2Schristos #if W_TYPE_SIZE == 64 134968cf8f2Schristos #define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clzll (X)) 135968cf8f2Schristos #define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctzll (X)) 136968cf8f2Schristos #define COUNT_LEADING_ZEROS_0 64 137968cf8f2Schristos #endif /* W_TYPE_SIZE == 64 */ 138968cf8f2Schristos 139968cf8f2Schristos #endif /* __aarch64__ */ 140968cf8f2Schristos 14103467a24Schristos #if defined (__alpha) && W_TYPE_SIZE == 64 142ba340e45Schristos /* There is a bug in g++ before version 5 that 143ba340e45Schristos errors on __builtin_alpha_umulh. 
*/ 144ba340e45Schristos #if !defined(__cplusplus) || __GNUC__ >= 5 14503467a24Schristos #define umul_ppmm(ph, pl, m0, m1) \ 14603467a24Schristos do { \ 14703467a24Schristos UDItype __m0 = (m0), __m1 = (m1); \ 14803467a24Schristos (ph) = __builtin_alpha_umulh (__m0, __m1); \ 14903467a24Schristos (pl) = __m0 * __m1; \ 15003467a24Schristos } while (0) 15103467a24Schristos #define UMUL_TIME 46 152ba340e45Schristos #endif /* !c++ */ 15303467a24Schristos #ifndef LONGLONG_STANDALONE 15403467a24Schristos #define udiv_qrnnd(q, r, n1, n0, d) \ 15503467a24Schristos do { UDItype __r; \ 15603467a24Schristos (q) = __udiv_qrnnd (&__r, (n1), (n0), (d)); \ 15703467a24Schristos (r) = __r; \ 15803467a24Schristos } while (0) 15903467a24Schristos extern UDItype __udiv_qrnnd (UDItype *, UDItype, UDItype, UDItype); 16003467a24Schristos #define UDIV_TIME 220 16103467a24Schristos #endif /* LONGLONG_STANDALONE */ 16203467a24Schristos #ifdef __alpha_cix__ 16303467a24Schristos #define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clzl (X)) 16403467a24Schristos #define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzl (X)) 16503467a24Schristos #define COUNT_LEADING_ZEROS_0 64 16603467a24Schristos #else 16703467a24Schristos #define count_leading_zeros(COUNT,X) \ 16803467a24Schristos do { \ 16903467a24Schristos UDItype __xr = (X), __t, __a; \ 17003467a24Schristos __t = __builtin_alpha_cmpbge (0, __xr); \ 17103467a24Schristos __a = __clz_tab[__t ^ 0xff] - 1; \ 17203467a24Schristos __t = __builtin_alpha_extbl (__xr, __a); \ 17303467a24Schristos (COUNT) = 64 - (__clz_tab[__t] + __a*8); \ 17403467a24Schristos } while (0) 17503467a24Schristos #define count_trailing_zeros(COUNT,X) \ 17603467a24Schristos do { \ 17703467a24Schristos UDItype __xr = (X), __t, __a; \ 17803467a24Schristos __t = __builtin_alpha_cmpbge (0, __xr); \ 17903467a24Schristos __t = ~__t & -~__t; \ 18003467a24Schristos __a = ((__t & 0xCC) != 0) * 2; \ 18103467a24Schristos __a += ((__t & 0xF0) != 0) * 4; \ 
18203467a24Schristos __a += ((__t & 0xAA) != 0); \ 18303467a24Schristos __t = __builtin_alpha_extbl (__xr, __a); \ 18403467a24Schristos __a <<= 3; \ 18503467a24Schristos __t &= -__t; \ 18603467a24Schristos __a += ((__t & 0xCC) != 0) * 2; \ 18703467a24Schristos __a += ((__t & 0xF0) != 0) * 4; \ 18803467a24Schristos __a += ((__t & 0xAA) != 0); \ 18903467a24Schristos (COUNT) = __a; \ 19003467a24Schristos } while (0) 19103467a24Schristos #endif /* __alpha_cix__ */ 19203467a24Schristos #endif /* __alpha */ 19303467a24Schristos 19403467a24Schristos #if defined (__arc__) && W_TYPE_SIZE == 32 19503467a24Schristos #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 19603467a24Schristos __asm__ ("add.f %1, %4, %5\n\tadc %0, %2, %3" \ 19703467a24Schristos : "=r" ((USItype) (sh)), \ 19803467a24Schristos "=&r" ((USItype) (sl)) \ 19903467a24Schristos : "%r" ((USItype) (ah)), \ 2004559860eSchristos "rICal" ((USItype) (bh)), \ 20103467a24Schristos "%r" ((USItype) (al)), \ 2028dffb485Schristos "rICal" ((USItype) (bl)) \ 2038dffb485Schristos : "cc") 20403467a24Schristos #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 20503467a24Schristos __asm__ ("sub.f %1, %4, %5\n\tsbc %0, %2, %3" \ 20603467a24Schristos : "=r" ((USItype) (sh)), \ 20703467a24Schristos "=&r" ((USItype) (sl)) \ 20803467a24Schristos : "r" ((USItype) (ah)), \ 2094559860eSchristos "rICal" ((USItype) (bh)), \ 21003467a24Schristos "r" ((USItype) (al)), \ 2118dffb485Schristos "rICal" ((USItype) (bl)) \ 2128dffb485Schristos : "cc") 21303467a24Schristos 21403467a24Schristos #define __umulsidi3(u,v) ((UDItype)(USItype)u*(USItype)v) 21503467a24Schristos #ifdef __ARC_NORM__ 21603467a24Schristos #define count_leading_zeros(count, x) \ 21703467a24Schristos do \ 21803467a24Schristos { \ 21903467a24Schristos SItype c_; \ 22003467a24Schristos \ 22103467a24Schristos __asm__ ("norm.f\t%0,%1\n\tmov.mi\t%0,-1" : "=r" (c_) : "r" (x) : "cc");\ 22203467a24Schristos (count) = c_ + 1; \ 22303467a24Schristos } \ 22403467a24Schristos while (0) 
22503467a24Schristos #define COUNT_LEADING_ZEROS_0 32 2264559860eSchristos #endif /* __ARC_NORM__ */ 2274559860eSchristos #endif /* __arc__ */ 22803467a24Schristos 22903467a24Schristos #if defined (__arm__) && (defined (__thumb2__) || !defined (__thumb__)) \ 23003467a24Schristos && W_TYPE_SIZE == 32 23103467a24Schristos #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 23203467a24Schristos __asm__ ("adds %1, %4, %5\n\tadc %0, %2, %3" \ 23303467a24Schristos : "=r" ((USItype) (sh)), \ 23403467a24Schristos "=&r" ((USItype) (sl)) \ 23503467a24Schristos : "%r" ((USItype) (ah)), \ 23603467a24Schristos "rI" ((USItype) (bh)), \ 23703467a24Schristos "%r" ((USItype) (al)), \ 23803467a24Schristos "rI" ((USItype) (bl)) __CLOBBER_CC) 23903467a24Schristos #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 24003467a24Schristos __asm__ ("subs %1, %4, %5\n\tsbc %0, %2, %3" \ 24103467a24Schristos : "=r" ((USItype) (sh)), \ 24203467a24Schristos "=&r" ((USItype) (sl)) \ 24303467a24Schristos : "r" ((USItype) (ah)), \ 24403467a24Schristos "rI" ((USItype) (bh)), \ 24503467a24Schristos "r" ((USItype) (al)), \ 24603467a24Schristos "rI" ((USItype) (bl)) __CLOBBER_CC) 24703467a24Schristos # if defined(__ARM_ARCH_2__) || defined(__ARM_ARCH_2A__) \ 24803467a24Schristos || defined(__ARM_ARCH_3__) 24903467a24Schristos # define umul_ppmm(xh, xl, a, b) \ 25003467a24Schristos do { \ 25103467a24Schristos register USItype __t0, __t1, __t2; \ 25203467a24Schristos __asm__ ("%@ Inlined umul_ppmm\n" \ 25303467a24Schristos " mov %2, %5, lsr #16\n" \ 25403467a24Schristos " mov %0, %6, lsr #16\n" \ 25503467a24Schristos " bic %3, %5, %2, lsl #16\n" \ 25603467a24Schristos " bic %4, %6, %0, lsl #16\n" \ 25703467a24Schristos " mul %1, %3, %4\n" \ 25803467a24Schristos " mul %4, %2, %4\n" \ 25903467a24Schristos " mul %3, %0, %3\n" \ 26003467a24Schristos " mul %0, %2, %0\n" \ 26103467a24Schristos " adds %3, %4, %3\n" \ 26203467a24Schristos " addcs %0, %0, #65536\n" \ 26303467a24Schristos " adds %1, %1, %3, lsl #16\n" \ 
26403467a24Schristos " adc %0, %0, %3, lsr #16" \ 26503467a24Schristos : "=&r" ((USItype) (xh)), \ 26603467a24Schristos "=r" ((USItype) (xl)), \ 26703467a24Schristos "=&r" (__t0), "=&r" (__t1), "=r" (__t2) \ 26803467a24Schristos : "r" ((USItype) (a)), \ 26903467a24Schristos "r" ((USItype) (b)) __CLOBBER_CC ); \ 27003467a24Schristos } while (0) 27103467a24Schristos # define UMUL_TIME 20 27203467a24Schristos # else 27303467a24Schristos # define umul_ppmm(xh, xl, a, b) \ 27403467a24Schristos do { \ 27503467a24Schristos /* Generate umull, under compiler control. */ \ 27603467a24Schristos register UDItype __t0 = (UDItype)(USItype)(a) * (USItype)(b); \ 27703467a24Schristos (xl) = (USItype)__t0; \ 27803467a24Schristos (xh) = (USItype)(__t0 >> 32); \ 27903467a24Schristos } while (0) 28003467a24Schristos # define UMUL_TIME 3 28103467a24Schristos # endif 28203467a24Schristos # define UDIV_TIME 100 28303467a24Schristos #endif /* __arm__ */ 28403467a24Schristos 28503467a24Schristos #if defined(__arm__) 28603467a24Schristos /* Let gcc decide how best to implement count_leading_zeros. 
*/ 28703467a24Schristos #define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clz (X)) 28803467a24Schristos #define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctz (X)) 28903467a24Schristos #define COUNT_LEADING_ZEROS_0 32 29003467a24Schristos #endif 29103467a24Schristos 29203467a24Schristos #if defined (__AVR__) 29303467a24Schristos 29403467a24Schristos #if W_TYPE_SIZE == 16 29503467a24Schristos #define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clz (X)) 29603467a24Schristos #define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctz (X)) 29703467a24Schristos #define COUNT_LEADING_ZEROS_0 16 29803467a24Schristos #endif /* W_TYPE_SIZE == 16 */ 29903467a24Schristos 30003467a24Schristos #if W_TYPE_SIZE == 32 30103467a24Schristos #define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clzl (X)) 30203467a24Schristos #define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzl (X)) 30303467a24Schristos #define COUNT_LEADING_ZEROS_0 32 30403467a24Schristos #endif /* W_TYPE_SIZE == 32 */ 30503467a24Schristos 30603467a24Schristos #if W_TYPE_SIZE == 64 30703467a24Schristos #define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clzll (X)) 30803467a24Schristos #define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzll (X)) 30903467a24Schristos #define COUNT_LEADING_ZEROS_0 64 31003467a24Schristos #endif /* W_TYPE_SIZE == 64 */ 31103467a24Schristos 31203467a24Schristos #endif /* defined (__AVR__) */ 31303467a24Schristos 31403467a24Schristos #if defined (__CRIS__) 31503467a24Schristos 31603467a24Schristos #if __CRIS_arch_version >= 3 31703467a24Schristos #define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X)) 31803467a24Schristos #define COUNT_LEADING_ZEROS_0 32 31903467a24Schristos #endif /* __CRIS_arch_version >= 3 */ 32003467a24Schristos 32103467a24Schristos #if __CRIS_arch_version >= 8 32203467a24Schristos #define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X)) 32303467a24Schristos #endif /* __CRIS_arch_version >= 
8 */ 32403467a24Schristos 32503467a24Schristos #if __CRIS_arch_version >= 10 32603467a24Schristos #define __umulsidi3(u,v) ((UDItype)(USItype) (u) * (UDItype)(USItype) (v)) 32703467a24Schristos #else 32803467a24Schristos #define __umulsidi3 __umulsidi3 32903467a24Schristos extern UDItype __umulsidi3 (USItype, USItype); 33003467a24Schristos #endif /* __CRIS_arch_version >= 10 */ 33103467a24Schristos 33203467a24Schristos #define umul_ppmm(w1, w0, u, v) \ 33303467a24Schristos do { \ 33403467a24Schristos UDItype __x = __umulsidi3 (u, v); \ 33503467a24Schristos (w0) = (USItype) (__x); \ 33603467a24Schristos (w1) = (USItype) (__x >> 32); \ 33703467a24Schristos } while (0) 33803467a24Schristos 33903467a24Schristos /* FIXME: defining add_ssaaaa and sub_ddmmss should be advantageous for 34003467a24Schristos DFmode ("double" intrinsics, avoiding two of the three insns handling 34103467a24Schristos carry), but defining them as open-code C composing and doing the 34203467a24Schristos operation in DImode (UDImode) shows that the DImode needs work: 34303467a24Schristos register pressure from requiring neighboring registers and the 34403467a24Schristos traffic to and from them come to dominate, in the 4.7 series. 
*/ 34503467a24Schristos 34603467a24Schristos #endif /* defined (__CRIS__) */ 34703467a24Schristos 34803467a24Schristos #if defined (__hppa) && W_TYPE_SIZE == 32 34903467a24Schristos #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 35003467a24Schristos __asm__ ("add %4,%5,%1\n\taddc %2,%3,%0" \ 35103467a24Schristos : "=r" ((USItype) (sh)), \ 35203467a24Schristos "=&r" ((USItype) (sl)) \ 35303467a24Schristos : "%rM" ((USItype) (ah)), \ 35403467a24Schristos "rM" ((USItype) (bh)), \ 35503467a24Schristos "%rM" ((USItype) (al)), \ 35603467a24Schristos "rM" ((USItype) (bl))) 35703467a24Schristos #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 35803467a24Schristos __asm__ ("sub %4,%5,%1\n\tsubb %2,%3,%0" \ 35903467a24Schristos : "=r" ((USItype) (sh)), \ 36003467a24Schristos "=&r" ((USItype) (sl)) \ 36103467a24Schristos : "rM" ((USItype) (ah)), \ 36203467a24Schristos "rM" ((USItype) (bh)), \ 36303467a24Schristos "rM" ((USItype) (al)), \ 36403467a24Schristos "rM" ((USItype) (bl))) 36503467a24Schristos #if defined (_PA_RISC1_1) 36603467a24Schristos #define umul_ppmm(w1, w0, u, v) \ 36703467a24Schristos do { \ 36803467a24Schristos union \ 36903467a24Schristos { \ 37003467a24Schristos UDItype __f; \ 37103467a24Schristos struct {USItype __w1, __w0;} __w1w0; \ 37203467a24Schristos } __t; \ 37303467a24Schristos __asm__ ("xmpyu %1,%2,%0" \ 37403467a24Schristos : "=x" (__t.__f) \ 37503467a24Schristos : "x" ((USItype) (u)), \ 37603467a24Schristos "x" ((USItype) (v))); \ 37703467a24Schristos (w1) = __t.__w1w0.__w1; \ 37803467a24Schristos (w0) = __t.__w1w0.__w0; \ 37903467a24Schristos } while (0) 38003467a24Schristos #define UMUL_TIME 8 38103467a24Schristos #else 38203467a24Schristos #define UMUL_TIME 30 38303467a24Schristos #endif 38403467a24Schristos #define UDIV_TIME 40 38503467a24Schristos #define count_leading_zeros(count, x) \ 38603467a24Schristos do { \ 38703467a24Schristos USItype __tmp; \ 38803467a24Schristos __asm__ ( \ 38903467a24Schristos "ldi 1,%0\n" \ 39003467a24Schristos " 
extru,= %1,15,16,%%r0 ; Bits 31..16 zero?\n" \ 39103467a24Schristos " extru,tr %1,15,16,%1 ; No. Shift down, skip add.\n"\ 39203467a24Schristos " ldo 16(%0),%0 ; Yes. Perform add.\n" \ 39303467a24Schristos " extru,= %1,23,8,%%r0 ; Bits 15..8 zero?\n" \ 39403467a24Schristos " extru,tr %1,23,8,%1 ; No. Shift down, skip add.\n"\ 39503467a24Schristos " ldo 8(%0),%0 ; Yes. Perform add.\n" \ 39603467a24Schristos " extru,= %1,27,4,%%r0 ; Bits 7..4 zero?\n" \ 39703467a24Schristos " extru,tr %1,27,4,%1 ; No. Shift down, skip add.\n"\ 39803467a24Schristos " ldo 4(%0),%0 ; Yes. Perform add.\n" \ 39903467a24Schristos " extru,= %1,29,2,%%r0 ; Bits 3..2 zero?\n" \ 40003467a24Schristos " extru,tr %1,29,2,%1 ; No. Shift down, skip add.\n"\ 40103467a24Schristos " ldo 2(%0),%0 ; Yes. Perform add.\n" \ 40203467a24Schristos " extru %1,30,1,%1 ; Extract bit 1.\n" \ 40303467a24Schristos " sub %0,%1,%0 ; Subtract it.\n" \ 40403467a24Schristos : "=r" (count), "=r" (__tmp) : "1" (x)); \ 40503467a24Schristos } while (0) 40603467a24Schristos #endif 40703467a24Schristos 40803467a24Schristos #if (defined (__i370__) || defined (__s390__) || defined (__mvs__)) && W_TYPE_SIZE == 32 40903467a24Schristos #if !defined (__zarch__) 41003467a24Schristos #define smul_ppmm(xh, xl, m0, m1) \ 41103467a24Schristos do { \ 41203467a24Schristos union {DItype __ll; \ 41303467a24Schristos struct {USItype __h, __l;} __i; \ 41403467a24Schristos } __x; \ 41503467a24Schristos __asm__ ("lr %N0,%1\n\tmr %0,%2" \ 41603467a24Schristos : "=&r" (__x.__ll) \ 41703467a24Schristos : "r" (m0), "r" (m1)); \ 41803467a24Schristos (xh) = __x.__i.__h; (xl) = __x.__i.__l; \ 41903467a24Schristos } while (0) 42003467a24Schristos #define sdiv_qrnnd(q, r, n1, n0, d) \ 42103467a24Schristos do { \ 42203467a24Schristos union {DItype __ll; \ 42303467a24Schristos struct {USItype __h, __l;} __i; \ 42403467a24Schristos } __x; \ 42503467a24Schristos __x.__i.__h = n1; __x.__i.__l = n0; \ 42603467a24Schristos __asm__ ("dr %0,%2" \ 
42703467a24Schristos : "=r" (__x.__ll) \ 42803467a24Schristos : "0" (__x.__ll), "r" (d)); \ 42903467a24Schristos (q) = __x.__i.__l; (r) = __x.__i.__h; \ 43003467a24Schristos } while (0) 43103467a24Schristos #else 43203467a24Schristos #define smul_ppmm(xh, xl, m0, m1) \ 43303467a24Schristos do { \ 43403467a24Schristos register SItype __r0 __asm__ ("0"); \ 43503467a24Schristos register SItype __r1 __asm__ ("1") = (m0); \ 43603467a24Schristos \ 43703467a24Schristos __asm__ ("mr\t%%r0,%3" \ 43803467a24Schristos : "=r" (__r0), "=r" (__r1) \ 43903467a24Schristos : "r" (__r1), "r" (m1)); \ 44003467a24Schristos (xh) = __r0; (xl) = __r1; \ 44103467a24Schristos } while (0) 44203467a24Schristos 44303467a24Schristos #define sdiv_qrnnd(q, r, n1, n0, d) \ 44403467a24Schristos do { \ 44503467a24Schristos register SItype __r0 __asm__ ("0") = (n1); \ 44603467a24Schristos register SItype __r1 __asm__ ("1") = (n0); \ 44703467a24Schristos \ 44803467a24Schristos __asm__ ("dr\t%%r0,%4" \ 44903467a24Schristos : "=r" (__r0), "=r" (__r1) \ 45003467a24Schristos : "r" (__r0), "r" (__r1), "r" (d)); \ 45103467a24Schristos (q) = __r1; (r) = __r0; \ 45203467a24Schristos } while (0) 45303467a24Schristos #endif /* __zarch__ */ 45403467a24Schristos #endif 45503467a24Schristos 45603467a24Schristos #if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32 45703467a24Schristos #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 45803467a24Schristos __asm__ ("add{l} {%5,%1|%1,%5}\n\tadc{l} {%3,%0|%0,%3}" \ 45903467a24Schristos : "=r" ((USItype) (sh)), \ 46003467a24Schristos "=&r" ((USItype) (sl)) \ 46103467a24Schristos : "%0" ((USItype) (ah)), \ 46203467a24Schristos "g" ((USItype) (bh)), \ 46303467a24Schristos "%1" ((USItype) (al)), \ 46403467a24Schristos "g" ((USItype) (bl))) 46503467a24Schristos #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 46603467a24Schristos __asm__ ("sub{l} {%5,%1|%1,%5}\n\tsbb{l} {%3,%0|%0,%3}" \ 46703467a24Schristos : "=r" ((USItype) (sh)), \ 46803467a24Schristos "=&r" 
((USItype) (sl)) \ 46903467a24Schristos : "0" ((USItype) (ah)), \ 47003467a24Schristos "g" ((USItype) (bh)), \ 47103467a24Schristos "1" ((USItype) (al)), \ 47203467a24Schristos "g" ((USItype) (bl))) 47303467a24Schristos #define umul_ppmm(w1, w0, u, v) \ 47403467a24Schristos __asm__ ("mul{l} %3" \ 47503467a24Schristos : "=a" ((USItype) (w0)), \ 47603467a24Schristos "=d" ((USItype) (w1)) \ 47703467a24Schristos : "%0" ((USItype) (u)), \ 47803467a24Schristos "rm" ((USItype) (v))) 47903467a24Schristos #define udiv_qrnnd(q, r, n1, n0, dv) \ 48003467a24Schristos __asm__ ("div{l} %4" \ 48103467a24Schristos : "=a" ((USItype) (q)), \ 48203467a24Schristos "=d" ((USItype) (r)) \ 48303467a24Schristos : "0" ((USItype) (n0)), \ 48403467a24Schristos "1" ((USItype) (n1)), \ 48503467a24Schristos "rm" ((USItype) (dv))) 48603467a24Schristos #define count_leading_zeros(count, x) ((count) = __builtin_clz (x)) 48703467a24Schristos #define count_trailing_zeros(count, x) ((count) = __builtin_ctz (x)) 48803467a24Schristos #define UMUL_TIME 40 48903467a24Schristos #define UDIV_TIME 40 49003467a24Schristos #endif /* 80x86 */ 49103467a24Schristos 492968cf8f2Schristos #if defined (__x86_64__) && W_TYPE_SIZE == 64 49303467a24Schristos #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 49403467a24Schristos __asm__ ("add{q} {%5,%1|%1,%5}\n\tadc{q} {%3,%0|%0,%3}" \ 49503467a24Schristos : "=r" ((UDItype) (sh)), \ 49603467a24Schristos "=&r" ((UDItype) (sl)) \ 49703467a24Schristos : "%0" ((UDItype) (ah)), \ 49803467a24Schristos "rme" ((UDItype) (bh)), \ 49903467a24Schristos "%1" ((UDItype) (al)), \ 50003467a24Schristos "rme" ((UDItype) (bl))) 50103467a24Schristos #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 50203467a24Schristos __asm__ ("sub{q} {%5,%1|%1,%5}\n\tsbb{q} {%3,%0|%0,%3}" \ 50303467a24Schristos : "=r" ((UDItype) (sh)), \ 50403467a24Schristos "=&r" ((UDItype) (sl)) \ 50503467a24Schristos : "0" ((UDItype) (ah)), \ 50603467a24Schristos "rme" ((UDItype) (bh)), \ 50703467a24Schristos "1" ((UDItype) 
(al)), \ 50803467a24Schristos "rme" ((UDItype) (bl))) 50903467a24Schristos #define umul_ppmm(w1, w0, u, v) \ 51003467a24Schristos __asm__ ("mul{q} %3" \ 51103467a24Schristos : "=a" ((UDItype) (w0)), \ 51203467a24Schristos "=d" ((UDItype) (w1)) \ 51303467a24Schristos : "%0" ((UDItype) (u)), \ 51403467a24Schristos "rm" ((UDItype) (v))) 51503467a24Schristos #define udiv_qrnnd(q, r, n1, n0, dv) \ 51603467a24Schristos __asm__ ("div{q} %4" \ 51703467a24Schristos : "=a" ((UDItype) (q)), \ 51803467a24Schristos "=d" ((UDItype) (r)) \ 51903467a24Schristos : "0" ((UDItype) (n0)), \ 52003467a24Schristos "1" ((UDItype) (n1)), \ 52103467a24Schristos "rm" ((UDItype) (dv))) 52203467a24Schristos #define count_leading_zeros(count, x) ((count) = __builtin_clzll (x)) 52303467a24Schristos #define count_trailing_zeros(count, x) ((count) = __builtin_ctzll (x)) 52403467a24Schristos #define UMUL_TIME 40 52503467a24Schristos #define UDIV_TIME 40 52603467a24Schristos #endif /* x86_64 */ 52703467a24Schristos 52803467a24Schristos #if defined (__i960__) && W_TYPE_SIZE == 32 52903467a24Schristos #define umul_ppmm(w1, w0, u, v) \ 53003467a24Schristos ({union {UDItype __ll; \ 53103467a24Schristos struct {USItype __l, __h;} __i; \ 53203467a24Schristos } __xx; \ 53303467a24Schristos __asm__ ("emul %2,%1,%0" \ 53403467a24Schristos : "=d" (__xx.__ll) \ 53503467a24Schristos : "%dI" ((USItype) (u)), \ 53603467a24Schristos "dI" ((USItype) (v))); \ 53703467a24Schristos (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;}) 53803467a24Schristos #define __umulsidi3(u, v) \ 53903467a24Schristos ({UDItype __w; \ 54003467a24Schristos __asm__ ("emul %2,%1,%0" \ 54103467a24Schristos : "=d" (__w) \ 54203467a24Schristos : "%dI" ((USItype) (u)), \ 54303467a24Schristos "dI" ((USItype) (v))); \ 54403467a24Schristos __w; }) 54503467a24Schristos #endif /* __i960__ */ 54603467a24Schristos 54703467a24Schristos #if defined (__ia64) && W_TYPE_SIZE == 64 54803467a24Schristos /* This form encourages gcc (pre-release 3.4 at least) to 
emit predicated 54903467a24Schristos "sub r=r,r" and "sub r=r,r,1", giving a 2 cycle latency. The generic 55003467a24Schristos code using "al<bl" arithmetically comes out making an actual 0 or 1 in a 55103467a24Schristos register, which takes an extra cycle. */ 55203467a24Schristos #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 55303467a24Schristos do { \ 55403467a24Schristos UWtype __x; \ 55503467a24Schristos __x = (al) - (bl); \ 55603467a24Schristos if ((al) < (bl)) \ 55703467a24Schristos (sh) = (ah) - (bh) - 1; \ 55803467a24Schristos else \ 55903467a24Schristos (sh) = (ah) - (bh); \ 56003467a24Schristos (sl) = __x; \ 56103467a24Schristos } while (0) 56203467a24Schristos 56303467a24Schristos /* Do both product parts in assembly, since that gives better code with 56403467a24Schristos all gcc versions. Some callers will just use the upper part, and in 56503467a24Schristos that situation we waste an instruction, but not any cycles. */ 56603467a24Schristos #define umul_ppmm(ph, pl, m0, m1) \ 56703467a24Schristos __asm__ ("xma.hu %0 = %2, %3, f0\n\txma.l %1 = %2, %3, f0" \ 56803467a24Schristos : "=&f" (ph), "=f" (pl) \ 56903467a24Schristos : "f" (m0), "f" (m1)) 57003467a24Schristos #define count_leading_zeros(count, x) \ 57103467a24Schristos do { \ 57203467a24Schristos UWtype _x = (x), _y, _a, _c; \ 57303467a24Schristos __asm__ ("mux1 %0 = %1, @rev" : "=r" (_y) : "r" (_x)); \ 57403467a24Schristos __asm__ ("czx1.l %0 = %1" : "=r" (_a) : "r" (-_y | _y)); \ 57503467a24Schristos _c = (_a - 1) << 3; \ 57603467a24Schristos _x >>= _c; \ 57703467a24Schristos if (_x >= 1 << 4) \ 57803467a24Schristos _x >>= 4, _c += 4; \ 57903467a24Schristos if (_x >= 1 << 2) \ 58003467a24Schristos _x >>= 2, _c += 2; \ 58103467a24Schristos _c += _x >> 1; \ 58203467a24Schristos (count) = W_TYPE_SIZE - 1 - _c; \ 58303467a24Schristos } while (0) 58403467a24Schristos /* similar to what gcc does for __builtin_ffs, but 0 based rather than 1 58503467a24Schristos based, and we don't need a special case 
for x==0 here */ 58603467a24Schristos #define count_trailing_zeros(count, x) \ 58703467a24Schristos do { \ 58803467a24Schristos UWtype __ctz_x = (x); \ 58903467a24Schristos __asm__ ("popcnt %0 = %1" \ 59003467a24Schristos : "=r" (count) \ 59103467a24Schristos : "r" ((__ctz_x-1) & ~__ctz_x)); \ 59203467a24Schristos } while (0) 59303467a24Schristos #define UMUL_TIME 14 59403467a24Schristos #endif 59503467a24Schristos 596*e663ba6eSchristos #ifdef __loongarch__ 597*e663ba6eSchristos # if W_TYPE_SIZE == 32 598*e663ba6eSchristos # define count_leading_zeros(count, x) ((count) = __builtin_clz (x)) 599*e663ba6eSchristos # define count_trailing_zeros(count, x) ((count) = __builtin_ctz (x)) 600*e663ba6eSchristos # define COUNT_LEADING_ZEROS_0 32 601*e663ba6eSchristos # elif W_TYPE_SIZE == 64 602*e663ba6eSchristos # define count_leading_zeros(count, x) ((count) = __builtin_clzll (x)) 603*e663ba6eSchristos # define count_trailing_zeros(count, x) ((count) = __builtin_ctzll (x)) 604*e663ba6eSchristos # define COUNT_LEADING_ZEROS_0 64 605*e663ba6eSchristos # endif 606*e663ba6eSchristos #endif 607*e663ba6eSchristos 60803467a24Schristos #if defined (__M32R__) && W_TYPE_SIZE == 32 60903467a24Schristos #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 61003467a24Schristos /* The cmp clears the condition bit. */ \ 61103467a24Schristos __asm__ ("cmp %0,%0\n\taddx %1,%5\n\taddx %0,%3" \ 61203467a24Schristos : "=r" ((USItype) (sh)), \ 61303467a24Schristos "=&r" ((USItype) (sl)) \ 61403467a24Schristos : "0" ((USItype) (ah)), \ 61503467a24Schristos "r" ((USItype) (bh)), \ 61603467a24Schristos "1" ((USItype) (al)), \ 61703467a24Schristos "r" ((USItype) (bl)) \ 61803467a24Schristos : "cbit") 61903467a24Schristos #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 62003467a24Schristos /* The cmp clears the condition bit. 
*/ \ 62103467a24Schristos __asm__ ("cmp %0,%0\n\tsubx %1,%5\n\tsubx %0,%3" \ 62203467a24Schristos : "=r" ((USItype) (sh)), \ 62303467a24Schristos "=&r" ((USItype) (sl)) \ 62403467a24Schristos : "0" ((USItype) (ah)), \ 62503467a24Schristos "r" ((USItype) (bh)), \ 62603467a24Schristos "1" ((USItype) (al)), \ 62703467a24Schristos "r" ((USItype) (bl)) \ 62803467a24Schristos : "cbit") 62903467a24Schristos #endif /* __M32R__ */ 63003467a24Schristos 63103467a24Schristos #if defined (__mc68000__) && W_TYPE_SIZE == 32 63203467a24Schristos #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 63303467a24Schristos __asm__ ("add%.l %5,%1\n\taddx%.l %3,%0" \ 63403467a24Schristos : "=d" ((USItype) (sh)), \ 63503467a24Schristos "=&d" ((USItype) (sl)) \ 63603467a24Schristos : "%0" ((USItype) (ah)), \ 63703467a24Schristos "d" ((USItype) (bh)), \ 63803467a24Schristos "%1" ((USItype) (al)), \ 63903467a24Schristos "g" ((USItype) (bl))) 64003467a24Schristos #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 64103467a24Schristos __asm__ ("sub%.l %5,%1\n\tsubx%.l %3,%0" \ 64203467a24Schristos : "=d" ((USItype) (sh)), \ 64303467a24Schristos "=&d" ((USItype) (sl)) \ 64403467a24Schristos : "0" ((USItype) (ah)), \ 64503467a24Schristos "d" ((USItype) (bh)), \ 64603467a24Schristos "1" ((USItype) (al)), \ 64703467a24Schristos "g" ((USItype) (bl))) 64803467a24Schristos 64903467a24Schristos /* The '020, '030, '040, '060 and CPU32 have 32x32->64 and 64/32->32q-32r. 
*/ 65003467a24Schristos #if (defined (__mc68020__) && !defined (__mc68060__)) 65103467a24Schristos #define umul_ppmm(w1, w0, u, v) \ 65203467a24Schristos __asm__ ("mulu%.l %3,%1:%0" \ 65303467a24Schristos : "=d" ((USItype) (w0)), \ 65403467a24Schristos "=d" ((USItype) (w1)) \ 65503467a24Schristos : "%0" ((USItype) (u)), \ 65603467a24Schristos "dmi" ((USItype) (v))) 65703467a24Schristos #define UMUL_TIME 45 65803467a24Schristos #define udiv_qrnnd(q, r, n1, n0, d) \ 65903467a24Schristos __asm__ ("divu%.l %4,%1:%0" \ 66003467a24Schristos : "=d" ((USItype) (q)), \ 66103467a24Schristos "=d" ((USItype) (r)) \ 66203467a24Schristos : "0" ((USItype) (n0)), \ 66303467a24Schristos "1" ((USItype) (n1)), \ 66403467a24Schristos "dmi" ((USItype) (d))) 66503467a24Schristos #define UDIV_TIME 90 66603467a24Schristos #define sdiv_qrnnd(q, r, n1, n0, d) \ 66703467a24Schristos __asm__ ("divs%.l %4,%1:%0" \ 66803467a24Schristos : "=d" ((USItype) (q)), \ 66903467a24Schristos "=d" ((USItype) (r)) \ 67003467a24Schristos : "0" ((USItype) (n0)), \ 67103467a24Schristos "1" ((USItype) (n1)), \ 67203467a24Schristos "dmi" ((USItype) (d))) 67303467a24Schristos 67403467a24Schristos #elif defined (__mcoldfire__) /* not mc68020 */ 67503467a24Schristos 67603467a24Schristos #define umul_ppmm(xh, xl, a, b) \ 67703467a24Schristos __asm__ ("| Inlined umul_ppmm\n" \ 67803467a24Schristos " move%.l %2,%/d0\n" \ 67903467a24Schristos " move%.l %3,%/d1\n" \ 68003467a24Schristos " move%.l %/d0,%/d2\n" \ 68103467a24Schristos " swap %/d0\n" \ 68203467a24Schristos " move%.l %/d1,%/d3\n" \ 68303467a24Schristos " swap %/d1\n" \ 68403467a24Schristos " move%.w %/d2,%/d4\n" \ 68503467a24Schristos " mulu %/d3,%/d4\n" \ 68603467a24Schristos " mulu %/d1,%/d2\n" \ 68703467a24Schristos " mulu %/d0,%/d3\n" \ 68803467a24Schristos " mulu %/d0,%/d1\n" \ 68903467a24Schristos " move%.l %/d4,%/d0\n" \ 69003467a24Schristos " clr%.w %/d0\n" \ 69103467a24Schristos " swap %/d0\n" \ 69203467a24Schristos " add%.l %/d0,%/d2\n" \ 
69303467a24Schristos " add%.l %/d3,%/d2\n" \ 69403467a24Schristos " jcc 1f\n" \ 69503467a24Schristos " add%.l %#65536,%/d1\n" \ 69603467a24Schristos "1: swap %/d2\n" \ 69703467a24Schristos " moveq %#0,%/d0\n" \ 69803467a24Schristos " move%.w %/d2,%/d0\n" \ 69903467a24Schristos " move%.w %/d4,%/d2\n" \ 70003467a24Schristos " move%.l %/d2,%1\n" \ 70103467a24Schristos " add%.l %/d1,%/d0\n" \ 70203467a24Schristos " move%.l %/d0,%0" \ 70303467a24Schristos : "=g" ((USItype) (xh)), \ 70403467a24Schristos "=g" ((USItype) (xl)) \ 70503467a24Schristos : "g" ((USItype) (a)), \ 70603467a24Schristos "g" ((USItype) (b)) \ 70703467a24Schristos : "d0", "d1", "d2", "d3", "d4") 70803467a24Schristos #define UMUL_TIME 100 70903467a24Schristos #define UDIV_TIME 400 71003467a24Schristos #else /* not ColdFire */ 71103467a24Schristos /* %/ inserts REGISTER_PREFIX, %# inserts IMMEDIATE_PREFIX. */ 71203467a24Schristos #define umul_ppmm(xh, xl, a, b) \ 71303467a24Schristos __asm__ ("| Inlined umul_ppmm\n" \ 71403467a24Schristos " move%.l %2,%/d0\n" \ 71503467a24Schristos " move%.l %3,%/d1\n" \ 71603467a24Schristos " move%.l %/d0,%/d2\n" \ 71703467a24Schristos " swap %/d0\n" \ 71803467a24Schristos " move%.l %/d1,%/d3\n" \ 71903467a24Schristos " swap %/d1\n" \ 72003467a24Schristos " move%.w %/d2,%/d4\n" \ 72103467a24Schristos " mulu %/d3,%/d4\n" \ 72203467a24Schristos " mulu %/d1,%/d2\n" \ 72303467a24Schristos " mulu %/d0,%/d3\n" \ 72403467a24Schristos " mulu %/d0,%/d1\n" \ 72503467a24Schristos " move%.l %/d4,%/d0\n" \ 72603467a24Schristos " eor%.w %/d0,%/d0\n" \ 72703467a24Schristos " swap %/d0\n" \ 72803467a24Schristos " add%.l %/d0,%/d2\n" \ 72903467a24Schristos " add%.l %/d3,%/d2\n" \ 73003467a24Schristos " jcc 1f\n" \ 73103467a24Schristos " add%.l %#65536,%/d1\n" \ 73203467a24Schristos "1: swap %/d2\n" \ 73303467a24Schristos " moveq %#0,%/d0\n" \ 73403467a24Schristos " move%.w %/d2,%/d0\n" \ 73503467a24Schristos " move%.w %/d4,%/d2\n" \ 73603467a24Schristos " move%.l %/d2,%1\n" \ 
73703467a24Schristos " add%.l %/d1,%/d0\n" \ 73803467a24Schristos " move%.l %/d0,%0" \ 73903467a24Schristos : "=g" ((USItype) (xh)), \ 74003467a24Schristos "=g" ((USItype) (xl)) \ 74103467a24Schristos : "g" ((USItype) (a)), \ 74203467a24Schristos "g" ((USItype) (b)) \ 74303467a24Schristos : "d0", "d1", "d2", "d3", "d4") 74403467a24Schristos #define UMUL_TIME 100 74503467a24Schristos #define UDIV_TIME 400 74603467a24Schristos 74703467a24Schristos #endif /* not mc68020 */ 74803467a24Schristos 74903467a24Schristos /* The '020, '030, '040 and '060 have bitfield insns. 75003467a24Schristos cpu32 disguises as a 68020, but lacks them. */ 75103467a24Schristos #if defined (__mc68020__) && !defined (__mcpu32__) 75203467a24Schristos #define count_leading_zeros(count, x) \ 75303467a24Schristos __asm__ ("bfffo %1{%b2:%b2},%0" \ 75403467a24Schristos : "=d" ((USItype) (count)) \ 75503467a24Schristos : "od" ((USItype) (x)), "n" (0)) 75603467a24Schristos /* Some ColdFire architectures have a ff1 instruction supported via 75703467a24Schristos __builtin_clz. 
*/ 75803467a24Schristos #elif defined (__mcfisaaplus__) || defined (__mcfisac__) 75903467a24Schristos #define count_leading_zeros(count,x) ((count) = __builtin_clz (x)) 76003467a24Schristos #define COUNT_LEADING_ZEROS_0 32 76103467a24Schristos #endif 76203467a24Schristos #endif /* mc68000 */ 76303467a24Schristos 76403467a24Schristos #if defined (__m88000__) && W_TYPE_SIZE == 32 76503467a24Schristos #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 76603467a24Schristos __asm__ ("addu.co %1,%r4,%r5\n\taddu.ci %0,%r2,%r3" \ 76703467a24Schristos : "=r" ((USItype) (sh)), \ 76803467a24Schristos "=&r" ((USItype) (sl)) \ 76903467a24Schristos : "%rJ" ((USItype) (ah)), \ 77003467a24Schristos "rJ" ((USItype) (bh)), \ 77103467a24Schristos "%rJ" ((USItype) (al)), \ 77203467a24Schristos "rJ" ((USItype) (bl))) 77303467a24Schristos #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 77403467a24Schristos __asm__ ("subu.co %1,%r4,%r5\n\tsubu.ci %0,%r2,%r3" \ 77503467a24Schristos : "=r" ((USItype) (sh)), \ 77603467a24Schristos "=&r" ((USItype) (sl)) \ 77703467a24Schristos : "rJ" ((USItype) (ah)), \ 77803467a24Schristos "rJ" ((USItype) (bh)), \ 77903467a24Schristos "rJ" ((USItype) (al)), \ 78003467a24Schristos "rJ" ((USItype) (bl))) 78103467a24Schristos #define count_leading_zeros(count, x) \ 78203467a24Schristos do { \ 78303467a24Schristos USItype __cbtmp; \ 78403467a24Schristos __asm__ ("ff1 %0,%1" \ 78503467a24Schristos : "=r" (__cbtmp) \ 78603467a24Schristos : "r" ((USItype) (x))); \ 78703467a24Schristos (count) = __cbtmp ^ 31; \ 78803467a24Schristos } while (0) 78903467a24Schristos #define COUNT_LEADING_ZEROS_0 63 /* sic */ 79003467a24Schristos #if defined (__mc88110__) 79103467a24Schristos #define umul_ppmm(wh, wl, u, v) \ 79203467a24Schristos do { \ 79303467a24Schristos union {UDItype __ll; \ 79403467a24Schristos struct {USItype __h, __l;} __i; \ 79503467a24Schristos } __xx; \ 79603467a24Schristos __asm__ ("mulu.d %0,%1,%2" \ 79703467a24Schristos : "=r" (__xx.__ll) \ 79803467a24Schristos : 
"r" ((USItype) (u)), \ 79903467a24Schristos "r" ((USItype) (v))); \ 80003467a24Schristos (wh) = __xx.__i.__h; \ 80103467a24Schristos (wl) = __xx.__i.__l; \ 80203467a24Schristos } while (0) 80303467a24Schristos #define udiv_qrnnd(q, r, n1, n0, d) \ 80403467a24Schristos ({union {UDItype __ll; \ 80503467a24Schristos struct {USItype __h, __l;} __i; \ 80603467a24Schristos } __xx; \ 80703467a24Schristos USItype __q; \ 80803467a24Schristos __xx.__i.__h = (n1); __xx.__i.__l = (n0); \ 80903467a24Schristos __asm__ ("divu.d %0,%1,%2" \ 81003467a24Schristos : "=r" (__q) \ 81103467a24Schristos : "r" (__xx.__ll), \ 81203467a24Schristos "r" ((USItype) (d))); \ 81303467a24Schristos (r) = (n0) - __q * (d); (q) = __q; }) 81403467a24Schristos #define UMUL_TIME 5 81503467a24Schristos #define UDIV_TIME 25 81603467a24Schristos #else 81703467a24Schristos #define UMUL_TIME 17 81803467a24Schristos #define UDIV_TIME 150 81903467a24Schristos #endif /* __mc88110__ */ 82003467a24Schristos #endif /* __m88000__ */ 82103467a24Schristos 82203467a24Schristos #if defined (__mn10300__) 82303467a24Schristos # if defined (__AM33__) 82403467a24Schristos # define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clz (X)) 82503467a24Schristos # define umul_ppmm(w1, w0, u, v) \ 82603467a24Schristos asm("mulu %3,%2,%1,%0" : "=r"(w0), "=r"(w1) : "r"(u), "r"(v)) 82703467a24Schristos # define smul_ppmm(w1, w0, u, v) \ 82803467a24Schristos asm("mul %3,%2,%1,%0" : "=r"(w0), "=r"(w1) : "r"(u), "r"(v)) 82903467a24Schristos # else 83003467a24Schristos # define umul_ppmm(w1, w0, u, v) \ 83103467a24Schristos asm("nop; nop; mulu %3,%0" : "=d"(w0), "=z"(w1) : "%0"(u), "d"(v)) 83203467a24Schristos # define smul_ppmm(w1, w0, u, v) \ 83303467a24Schristos asm("nop; nop; mul %3,%0" : "=d"(w0), "=z"(w1) : "%0"(u), "d"(v)) 83403467a24Schristos # endif 83503467a24Schristos # define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 83603467a24Schristos do { \ 83703467a24Schristos DWunion __s, __a, __b; \ 83803467a24Schristos __a.s.low = 
(al); __a.s.high = (ah); \ 83903467a24Schristos __b.s.low = (bl); __b.s.high = (bh); \ 84003467a24Schristos __s.ll = __a.ll + __b.ll; \ 84103467a24Schristos (sl) = __s.s.low; (sh) = __s.s.high; \ 84203467a24Schristos } while (0) 84303467a24Schristos # define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 84403467a24Schristos do { \ 84503467a24Schristos DWunion __s, __a, __b; \ 84603467a24Schristos __a.s.low = (al); __a.s.high = (ah); \ 84703467a24Schristos __b.s.low = (bl); __b.s.high = (bh); \ 84803467a24Schristos __s.ll = __a.ll - __b.ll; \ 84903467a24Schristos (sl) = __s.s.low; (sh) = __s.s.high; \ 85003467a24Schristos } while (0) 85103467a24Schristos # define udiv_qrnnd(q, r, nh, nl, d) \ 85203467a24Schristos asm("divu %2,%0" : "=D"(q), "=z"(r) : "D"(d), "0"(nl), "1"(nh)) 85303467a24Schristos # define sdiv_qrnnd(q, r, nh, nl, d) \ 85403467a24Schristos asm("div %2,%0" : "=D"(q), "=z"(r) : "D"(d), "0"(nl), "1"(nh)) 85503467a24Schristos # define UMUL_TIME 3 85603467a24Schristos # define UDIV_TIME 38 85703467a24Schristos #endif 85803467a24Schristos 85903467a24Schristos #if defined (__mips__) && W_TYPE_SIZE == 32 86003467a24Schristos #define umul_ppmm(w1, w0, u, v) \ 86103467a24Schristos do { \ 86203467a24Schristos UDItype __x = (UDItype) (USItype) (u) * (USItype) (v); \ 86303467a24Schristos (w1) = (USItype) (__x >> 32); \ 86403467a24Schristos (w0) = (USItype) (__x); \ 86503467a24Schristos } while (0) 86603467a24Schristos #define UMUL_TIME 10 86703467a24Schristos #define UDIV_TIME 100 86803467a24Schristos 869968cf8f2Schristos #if (__mips == 32 || __mips == 64) && ! defined (__mips16) 87003467a24Schristos #define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clz (X)) 87103467a24Schristos #define COUNT_LEADING_ZEROS_0 32 87203467a24Schristos #endif 87303467a24Schristos #endif /* __mips__ */ 87403467a24Schristos 87503467a24Schristos /* FIXME: We should test _IBMR2 here when we add assembly support for the 87603467a24Schristos system vendor compilers. 
87703467a24Schristos FIXME: What's needed for gcc PowerPC VxWorks? __vxworks__ is not good 87803467a24Schristos enough, since that hits ARM and m68k too. */ 87903467a24Schristos #if (defined (_ARCH_PPC) /* AIX */ \ 88003467a24Schristos || defined (__powerpc__) /* gcc */ \ 88103467a24Schristos || defined (__POWERPC__) /* BEOS */ \ 88203467a24Schristos || defined (__ppc__) /* Darwin */ \ 88303467a24Schristos || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */ \ 88403467a24Schristos || (defined (PPC) && defined (CPU_FAMILY) /* VxWorks */ \ 88503467a24Schristos && CPU_FAMILY == PPC) \ 88603467a24Schristos ) && W_TYPE_SIZE == 32 88703467a24Schristos #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 88803467a24Schristos do { \ 88903467a24Schristos if (__builtin_constant_p (bh) && (bh) == 0) \ 89003467a24Schristos __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2" \ 89103467a24Schristos : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\ 89203467a24Schristos else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \ 89303467a24Schristos __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2" \ 89403467a24Schristos : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\ 89503467a24Schristos else \ 89603467a24Schristos __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3" \ 89703467a24Schristos : "=r" (sh), "=&r" (sl) \ 89803467a24Schristos : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \ 89903467a24Schristos } while (0) 90003467a24Schristos #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 90103467a24Schristos do { \ 90203467a24Schristos if (__builtin_constant_p (ah) && (ah) == 0) \ 90303467a24Schristos __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2" \ 90403467a24Schristos : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\ 90503467a24Schristos else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0) \ 90603467a24Schristos __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2" \ 90703467a24Schristos : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\ 90803467a24Schristos else 
if (__builtin_constant_p (bh) && (bh) == 0) \ 90903467a24Schristos __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2" \ 91003467a24Schristos : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\ 91103467a24Schristos else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \ 91203467a24Schristos __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2" \ 91303467a24Schristos : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\ 91403467a24Schristos else \ 91503467a24Schristos __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2" \ 91603467a24Schristos : "=r" (sh), "=&r" (sl) \ 91703467a24Schristos : "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \ 91803467a24Schristos } while (0) 91903467a24Schristos #define count_leading_zeros(count, x) \ 92003467a24Schristos __asm__ ("cntlzw %0,%1" : "=r" (count) : "r" (x)) 92103467a24Schristos #define COUNT_LEADING_ZEROS_0 32 92203467a24Schristos #if defined (_ARCH_PPC) || defined (__powerpc__) || defined (__POWERPC__) \ 92303467a24Schristos || defined (__ppc__) \ 92403467a24Schristos || (defined (PPC) && ! 
defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */ \ 92503467a24Schristos || (defined (PPC) && defined (CPU_FAMILY) /* VxWorks */ \ 92603467a24Schristos && CPU_FAMILY == PPC) 92703467a24Schristos #define umul_ppmm(ph, pl, m0, m1) \ 92803467a24Schristos do { \ 92903467a24Schristos USItype __m0 = (m0), __m1 = (m1); \ 93003467a24Schristos __asm__ ("mulhwu %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \ 93103467a24Schristos (pl) = __m0 * __m1; \ 93203467a24Schristos } while (0) 93303467a24Schristos #define UMUL_TIME 15 93403467a24Schristos #define smul_ppmm(ph, pl, m0, m1) \ 93503467a24Schristos do { \ 93603467a24Schristos SItype __m0 = (m0), __m1 = (m1); \ 93703467a24Schristos __asm__ ("mulhw %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \ 93803467a24Schristos (pl) = __m0 * __m1; \ 93903467a24Schristos } while (0) 94003467a24Schristos #define SMUL_TIME 14 94103467a24Schristos #define UDIV_TIME 120 94203467a24Schristos #endif 94303467a24Schristos #endif /* 32-bit POWER architecture variants. */ 94403467a24Schristos 94503467a24Schristos /* We should test _IBMR2 here when we add assembly support for the system 94603467a24Schristos vendor compilers. 
*/ 94703467a24Schristos #if (defined (_ARCH_PPC64) || defined (__powerpc64__)) && W_TYPE_SIZE == 64 94803467a24Schristos #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 94903467a24Schristos do { \ 95003467a24Schristos if (__builtin_constant_p (bh) && (bh) == 0) \ 95103467a24Schristos __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2" \ 95203467a24Schristos : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\ 95303467a24Schristos else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \ 95403467a24Schristos __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2" \ 95503467a24Schristos : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\ 95603467a24Schristos else \ 95703467a24Schristos __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3" \ 95803467a24Schristos : "=r" (sh), "=&r" (sl) \ 95903467a24Schristos : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \ 96003467a24Schristos } while (0) 96103467a24Schristos #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 96203467a24Schristos do { \ 96303467a24Schristos if (__builtin_constant_p (ah) && (ah) == 0) \ 96403467a24Schristos __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2" \ 96503467a24Schristos : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\ 96603467a24Schristos else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0) \ 96703467a24Schristos __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2" \ 96803467a24Schristos : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\ 96903467a24Schristos else if (__builtin_constant_p (bh) && (bh) == 0) \ 97003467a24Schristos __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2" \ 97103467a24Schristos : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\ 97203467a24Schristos else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \ 97303467a24Schristos __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2" \ 97403467a24Schristos : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\ 97503467a24Schristos else \ 97603467a24Schristos __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2" \ 97703467a24Schristos : "=r" 
(sh), "=&r" (sl) \ 97803467a24Schristos : "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \ 97903467a24Schristos } while (0) 98003467a24Schristos #define count_leading_zeros(count, x) \ 98103467a24Schristos __asm__ ("cntlzd %0,%1" : "=r" (count) : "r" (x)) 98203467a24Schristos #define COUNT_LEADING_ZEROS_0 64 98303467a24Schristos #define umul_ppmm(ph, pl, m0, m1) \ 98403467a24Schristos do { \ 98503467a24Schristos UDItype __m0 = (m0), __m1 = (m1); \ 98603467a24Schristos __asm__ ("mulhdu %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \ 98703467a24Schristos (pl) = __m0 * __m1; \ 98803467a24Schristos } while (0) 98903467a24Schristos #define UMUL_TIME 15 99003467a24Schristos #define smul_ppmm(ph, pl, m0, m1) \ 99103467a24Schristos do { \ 99203467a24Schristos DItype __m0 = (m0), __m1 = (m1); \ 99303467a24Schristos __asm__ ("mulhd %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \ 99403467a24Schristos (pl) = __m0 * __m1; \ 99503467a24Schristos } while (0) 99603467a24Schristos #define SMUL_TIME 14 /* ??? */ 99703467a24Schristos #define UDIV_TIME 120 /* ??? */ 99803467a24Schristos #endif /* 64-bit PowerPC. 
*/ 99903467a24Schristos 100003467a24Schristos #if defined (__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32 100103467a24Schristos #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 100203467a24Schristos __asm__ ("a %1,%5\n\tae %0,%3" \ 100303467a24Schristos : "=r" ((USItype) (sh)), \ 100403467a24Schristos "=&r" ((USItype) (sl)) \ 100503467a24Schristos : "%0" ((USItype) (ah)), \ 100603467a24Schristos "r" ((USItype) (bh)), \ 100703467a24Schristos "%1" ((USItype) (al)), \ 100803467a24Schristos "r" ((USItype) (bl))) 100903467a24Schristos #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 101003467a24Schristos __asm__ ("s %1,%5\n\tse %0,%3" \ 101103467a24Schristos : "=r" ((USItype) (sh)), \ 101203467a24Schristos "=&r" ((USItype) (sl)) \ 101303467a24Schristos : "0" ((USItype) (ah)), \ 101403467a24Schristos "r" ((USItype) (bh)), \ 101503467a24Schristos "1" ((USItype) (al)), \ 101603467a24Schristos "r" ((USItype) (bl))) 101703467a24Schristos #define umul_ppmm(ph, pl, m0, m1) \ 101803467a24Schristos do { \ 101903467a24Schristos USItype __m0 = (m0), __m1 = (m1); \ 102003467a24Schristos __asm__ ( \ 102103467a24Schristos "s r2,r2\n" \ 102203467a24Schristos " mts r10,%2\n" \ 102303467a24Schristos " m r2,%3\n" \ 102403467a24Schristos " m r2,%3\n" \ 102503467a24Schristos " m r2,%3\n" \ 102603467a24Schristos " m r2,%3\n" \ 102703467a24Schristos " m r2,%3\n" \ 102803467a24Schristos " m r2,%3\n" \ 102903467a24Schristos " m r2,%3\n" \ 103003467a24Schristos " m r2,%3\n" \ 103103467a24Schristos " m r2,%3\n" \ 103203467a24Schristos " m r2,%3\n" \ 103303467a24Schristos " m r2,%3\n" \ 103403467a24Schristos " m r2,%3\n" \ 103503467a24Schristos " m r2,%3\n" \ 103603467a24Schristos " m r2,%3\n" \ 103703467a24Schristos " m r2,%3\n" \ 103803467a24Schristos " m r2,%3\n" \ 103903467a24Schristos " cas %0,r2,r0\n" \ 104003467a24Schristos " mfs r10,%1" \ 104103467a24Schristos : "=r" ((USItype) (ph)), \ 104203467a24Schristos "=r" ((USItype) (pl)) \ 104303467a24Schristos : "%r" (__m0), \ 104403467a24Schristos "r" 
(__m1) \ 104503467a24Schristos : "r2"); \ 104603467a24Schristos (ph) += ((((SItype) __m0 >> 31) & __m1) \ 104703467a24Schristos + (((SItype) __m1 >> 31) & __m0)); \ 104803467a24Schristos } while (0) 104903467a24Schristos #define UMUL_TIME 20 105003467a24Schristos #define UDIV_TIME 200 105103467a24Schristos #define count_leading_zeros(count, x) \ 105203467a24Schristos do { \ 105303467a24Schristos if ((x) >= 0x10000) \ 105403467a24Schristos __asm__ ("clz %0,%1" \ 105503467a24Schristos : "=r" ((USItype) (count)) \ 105603467a24Schristos : "r" ((USItype) (x) >> 16)); \ 105703467a24Schristos else \ 105803467a24Schristos { \ 105903467a24Schristos __asm__ ("clz %0,%1" \ 106003467a24Schristos : "=r" ((USItype) (count)) \ 106103467a24Schristos : "r" ((USItype) (x))); \ 106203467a24Schristos (count) += 16; \ 106303467a24Schristos } \ 106403467a24Schristos } while (0) 106503467a24Schristos #endif 106603467a24Schristos 10674559860eSchristos #if defined(__riscv) 10684559860eSchristos #ifdef __riscv_mul 10694559860eSchristos #define __umulsidi3(u,v) ((UDWtype)(UWtype)(u) * (UWtype)(v)) 10704559860eSchristos #define __muluw3(a, b) ((UWtype)(a) * (UWtype)(b)) 10714559860eSchristos #else 10724559860eSchristos #if __riscv_xlen == 32 10734559860eSchristos #define MULUW3 "call __mulsi3" 10744559860eSchristos #elif __riscv_xlen == 64 10754559860eSchristos #define MULUW3 "call __muldi3" 10764559860eSchristos #else 10774559860eSchristos #error unsupport xlen 10784559860eSchristos #endif /* __riscv_xlen */ 10794559860eSchristos /* We rely on the fact that MULUW3 doesn't clobber the t-registers. 10804559860eSchristos It can get better register allocation result. 
*/ 10814559860eSchristos #define __muluw3(a, b) \ 10824559860eSchristos ({ \ 10834559860eSchristos register UWtype __op0 asm ("a0") = a; \ 10844559860eSchristos register UWtype __op1 asm ("a1") = b; \ 10854559860eSchristos asm volatile (MULUW3 \ 10864559860eSchristos : "+r" (__op0), "+r" (__op1) \ 10874559860eSchristos : \ 10884559860eSchristos : "ra", "a2", "a3"); \ 10894559860eSchristos __op0; \ 10904559860eSchristos }) 10914559860eSchristos #endif /* __riscv_mul */ 10924559860eSchristos #define umul_ppmm(w1, w0, u, v) \ 10934559860eSchristos do { \ 10944559860eSchristos UWtype __x0, __x1, __x2, __x3; \ 10954559860eSchristos UHWtype __ul, __vl, __uh, __vh; \ 10964559860eSchristos \ 10974559860eSchristos __ul = __ll_lowpart (u); \ 10984559860eSchristos __uh = __ll_highpart (u); \ 10994559860eSchristos __vl = __ll_lowpart (v); \ 11004559860eSchristos __vh = __ll_highpart (v); \ 11014559860eSchristos \ 11024559860eSchristos __x0 = __muluw3 (__ul, __vl); \ 11034559860eSchristos __x1 = __muluw3 (__ul, __vh); \ 11044559860eSchristos __x2 = __muluw3 (__uh, __vl); \ 11054559860eSchristos __x3 = __muluw3 (__uh, __vh); \ 11064559860eSchristos \ 11074559860eSchristos __x1 += __ll_highpart (__x0);/* this can't give carry */ \ 11084559860eSchristos __x1 += __x2; /* but this indeed can */ \ 11094559860eSchristos if (__x1 < __x2) /* did we get it? */ \ 11104559860eSchristos __x3 += __ll_B; /* yes, add it in the proper pos. 
*/ \ 11114559860eSchristos \ 11124559860eSchristos (w1) = __x3 + __ll_highpart (__x1); \ 11134559860eSchristos (w0) = __ll_lowpart (__x1) * __ll_B + __ll_lowpart (__x0); \ 11144559860eSchristos } while (0) 11154559860eSchristos #endif /* __riscv */ 11164559860eSchristos 11174559860eSchristos #if defined(__sh__) && W_TYPE_SIZE == 32 111803467a24Schristos #ifndef __sh1__ 111903467a24Schristos #define umul_ppmm(w1, w0, u, v) \ 112003467a24Schristos __asm__ ( \ 112103467a24Schristos "dmulu.l %2,%3\n\tsts%M1 macl,%1\n\tsts%M0 mach,%0" \ 112203467a24Schristos : "=r<" ((USItype)(w1)), \ 112303467a24Schristos "=r<" ((USItype)(w0)) \ 112403467a24Schristos : "r" ((USItype)(u)), \ 112503467a24Schristos "r" ((USItype)(v)) \ 112603467a24Schristos : "macl", "mach") 112703467a24Schristos #define UMUL_TIME 5 112803467a24Schristos #endif 112903467a24Schristos 113003467a24Schristos /* This is the same algorithm as __udiv_qrnnd_c. */ 113103467a24Schristos #define UDIV_NEEDS_NORMALIZATION 1 113203467a24Schristos 1133ba340e45Schristos #ifdef __FDPIC__ 1134ba340e45Schristos /* FDPIC needs a special version of the asm fragment to extract the 1135ba340e45Schristos code address from the function descriptor. __udiv_qrnnd_16 is 1136ba340e45Schristos assumed to be local and not to use the GOT, so loading r12 is 1137ba340e45Schristos not needed. 
*/ 1138ba340e45Schristos #define udiv_qrnnd(q, r, n1, n0, d) \ 1139ba340e45Schristos do { \ 1140ba340e45Schristos extern UWtype __udiv_qrnnd_16 (UWtype, UWtype) \ 1141ba340e45Schristos __attribute__ ((visibility ("hidden"))); \ 1142ba340e45Schristos /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */ \ 1143ba340e45Schristos __asm__ ( \ 1144ba340e45Schristos "mov%M4 %4,r5\n" \ 1145ba340e45Schristos " swap.w %3,r4\n" \ 1146ba340e45Schristos " swap.w r5,r6\n" \ 1147ba340e45Schristos " mov.l @%5,r2\n" \ 1148ba340e45Schristos " jsr @r2\n" \ 1149ba340e45Schristos " shll16 r6\n" \ 1150ba340e45Schristos " swap.w r4,r4\n" \ 1151ba340e45Schristos " mov.l @%5,r2\n" \ 1152ba340e45Schristos " jsr @r2\n" \ 1153ba340e45Schristos " swap.w r1,%0\n" \ 1154ba340e45Schristos " or r1,%0" \ 1155ba340e45Schristos : "=r" (q), "=&z" (r) \ 1156ba340e45Schristos : "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16) \ 1157ba340e45Schristos : "r1", "r2", "r4", "r5", "r6", "pr", "t"); \ 1158ba340e45Schristos } while (0) 1159ba340e45Schristos #else 116003467a24Schristos #define udiv_qrnnd(q, r, n1, n0, d) \ 116103467a24Schristos do { \ 116203467a24Schristos extern UWtype __udiv_qrnnd_16 (UWtype, UWtype) \ 116303467a24Schristos __attribute__ ((visibility ("hidden"))); \ 116403467a24Schristos /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */ \ 116503467a24Schristos __asm__ ( \ 116603467a24Schristos "mov%M4 %4,r5\n" \ 116703467a24Schristos " swap.w %3,r4\n" \ 116803467a24Schristos " swap.w r5,r6\n" \ 116903467a24Schristos " jsr @%5\n" \ 117003467a24Schristos " shll16 r6\n" \ 117103467a24Schristos " swap.w r4,r4\n" \ 117203467a24Schristos " jsr @%5\n" \ 117303467a24Schristos " swap.w r1,%0\n" \ 117403467a24Schristos " or r1,%0" \ 117503467a24Schristos : "=r" (q), "=&z" (r) \ 117603467a24Schristos : "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16) \ 117703467a24Schristos : "r1", "r2", "r4", "r5", "r6", "pr", "t"); \ 117803467a24Schristos } while (0) 
1179ba340e45Schristos #endif /* __FDPIC__ */ 118003467a24Schristos 118103467a24Schristos #define UDIV_TIME 80 118203467a24Schristos 118303467a24Schristos #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 118403467a24Schristos __asm__ ("clrt;subc %5,%1; subc %4,%0" \ 118503467a24Schristos : "=r" (sh), "=r" (sl) \ 118603467a24Schristos : "0" (ah), "1" (al), "r" (bh), "r" (bl) : "t") 118703467a24Schristos 118803467a24Schristos #endif /* __sh__ */ 118903467a24Schristos 119003467a24Schristos #if defined (__sparc__) && !defined (__arch64__) && !defined (__sparcv9) \ 119103467a24Schristos && W_TYPE_SIZE == 32 119203467a24Schristos #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 119303467a24Schristos __asm__ ("addcc %r4,%5,%1\n\taddx %r2,%3,%0" \ 119403467a24Schristos : "=r" ((USItype) (sh)), \ 119503467a24Schristos "=&r" ((USItype) (sl)) \ 119603467a24Schristos : "%rJ" ((USItype) (ah)), \ 119703467a24Schristos "rI" ((USItype) (bh)), \ 119803467a24Schristos "%rJ" ((USItype) (al)), \ 119903467a24Schristos "rI" ((USItype) (bl)) \ 120003467a24Schristos __CLOBBER_CC) 120103467a24Schristos #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 120203467a24Schristos __asm__ ("subcc %r4,%5,%1\n\tsubx %r2,%3,%0" \ 120303467a24Schristos : "=r" ((USItype) (sh)), \ 120403467a24Schristos "=&r" ((USItype) (sl)) \ 120503467a24Schristos : "rJ" ((USItype) (ah)), \ 120603467a24Schristos "rI" ((USItype) (bh)), \ 120703467a24Schristos "rJ" ((USItype) (al)), \ 120803467a24Schristos "rI" ((USItype) (bl)) \ 120903467a24Schristos __CLOBBER_CC) 121003467a24Schristos #if defined (__sparc_v9__) 121103467a24Schristos #define umul_ppmm(w1, w0, u, v) \ 121203467a24Schristos do { \ 121303467a24Schristos register USItype __g1 asm ("g1"); \ 121403467a24Schristos __asm__ ("umul\t%2,%3,%1\n\t" \ 121503467a24Schristos "srlx\t%1, 32, %0" \ 121603467a24Schristos : "=r" ((USItype) (w1)), \ 121703467a24Schristos "=r" (__g1) \ 121803467a24Schristos : "r" ((USItype) (u)), \ 121903467a24Schristos "r" ((USItype) (v))); \ 
122003467a24Schristos (w0) = __g1; \ 122103467a24Schristos } while (0) 122203467a24Schristos #define udiv_qrnnd(__q, __r, __n1, __n0, __d) \ 122303467a24Schristos __asm__ ("mov\t%2,%%y\n\t" \ 122403467a24Schristos "udiv\t%3,%4,%0\n\t" \ 122503467a24Schristos "umul\t%0,%4,%1\n\t" \ 122603467a24Schristos "sub\t%3,%1,%1" \ 122703467a24Schristos : "=&r" ((USItype) (__q)), \ 122803467a24Schristos "=&r" ((USItype) (__r)) \ 122903467a24Schristos : "r" ((USItype) (__n1)), \ 123003467a24Schristos "r" ((USItype) (__n0)), \ 123103467a24Schristos "r" ((USItype) (__d))) 123203467a24Schristos #else 123303467a24Schristos #if defined (__sparc_v8__) 123403467a24Schristos #define umul_ppmm(w1, w0, u, v) \ 123503467a24Schristos __asm__ ("umul %2,%3,%1;rd %%y,%0" \ 123603467a24Schristos : "=r" ((USItype) (w1)), \ 123703467a24Schristos "=r" ((USItype) (w0)) \ 123803467a24Schristos : "r" ((USItype) (u)), \ 123903467a24Schristos "r" ((USItype) (v))) 124003467a24Schristos #define udiv_qrnnd(__q, __r, __n1, __n0, __d) \ 124103467a24Schristos __asm__ ("mov %2,%%y;nop;nop;nop;udiv %3,%4,%0;umul %0,%4,%1;sub %3,%1,%1"\ 124203467a24Schristos : "=&r" ((USItype) (__q)), \ 124303467a24Schristos "=&r" ((USItype) (__r)) \ 124403467a24Schristos : "r" ((USItype) (__n1)), \ 124503467a24Schristos "r" ((USItype) (__n0)), \ 124603467a24Schristos "r" ((USItype) (__d))) 124703467a24Schristos #else 124803467a24Schristos #if defined (__sparclite__) 124903467a24Schristos /* This has hardware multiply but not divide. It also has two additional 125003467a24Schristos instructions scan (ffs from high bit) and divscc. */ 125103467a24Schristos #define umul_ppmm(w1, w0, u, v) \ 125203467a24Schristos __asm__ ("umul %2,%3,%1;rd %%y,%0" \ 125303467a24Schristos : "=r" ((USItype) (w1)), \ 125403467a24Schristos "=r" ((USItype) (w0)) \ 125503467a24Schristos : "r" ((USItype) (u)), \ 125603467a24Schristos "r" ((USItype) (v))) 125703467a24Schristos #define udiv_qrnnd(q, r, n1, n0, d) \ 125803467a24Schristos __asm__ ("! 
Inlined udiv_qrnnd\n" \ 125903467a24Schristos " wr %%g0,%2,%%y ! Not a delayed write for sparclite\n" \ 126003467a24Schristos " tst %%g0\n" \ 126103467a24Schristos " divscc %3,%4,%%g1\n" \ 126203467a24Schristos " divscc %%g1,%4,%%g1\n" \ 126303467a24Schristos " divscc %%g1,%4,%%g1\n" \ 126403467a24Schristos " divscc %%g1,%4,%%g1\n" \ 126503467a24Schristos " divscc %%g1,%4,%%g1\n" \ 126603467a24Schristos " divscc %%g1,%4,%%g1\n" \ 126703467a24Schristos " divscc %%g1,%4,%%g1\n" \ 126803467a24Schristos " divscc %%g1,%4,%%g1\n" \ 126903467a24Schristos " divscc %%g1,%4,%%g1\n" \ 127003467a24Schristos " divscc %%g1,%4,%%g1\n" \ 127103467a24Schristos " divscc %%g1,%4,%%g1\n" \ 127203467a24Schristos " divscc %%g1,%4,%%g1\n" \ 127303467a24Schristos " divscc %%g1,%4,%%g1\n" \ 127403467a24Schristos " divscc %%g1,%4,%%g1\n" \ 127503467a24Schristos " divscc %%g1,%4,%%g1\n" \ 127603467a24Schristos " divscc %%g1,%4,%%g1\n" \ 127703467a24Schristos " divscc %%g1,%4,%%g1\n" \ 127803467a24Schristos " divscc %%g1,%4,%%g1\n" \ 127903467a24Schristos " divscc %%g1,%4,%%g1\n" \ 128003467a24Schristos " divscc %%g1,%4,%%g1\n" \ 128103467a24Schristos " divscc %%g1,%4,%%g1\n" \ 128203467a24Schristos " divscc %%g1,%4,%%g1\n" \ 128303467a24Schristos " divscc %%g1,%4,%%g1\n" \ 128403467a24Schristos " divscc %%g1,%4,%%g1\n" \ 128503467a24Schristos " divscc %%g1,%4,%%g1\n" \ 128603467a24Schristos " divscc %%g1,%4,%%g1\n" \ 128703467a24Schristos " divscc %%g1,%4,%%g1\n" \ 128803467a24Schristos " divscc %%g1,%4,%%g1\n" \ 128903467a24Schristos " divscc %%g1,%4,%%g1\n" \ 129003467a24Schristos " divscc %%g1,%4,%%g1\n" \ 129103467a24Schristos " divscc %%g1,%4,%%g1\n" \ 129203467a24Schristos " divscc %%g1,%4,%0\n" \ 129303467a24Schristos " rd %%y,%1\n" \ 129403467a24Schristos " bl,a 1f\n" \ 129503467a24Schristos " add %1,%4,%1\n" \ 129603467a24Schristos "1: ! 
End of inline udiv_qrnnd" \ 129703467a24Schristos : "=r" ((USItype) (q)), \ 129803467a24Schristos "=r" ((USItype) (r)) \ 129903467a24Schristos : "r" ((USItype) (n1)), \ 130003467a24Schristos "r" ((USItype) (n0)), \ 130103467a24Schristos "rI" ((USItype) (d)) \ 130203467a24Schristos : "g1" __AND_CLOBBER_CC) 130303467a24Schristos #define UDIV_TIME 37 130403467a24Schristos #define count_leading_zeros(count, x) \ 130503467a24Schristos do { \ 130603467a24Schristos __asm__ ("scan %1,1,%0" \ 130703467a24Schristos : "=r" ((USItype) (count)) \ 130803467a24Schristos : "r" ((USItype) (x))); \ 130903467a24Schristos } while (0) 131003467a24Schristos /* Early sparclites return 63 for an argument of 0, but they warn that future 131103467a24Schristos implementations might change this. Therefore, leave COUNT_LEADING_ZEROS_0 131203467a24Schristos undefined. */ 131303467a24Schristos #else 131403467a24Schristos /* SPARC without integer multiplication and divide instructions. 131503467a24Schristos (i.e. at least Sun4/20,40,60,65,75,110,260,280,330,360,380,470,490) */ 131603467a24Schristos #define umul_ppmm(w1, w0, u, v) \ 131703467a24Schristos __asm__ ("! Inlined umul_ppmm\n" \ 131803467a24Schristos " wr %%g0,%2,%%y ! SPARC has 0-3 delay insn after a wr\n"\ 131903467a24Schristos " sra %3,31,%%o5 ! Don't move this insn\n" \ 132003467a24Schristos " and %2,%%o5,%%o5 ! Don't move this insn\n" \ 132103467a24Schristos " andcc %%g0,0,%%g1 ! 
Don't move this insn\n" \ 132203467a24Schristos " mulscc %%g1,%3,%%g1\n" \ 132303467a24Schristos " mulscc %%g1,%3,%%g1\n" \ 132403467a24Schristos " mulscc %%g1,%3,%%g1\n" \ 132503467a24Schristos " mulscc %%g1,%3,%%g1\n" \ 132603467a24Schristos " mulscc %%g1,%3,%%g1\n" \ 132703467a24Schristos " mulscc %%g1,%3,%%g1\n" \ 132803467a24Schristos " mulscc %%g1,%3,%%g1\n" \ 132903467a24Schristos " mulscc %%g1,%3,%%g1\n" \ 133003467a24Schristos " mulscc %%g1,%3,%%g1\n" \ 133103467a24Schristos " mulscc %%g1,%3,%%g1\n" \ 133203467a24Schristos " mulscc %%g1,%3,%%g1\n" \ 133303467a24Schristos " mulscc %%g1,%3,%%g1\n" \ 133403467a24Schristos " mulscc %%g1,%3,%%g1\n" \ 133503467a24Schristos " mulscc %%g1,%3,%%g1\n" \ 133603467a24Schristos " mulscc %%g1,%3,%%g1\n" \ 133703467a24Schristos " mulscc %%g1,%3,%%g1\n" \ 133803467a24Schristos " mulscc %%g1,%3,%%g1\n" \ 133903467a24Schristos " mulscc %%g1,%3,%%g1\n" \ 134003467a24Schristos " mulscc %%g1,%3,%%g1\n" \ 134103467a24Schristos " mulscc %%g1,%3,%%g1\n" \ 134203467a24Schristos " mulscc %%g1,%3,%%g1\n" \ 134303467a24Schristos " mulscc %%g1,%3,%%g1\n" \ 134403467a24Schristos " mulscc %%g1,%3,%%g1\n" \ 134503467a24Schristos " mulscc %%g1,%3,%%g1\n" \ 134603467a24Schristos " mulscc %%g1,%3,%%g1\n" \ 134703467a24Schristos " mulscc %%g1,%3,%%g1\n" \ 134803467a24Schristos " mulscc %%g1,%3,%%g1\n" \ 134903467a24Schristos " mulscc %%g1,%3,%%g1\n" \ 135003467a24Schristos " mulscc %%g1,%3,%%g1\n" \ 135103467a24Schristos " mulscc %%g1,%3,%%g1\n" \ 135203467a24Schristos " mulscc %%g1,%3,%%g1\n" \ 135303467a24Schristos " mulscc %%g1,%3,%%g1\n" \ 135403467a24Schristos " mulscc %%g1,0,%%g1\n" \ 135503467a24Schristos " add %%g1,%%o5,%0\n" \ 135603467a24Schristos " rd %%y,%1" \ 135703467a24Schristos : "=r" ((USItype) (w1)), \ 135803467a24Schristos "=r" ((USItype) (w0)) \ 135903467a24Schristos : "%rI" ((USItype) (u)), \ 136003467a24Schristos "r" ((USItype) (v)) \ 136103467a24Schristos : "g1", "o5" __AND_CLOBBER_CC) 136203467a24Schristos #define 
UMUL_TIME 39 /* 39 instructions */ 136303467a24Schristos /* It's quite necessary to add this much assembler for the sparc. 136403467a24Schristos The default udiv_qrnnd (in C) is more than 10 times slower! */ 136503467a24Schristos #define udiv_qrnnd(__q, __r, __n1, __n0, __d) \ 136603467a24Schristos __asm__ ("! Inlined udiv_qrnnd\n" \ 136703467a24Schristos " mov 32,%%g1\n" \ 136803467a24Schristos " subcc %1,%2,%%g0\n" \ 136903467a24Schristos "1: bcs 5f\n" \ 137003467a24Schristos " addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n" \ 137103467a24Schristos " sub %1,%2,%1 ! this kills msb of n\n" \ 137203467a24Schristos " addx %1,%1,%1 ! so this can't give carry\n" \ 137303467a24Schristos " subcc %%g1,1,%%g1\n" \ 137403467a24Schristos "2: bne 1b\n" \ 137503467a24Schristos " subcc %1,%2,%%g0\n" \ 137603467a24Schristos " bcs 3f\n" \ 137703467a24Schristos " addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n" \ 137803467a24Schristos " b 3f\n" \ 137903467a24Schristos " sub %1,%2,%1 ! this kills msb of n\n" \ 138003467a24Schristos "4: sub %1,%2,%1\n" \ 138103467a24Schristos "5: addxcc %1,%1,%1\n" \ 138203467a24Schristos " bcc 2b\n" \ 138303467a24Schristos " subcc %%g1,1,%%g1\n" \ 138403467a24Schristos "! Got carry from n. Subtract next step to cancel this carry.\n" \ 138503467a24Schristos " bne 4b\n" \ 138603467a24Schristos " addcc %0,%0,%0 ! shift n1n0 and a 0-bit in lsb\n" \ 138703467a24Schristos " sub %1,%2,%1\n" \ 138803467a24Schristos "3: xnor %0,0,%0\n" \ 138903467a24Schristos " ! End of inline udiv_qrnnd" \ 139003467a24Schristos : "=&r" ((USItype) (__q)), \ 139103467a24Schristos "=&r" ((USItype) (__r)) \ 139203467a24Schristos : "r" ((USItype) (__d)), \ 139303467a24Schristos "1" ((USItype) (__n1)), \ 139403467a24Schristos "0" ((USItype) (__n0)) : "g1" __AND_CLOBBER_CC) 139503467a24Schristos #define UDIV_TIME (3+7*32) /* 7 instructions/iteration. 32 iterations. 
*/ 139603467a24Schristos #endif /* __sparclite__ */ 139703467a24Schristos #endif /* __sparc_v8__ */ 139803467a24Schristos #endif /* __sparc_v9__ */ 139903467a24Schristos #endif /* sparc32 */ 140003467a24Schristos 140103467a24Schristos #if ((defined (__sparc__) && defined (__arch64__)) || defined (__sparcv9)) \ 140203467a24Schristos && W_TYPE_SIZE == 64 140303467a24Schristos #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 140403467a24Schristos do { \ 140503467a24Schristos UDItype __carry = 0; \ 140603467a24Schristos __asm__ ("addcc\t%r5,%6,%1\n\t" \ 140703467a24Schristos "add\t%r3,%4,%0\n\t" \ 140803467a24Schristos "movcs\t%%xcc, 1, %2\n\t" \ 140903467a24Schristos "add\t%0, %2, %0" \ 141003467a24Schristos : "=r" ((UDItype)(sh)), \ 141103467a24Schristos "=&r" ((UDItype)(sl)), \ 141203467a24Schristos "+r" (__carry) \ 141303467a24Schristos : "%rJ" ((UDItype)(ah)), \ 141403467a24Schristos "rI" ((UDItype)(bh)), \ 141503467a24Schristos "%rJ" ((UDItype)(al)), \ 141603467a24Schristos "rI" ((UDItype)(bl)) \ 141703467a24Schristos __CLOBBER_CC); \ 141803467a24Schristos } while (0) 141903467a24Schristos 142003467a24Schristos #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 142103467a24Schristos do { \ 142203467a24Schristos UDItype __carry = 0; \ 142303467a24Schristos __asm__ ("subcc\t%r5,%6,%1\n\t" \ 142403467a24Schristos "sub\t%r3,%4,%0\n\t" \ 142503467a24Schristos "movcs\t%%xcc, 1, %2\n\t" \ 142603467a24Schristos "sub\t%0, %2, %0" \ 142703467a24Schristos : "=r" ((UDItype)(sh)), \ 142803467a24Schristos "=&r" ((UDItype)(sl)), \ 142903467a24Schristos "+r" (__carry) \ 143003467a24Schristos : "%rJ" ((UDItype)(ah)), \ 143103467a24Schristos "rI" ((UDItype)(bh)), \ 143203467a24Schristos "%rJ" ((UDItype)(al)), \ 143303467a24Schristos "rI" ((UDItype)(bl)) \ 143403467a24Schristos __CLOBBER_CC); \ 143503467a24Schristos } while (0) 143603467a24Schristos 143703467a24Schristos #define umul_ppmm(wh, wl, u, v) \ 143803467a24Schristos do { \ 143903467a24Schristos UDItype tmp1, tmp2, tmp3, tmp4; \ 
144003467a24Schristos __asm__ __volatile__ ( \ 144103467a24Schristos "srl %7,0,%3\n\t" \ 144203467a24Schristos "mulx %3,%6,%1\n\t" \ 144303467a24Schristos "srlx %6,32,%2\n\t" \ 144403467a24Schristos "mulx %2,%3,%4\n\t" \ 144503467a24Schristos "sllx %4,32,%5\n\t" \ 144603467a24Schristos "srl %6,0,%3\n\t" \ 144703467a24Schristos "sub %1,%5,%5\n\t" \ 144803467a24Schristos "srlx %5,32,%5\n\t" \ 144903467a24Schristos "addcc %4,%5,%4\n\t" \ 145003467a24Schristos "srlx %7,32,%5\n\t" \ 145103467a24Schristos "mulx %3,%5,%3\n\t" \ 145203467a24Schristos "mulx %2,%5,%5\n\t" \ 145303467a24Schristos "sethi %%hi(0x80000000),%2\n\t" \ 145403467a24Schristos "addcc %4,%3,%4\n\t" \ 145503467a24Schristos "srlx %4,32,%4\n\t" \ 145603467a24Schristos "add %2,%2,%2\n\t" \ 145703467a24Schristos "movcc %%xcc,%%g0,%2\n\t" \ 145803467a24Schristos "addcc %5,%4,%5\n\t" \ 145903467a24Schristos "sllx %3,32,%3\n\t" \ 146003467a24Schristos "add %1,%3,%1\n\t" \ 146103467a24Schristos "add %5,%2,%0" \ 146203467a24Schristos : "=r" ((UDItype)(wh)), \ 146303467a24Schristos "=&r" ((UDItype)(wl)), \ 146403467a24Schristos "=&r" (tmp1), "=&r" (tmp2), "=&r" (tmp3), "=&r" (tmp4) \ 146503467a24Schristos : "r" ((UDItype)(u)), \ 146603467a24Schristos "r" ((UDItype)(v)) \ 146703467a24Schristos __CLOBBER_CC); \ 146803467a24Schristos } while (0) 146903467a24Schristos #define UMUL_TIME 96 147003467a24Schristos #define UDIV_TIME 230 147103467a24Schristos #endif /* sparc64 */ 147203467a24Schristos 147303467a24Schristos #if defined (__vax__) && W_TYPE_SIZE == 32 147403467a24Schristos #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 147503467a24Schristos __asm__ ("addl2 %5,%1\n\tadwc %3,%0" \ 147603467a24Schristos : "=g" ((USItype) (sh)), \ 147703467a24Schristos "=&g" ((USItype) (sl)) \ 147803467a24Schristos : "%0" ((USItype) (ah)), \ 147903467a24Schristos "g" ((USItype) (bh)), \ 148003467a24Schristos "%1" ((USItype) (al)), \ 148103467a24Schristos "g" ((USItype) (bl))) 148203467a24Schristos #define sub_ddmmss(sh, sl, ah, al, 
bh, bl) \ 148303467a24Schristos __asm__ ("subl2 %5,%1\n\tsbwc %3,%0" \ 148403467a24Schristos : "=g" ((USItype) (sh)), \ 148503467a24Schristos "=&g" ((USItype) (sl)) \ 148603467a24Schristos : "0" ((USItype) (ah)), \ 148703467a24Schristos "g" ((USItype) (bh)), \ 148803467a24Schristos "1" ((USItype) (al)), \ 148903467a24Schristos "g" ((USItype) (bl))) 149003467a24Schristos #define umul_ppmm(xh, xl, m0, m1) \ 149103467a24Schristos do { \ 149203467a24Schristos union { \ 149303467a24Schristos UDItype __ll; \ 149403467a24Schristos struct {USItype __l, __h;} __i; \ 149503467a24Schristos } __xx; \ 149603467a24Schristos USItype __m0 = (m0), __m1 = (m1); \ 149703467a24Schristos __asm__ ("emul %1,%2,$0,%0" \ 149803467a24Schristos : "=r" (__xx.__ll) \ 149903467a24Schristos : "g" (__m0), \ 150003467a24Schristos "g" (__m1)); \ 150103467a24Schristos (xh) = __xx.__i.__h; \ 150203467a24Schristos (xl) = __xx.__i.__l; \ 150303467a24Schristos (xh) += ((((SItype) __m0 >> 31) & __m1) \ 150403467a24Schristos + (((SItype) __m1 >> 31) & __m0)); \ 150503467a24Schristos } while (0) 150603467a24Schristos #define sdiv_qrnnd(q, r, n1, n0, d) \ 150703467a24Schristos do { \ 150803467a24Schristos union {DItype __ll; \ 150903467a24Schristos struct {SItype __l, __h;} __i; \ 151003467a24Schristos } __xx; \ 151103467a24Schristos __xx.__i.__h = n1; __xx.__i.__l = n0; \ 151203467a24Schristos __asm__ ("ediv %3,%2,%0,%1" \ 151303467a24Schristos : "=g" (q), "=g" (r) \ 151403467a24Schristos : "g" (__xx.__ll), "g" (d)); \ 151503467a24Schristos } while (0) 151603467a24Schristos #endif /* __vax__ */ 151703467a24Schristos 151803467a24Schristos #ifdef _TMS320C6X 151903467a24Schristos #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 152003467a24Schristos do \ 152103467a24Schristos { \ 152203467a24Schristos UDItype __ll; \ 152303467a24Schristos __asm__ ("addu .l1 %1, %2, %0" \ 152403467a24Schristos : "=a" (__ll) : "a" (al), "a" (bl)); \ 152503467a24Schristos (sl) = (USItype)__ll; \ 152603467a24Schristos (sh) = 
((USItype)(__ll >> 32)) + (ah) + (bh); \ 152703467a24Schristos } \ 152803467a24Schristos while (0) 152903467a24Schristos 153003467a24Schristos #ifdef _TMS320C6400_PLUS 153103467a24Schristos #define __umulsidi3(u,v) ((UDItype)(USItype)u*(USItype)v) 153203467a24Schristos #define umul_ppmm(w1, w0, u, v) \ 153303467a24Schristos do { \ 153403467a24Schristos UDItype __x = (UDItype) (USItype) (u) * (USItype) (v); \ 153503467a24Schristos (w1) = (USItype) (__x >> 32); \ 153603467a24Schristos (w0) = (USItype) (__x); \ 153703467a24Schristos } while (0) 153803467a24Schristos #endif /* _TMS320C6400_PLUS */ 153903467a24Schristos 154003467a24Schristos #define count_leading_zeros(count, x) ((count) = __builtin_clz (x)) 154103467a24Schristos #ifdef _TMS320C6400 154203467a24Schristos #define count_trailing_zeros(count, x) ((count) = __builtin_ctz (x)) 154303467a24Schristos #endif 154403467a24Schristos #define UMUL_TIME 4 154503467a24Schristos #define UDIV_TIME 40 154603467a24Schristos #endif /* _TMS320C6X */ 154703467a24Schristos 154803467a24Schristos #if defined (__xtensa__) && W_TYPE_SIZE == 32 154903467a24Schristos /* This code is not Xtensa-configuration-specific, so rely on the compiler 155003467a24Schristos to expand builtin functions depending on what configuration features 155103467a24Schristos are available. This avoids library calls when the operation can be 155203467a24Schristos performed in-line. 
*/ 155303467a24Schristos #define umul_ppmm(w1, w0, u, v) \ 155403467a24Schristos do { \ 155503467a24Schristos DWunion __w; \ 155603467a24Schristos __w.ll = __builtin_umulsidi3 (u, v); \ 155703467a24Schristos w1 = __w.s.high; \ 155803467a24Schristos w0 = __w.s.low; \ 155903467a24Schristos } while (0) 156003467a24Schristos #define __umulsidi3(u, v) __builtin_umulsidi3 (u, v) 156103467a24Schristos #define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X)) 156203467a24Schristos #define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X)) 156303467a24Schristos #endif /* __xtensa__ */ 156403467a24Schristos 156503467a24Schristos #if defined xstormy16 156603467a24Schristos extern UHItype __stormy16_count_leading_zeros (UHItype); 156703467a24Schristos #define count_leading_zeros(count, x) \ 156803467a24Schristos do \ 156903467a24Schristos { \ 157003467a24Schristos UHItype size; \ 157103467a24Schristos \ 157203467a24Schristos /* We assume that W_TYPE_SIZE is a multiple of 16... */ \ 157303467a24Schristos for ((count) = 0, size = W_TYPE_SIZE; size; size -= 16) \ 157403467a24Schristos { \ 157503467a24Schristos UHItype c; \ 157603467a24Schristos \ 157703467a24Schristos c = __clzhi2 ((x) >> (size - 16)); \ 157803467a24Schristos (count) += c; \ 157903467a24Schristos if (c != 16) \ 158003467a24Schristos break; \ 158103467a24Schristos } \ 158203467a24Schristos } \ 158303467a24Schristos while (0) 158403467a24Schristos #define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE 158503467a24Schristos #endif 158603467a24Schristos 158703467a24Schristos #if defined (__z8000__) && W_TYPE_SIZE == 16 158803467a24Schristos #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ 158903467a24Schristos __asm__ ("add %H1,%H5\n\tadc %H0,%H3" \ 159003467a24Schristos : "=r" ((unsigned int)(sh)), \ 159103467a24Schristos "=&r" ((unsigned int)(sl)) \ 159203467a24Schristos : "%0" ((unsigned int)(ah)), \ 159303467a24Schristos "r" ((unsigned int)(bh)), \ 159403467a24Schristos "%1" ((unsigned int)(al)), \ 
159503467a24Schristos "rQR" ((unsigned int)(bl))) 159603467a24Schristos #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ 159703467a24Schristos __asm__ ("sub %H1,%H5\n\tsbc %H0,%H3" \ 159803467a24Schristos : "=r" ((unsigned int)(sh)), \ 159903467a24Schristos "=&r" ((unsigned int)(sl)) \ 160003467a24Schristos : "0" ((unsigned int)(ah)), \ 160103467a24Schristos "r" ((unsigned int)(bh)), \ 160203467a24Schristos "1" ((unsigned int)(al)), \ 160303467a24Schristos "rQR" ((unsigned int)(bl))) 160403467a24Schristos #define umul_ppmm(xh, xl, m0, m1) \ 160503467a24Schristos do { \ 160603467a24Schristos union {long int __ll; \ 160703467a24Schristos struct {unsigned int __h, __l;} __i; \ 160803467a24Schristos } __xx; \ 160903467a24Schristos unsigned int __m0 = (m0), __m1 = (m1); \ 161003467a24Schristos __asm__ ("mult %S0,%H3" \ 161103467a24Schristos : "=r" (__xx.__i.__h), \ 161203467a24Schristos "=r" (__xx.__i.__l) \ 161303467a24Schristos : "%1" (__m0), \ 161403467a24Schristos "rQR" (__m1)); \ 161503467a24Schristos (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \ 161603467a24Schristos (xh) += ((((signed int) __m0 >> 15) & __m1) \ 161703467a24Schristos + (((signed int) __m1 >> 15) & __m0)); \ 161803467a24Schristos } while (0) 161903467a24Schristos #endif /* __z8000__ */ 162003467a24Schristos 162103467a24Schristos #endif /* __GNUC__ */ 162203467a24Schristos 162303467a24Schristos /* If this machine has no inline assembler, use C macros. 
/* Portable C fallbacks.  Every macro in this section sits behind a
   "not already defined" guard (or is given a distinct name), so it only
   takes effect when no earlier section of this file supplied a
   machine-specific version.  */

/* add_ssaaaa (sh, sl, ah, al, bh, bl)
   Double-word addition: (SH,SL) = (AH,AL) + (BH,BL).  The low words are
   added in UWtype; a carry out of the low word is detected by unsigned
   wrap-around (the truncated sum is smaller than AL exactly when it
   wrapped) and added into the high word.  A carry out of the high word is
   dropped.  AL is evaluated twice; SH is stored before SL.  */
#if !defined (add_ssaaaa)
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do {                                                                  \
    UWtype __x;                                                         \
    __x = (al) + (bl);                                                  \
    (sh) = (ah) + (bh) + (__x < (al));                                  \
    (sl) = __x;                                                         \
  } while (0)
#endif

/* sub_ddmmss (sh, sl, ah, al, bh, bl)
   Double-word subtraction: (SH,SL) = (AH,AL) - (BH,BL).  A borrow out of
   the low word is detected by unsigned wrap-around (the truncated
   difference is larger than AL exactly when it wrapped) and subtracted
   from the high word.  AL is evaluated twice; SH is stored before SL.  */
#if !defined (sub_ddmmss)
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do {                                                                  \
    UWtype __x;                                                         \
    __x = (al) - (bl);                                                  \
    (sh) = (ah) - (bh) - (__x > (al));                                  \
    (sl) = __x;                                                         \
  } while (0)
#endif

/* If we lack umul_ppmm but have smul_ppmm, define umul_ppmm in terms of
   smul_ppmm.  The signed high word is corrected to the unsigned one by
   adding V when the top bit of U is set and U when the top bit of V is
   set; -(x >> (W_TYPE_SIZE - 1)) is an all-ones mask in exactly that
   case.  W0 is passed straight through to smul_ppmm.  */
#if !defined (umul_ppmm) && defined (smul_ppmm)
#define umul_ppmm(w1, w0, u, v) \
  do {                                                                  \
    UWtype __w1;                                                        \
    UWtype __xm0 = (u), __xm1 = (v);                                    \
    smul_ppmm (__w1, w0, __xm0, __xm1);                                 \
    (w1) = __w1 + (-(__xm0 >> (W_TYPE_SIZE - 1)) & __xm1)               \
                + (-(__xm1 >> (W_TYPE_SIZE - 1)) & __xm0);              \
  } while (0)
#endif

/* If we still don't have umul_ppmm, define it using plain C.
   umul_ppmm (w1, w0, u, v) stores the high word of the product U * V in
   W1 and the low word in W0.  Both operands are split into half words
   (__ll_lowpart / __ll_highpart), the four partial products are formed in
   UWtype, and the two middle products are folded together with explicit
   carry handling.  */
#if !defined (umul_ppmm)
#define umul_ppmm(w1, w0, u, v) \
  do {                                                                  \
    UWtype __x0, __x1, __x2, __x3;                                      \
    UHWtype __ul, __vl, __uh, __vh;                                     \
                                                                        \
    __ul = __ll_lowpart (u);                                            \
    __uh = __ll_highpart (u);                                           \
    __vl = __ll_lowpart (v);                                            \
    __vh = __ll_highpart (v);                                           \
                                                                        \
    __x0 = (UWtype) __ul * __vl;                                        \
    __x1 = (UWtype) __ul * __vh;                                        \
    __x2 = (UWtype) __uh * __vl;                                        \
    __x3 = (UWtype) __uh * __vh;                                        \
                                                                        \
    __x1 += __ll_highpart (__x0);/* this can't give carry */            \
    __x1 += __x2;               /* but this indeed can */               \
    if (__x1 < __x2)            /* did we get it? */                    \
      __x3 += __ll_B;           /* yes, add it in the proper pos.  */   \
                                                                        \
    (w1) = __x3 + __ll_highpart (__x1);                                 \
    (w0) = __ll_lowpart (__x1) * __ll_B + __ll_lowpart (__x0);          \
  } while (0)
#endif

/* __umulsidi3 (u, v) yields the full double-word product of U and V as
   the `ll' member of a DWunion whose `s.high' / `s.low' halves are filled
   in by umul_ppmm.  DWunion is not declared in this file; the includer
   must provide it.  Uses a GNU statement expression.  */
#if !defined (__umulsidi3)
#define __umulsidi3(u, v) \
  ({DWunion __w;                                                        \
    umul_ppmm (__w.s.high, __w.s.low, u, v);                            \
    __w.ll; })
#endif

/* Define this unconditionally, so it can be used for debugging.
   __udiv_qrnnd_c (q, r, n1, n0, d) divides the double word (N1,N0) by D,
   storing the quotient in Q and the remainder in R.  It works one half
   word at a time: each step divides by the high half of D, then corrects
   the estimate (at most twice) using the low half.
   Nothing is checked here.  The high half of D is used as a divisor, so
   it must be nonzero; callers are expected to pass a normalised D and
   N1 < D (see UDIV_NEEDS_NORMALIZATION below).  */
#define __udiv_qrnnd_c(q, r, n1, n0, d) \
  do {                                                                  \
    UWtype __d1, __d0, __q1, __q0;                                      \
    UWtype __r1, __r0, __m;                                             \
    __d1 = __ll_highpart (d);                                           \
    __d0 = __ll_lowpart (d);                                            \
                                                                        \
    /* Upper half of the quotient.  */                                  \
    __r1 = (n1) % __d1;                                                 \
    __q1 = (n1) / __d1;                                                 \
    __m = (UWtype) __q1 * __d0;                                         \
    __r1 = __r1 * __ll_B | __ll_highpart (n0);                          \
    if (__r1 < __m)                                                     \
      {                                                                 \
        __q1--, __r1 += (d);                                            \
        if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */\
          if (__r1 < __m)                                               \
            __q1--, __r1 += (d);                                        \
      }                                                                 \
    __r1 -= __m;                                                        \
                                                                        \
    /* Lower half of the quotient, same scheme on the running remainder.  */\
    __r0 = __r1 % __d1;                                                 \
    __q0 = __r1 / __d1;                                                 \
    __m = (UWtype) __q0 * __d0;                                         \
    __r0 = __r0 * __ll_B | __ll_lowpart (n0);                           \
    if (__r0 < __m)                                                     \
      {                                                                 \
        __q0--, __r0 += (d);                                            \
        if (__r0 >= (d))                                                \
          if (__r0 < __m)                                               \
            __q0--, __r0 += (d);                                        \
      }                                                                 \
    __r0 -= __m;                                                        \
                                                                        \
    (q) = (UWtype) __q1 * __ll_B | __q0;                                \
    (r) = __r0;                                                         \
  } while (0)

/* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
   __udiv_w_sdiv (defined in libgcc or elsewhere).  The remainder comes
   back through the pointer argument and the quotient as the return
   value.  */
#if !defined (udiv_qrnnd) && defined (sdiv_qrnnd)
#define udiv_qrnnd(q, r, nh, nl, d) \
  do {                                                                  \
    extern UWtype __udiv_w_sdiv (UWtype *, UWtype, UWtype, UWtype);     \
    UWtype __r;                                                         \
    (q) = __udiv_w_sdiv (&__r, nh, nl, d);                              \
    (r) = __r;                                                          \
  } while (0)
#endif

/* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c.
   That implementation is flagged as needing a normalised divisor.  */
#if !defined (udiv_qrnnd)
#define UDIV_NEEDS_NORMALIZATION 1
#define udiv_qrnnd __udiv_qrnnd_c
#endif

/* count_leading_zeros (count, x) stores in COUNT the number of zero bits
   above the most significant set bit of X.  A shift amount __a is chosen
   so that X >> __a fits in one byte, and __clz_tab finishes the job; the
   table is therefore expected to map a byte value to its bit length.
   For W_TYPE_SIZE <= 32 the shift is picked with two comparisons among
   0, __BITS4, 2*__BITS4 and 3*__BITS4; wider words scan down from the top
   byte.  With X == 0 the shift ends up 0 and the result is
   W_TYPE_SIZE - __clz_tab[0], which COUNT_LEADING_ZEROS_0 records as
   W_TYPE_SIZE (i.e. it presumes __clz_tab[0] == 0).  */
#if !defined (count_leading_zeros)
#define count_leading_zeros(count, x) \
  do {                                                                  \
    UWtype __xr = (x);                                                  \
    UWtype __a;                                                         \
                                                                        \
    if (W_TYPE_SIZE <= 32)                                              \
      {                                                                 \
        __a = __xr < ((UWtype)1<<2*__BITS4)                             \
          ? (__xr < ((UWtype)1<<__BITS4) ? 0 : __BITS4)                 \
          : (__xr < ((UWtype)1<<3*__BITS4) ?  2*__BITS4 : 3*__BITS4);   \
      }                                                                 \
    else                                                                \
      {                                                                 \
        for (__a = W_TYPE_SIZE - 8; __a > 0; __a -= 8)                  \
          if (((__xr >> __a) & 0xff) != 0)                              \
            break;                                                      \
      }                                                                 \
                                                                        \
    (count) = W_TYPE_SIZE - (__clz_tab[__xr >> __a] + __a);             \
  } while (0)
#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
#endif

#if !defined (count_trailing_zeros)
/* Define count_trailing_zeros using count_leading_zeros.  The latter might be
   defined in asm, but if it is not, the C version above is good enough.
   X & -X isolates the lowest set bit, whose position is then
   W_TYPE_SIZE - 1 minus its leading-zero count.  X is expected to be
   nonzero; for X == 0 the result depends on what count_leading_zeros
   yields for 0.  */
#define count_trailing_zeros(count, x) \
  do {                                                                  \
    UWtype __ctz_x = (x);                                               \
    UWtype __ctz_c;                                                     \
    count_leading_zeros (__ctz_c, __ctz_x & -__ctz_x);                  \
    (count) = W_TYPE_SIZE - 1 - __ctz_c;                                \
  } while (0)
#endif

/* Default: the selected udiv_qrnnd accepts any divisor.  */
#ifndef UDIV_NEEDS_NORMALIZATION
#define UDIV_NEEDS_NORMALIZATION 0
#endif