/* UltraSPARC 64 support macros.

   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
   FUTURE GNU MP RELEASES.

Copyright 2003 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */


/* LOW32 masks x down to its low 32 bits, HIGH32 shifts x right by 32 bits
   (the upper half when x is a 64-bit limb).  */
#define LOW32(x)   ((x) & 0xFFFFFFFF)
#define HIGH32(x)  ((x) >> 32)


/* Halfword number i in src is accessed as src[i+HALF_ENDIAN_ADJ(i)].
   Plain src[i] would be incorrect in big endian, HALF_ENDIAN_ADJ has the
   effect of swapping the two halves in this case.  */
#if HAVE_LIMB_BIG_ENDIAN
#define HALF_ENDIAN_ADJ(i)  (1 - (((i) & 1) << 1))   /* +1 even, -1 odd */
#endif
#if HAVE_LIMB_LITTLE_ENDIAN
#define HALF_ENDIAN_ADJ(i)  0                        /* no adjust */
#endif
#ifndef HALF_ENDIAN_ADJ
#error "unknown limb endianness"
#endif


/* umul_ppmm_lowequal sets h to the high limb of q*d, assuming the low limb
   of that product is equal to l.  dh and dl are the 32-bit halves of d.

   |-----high----||----low-----|
   +------+------+
   |             |                 ph = qh * dh
   +------+------+
          +------+------+
          |             |          pm1 = ql * dh
          +------+------+
          +------+------+
          |             |          pm2 = qh * dl
          +------+------+
                 +------+------+
                 |             |   pl = ql * dl (not calculated)
                 +------+------+

   Knowing that the low 64 bits is equal to l means that LOW(pm1) + LOW(pm2)
   + HIGH(pl) == HIGH(l) (mod 2^32).  The only thing we need from those
   product parts is whether they produce a carry into the high.

   pm_l = LOW(pm1)+LOW(pm2) is done to contribute its carry, then the only
   time there's a further carry from LOW(pm_l)+HIGH(pl) is if LOW(pm_l) >
   HIGH(l).  pl is never actually calculated.

   Every macro argument is parenthesized where it is used, so expressions
   may be passed, but q, d, dh, dl and l can each be evaluated more than
   once and so must be free of side effects.  */

#define umul_ppmm_lowequal(h, q, d, dh, dl, l)  \
  do {                                          \
    mp_limb_t  ql, qh, ph, pm1, pm2, pm_l;      \
    ASSERT ((dh) == HIGH32 (d));                \
    ASSERT ((dl) == LOW32 (d));                 \
    ASSERT ((q) * (d) == (l));                  \
                                                \
    ql = LOW32 (q);                             \
    qh = HIGH32 (q);                            \
                                                \
    pm1 = ql * (dh);                            \
    pm2 = qh * (dl);                            \
    ph  = qh * (dh);                            \
                                                \
    pm_l = LOW32 (pm1) + LOW32 (pm2);           \
                                                \
    (h) = ph + HIGH32 (pm1) + HIGH32 (pm2)      \
      + HIGH32 (pm_l) + ((pm_l << 32) > (l));   \
                                                \
    ASSERT_HIGH_PRODUCT (h, q, d);              \
  } while (0)


/* Set h to the high of q*d, assuming the low limb of that product is equal
   to l, and that d fits in 32-bits.

   |-----high----||----low-----|
          +------+------+
          |             |          pm = qh * dl
          +------+------+
                 +------+------+
                 |             |   pl = ql * dl (not calculated)
                 +------+------+

   Knowing that LOW(pm) + HIGH(pl) == HIGH(l) (mod 2^32) means that the only
   time there's a carry from that sum is when LOW(pm) > HIGH(l).  There's no
   need to calculate pl to determine this.

   As with umul_ppmm_lowequal, arguments are parenthesized at each use but
   q, d and l may be evaluated more than once.  */

#define umul_ppmm_half_lowequal(h, q, d, l)     \
  do {                                          \
    mp_limb_t pm;                               \
    ASSERT ((q) * (d) == (l));                  \
    ASSERT (HIGH32 (d) == 0);                   \
                                                \
    pm = HIGH32 (q) * (d);                      \
    (h) = HIGH32 (pm) + ((pm << 32) > (l));     \
    ASSERT_HIGH_PRODUCT (h, q, d);              \
  } while (0)


/* check that h is the high limb of x*y; expands to nothing unless
   WANT_ASSERT is set */
#if WANT_ASSERT
#define ASSERT_HIGH_PRODUCT(h, x, y)    \
  do {                                  \
    mp_limb_t  want_h, dummy;           \
    umul_ppmm (want_h, dummy, x, y);    \
    ASSERT ((h) == want_h);             \
  } while (0)
#else
#define ASSERT_HIGH_PRODUCT(h, x, y)    \
  do { } while (0)
#endif


/* Multiply u and v, where v < 2^32.  w1 receives the high word and w0 the
   low word of the product.  u and v are each evaluated exactly once.  Only
   the low half of v takes part, so just two half-word products are
   needed.  */
#define umul_ppmm_s(w1, w0, u, v)                                       \
  do {                                                                  \
    UWtype __x0, __x2;                                                  \
    UWtype __ul, __vl, __uh;                                            \
    UWtype __u = (u), __v = (v);                                        \
                                                                        \
    __ul = __ll_lowpart (__u);                                          \
    __uh = __ll_highpart (__u);                                         \
    __vl = __ll_lowpart (__v);                                          \
                                                                        \
    __x0 = (UWtype) __ul * __vl;                                        \
    __x2 = (UWtype) __uh * __vl;                                        \
                                                                        \
    (w1) = (__x2 + (__x0 >> W_TYPE_SIZE/2)) >> W_TYPE_SIZE/2;           \
    (w0) = (__x2 << W_TYPE_SIZE/2) + __x0;                              \
  } while (0)

/* Count the leading zeros on a limb, but assuming it fits in 32 bits.
   The count returned will be in the range 32 to 63.
   This is the 32-bit generic C count_leading_zeros from longlong.h.
   x is evaluated exactly once; the checks use the captured copy.  */
#define count_leading_zeros_32(count, x)                                      \
  do {                                                                        \
    mp_limb_t  __xr = (x);                                                    \
    unsigned   __a;                                                           \
    ASSERT (__xr != 0);                                                       \
    ASSERT (__xr <= CNST_LIMB(0xFFFFFFFF));                                   \
    __a = __xr < ((UWtype) 1 << 16) ? (__xr < ((UWtype) 1 << 8) ? 1 : 8 + 1)  \
      : (__xr < ((UWtype) 1 << 24)  ? 16 + 1 : 24 + 1);                       \
                                                                              \
    (count) = W_TYPE_SIZE + 1 - __a - __clz_tab[__xr >> __a];                 \
  } while (0)


/* Set inv to a 32-bit inverse floor((b*(b-d)-1) / d), where b = 2^32,
   knowing that d fits in 32 bits and is normalized (high bit set).  */
#define invert_half_limb(inv, d)                \
  do {                                          \
    mp_limb_t  _n;                              \
    ASSERT ((d) <= 0xFFFFFFFF);                 \
    ASSERT ((d) & 0x80000000);                  \
    _n = (((mp_limb_t) -(d)) << 32) - 1;        \
    (inv) = (mp_limb_t) (unsigned) (_n / (d));  \
  } while (0)


/* Divide nh:nl by d, setting q to the quotient and r to the remainder.
   q, r, nh and nl are 32-bits each, d_limb is 32-bits but in an mp_limb_t,
   dinv_limb is similarly a 32-bit inverse but in an mp_limb_t.

   The 32-bit temporaries are plain "unsigned" and the arithmetic on them
   depends on wrapping modulo 2^32, so this presumes a 32-bit unsigned int.
   _n1 is an all-ones mask when the top bit of nl is set and zero otherwise;
   it is formed with unsigned negation so no signed shift is involved.  */

#define udiv_qrnnd_half_preinv(q, r, nh, nl, d_limb, dinv_limb)         \
  do {                                                                  \
    unsigned   _n2, _n10, _n1, _nadj, _q11n, _xh, _r, _q;               \
    mp_limb_t  _n, _x;                                                  \
    ASSERT ((d_limb) <= 0xFFFFFFFF);                                    \
    ASSERT ((dinv_limb) <= 0xFFFFFFFF);                                 \
    ASSERT ((d_limb) & 0x80000000);                                     \
    ASSERT ((nh) < (d_limb));                                           \
    _n10 = (nl);                                                        \
    _n2 = (nh);                                                         \
    _n1 = -(_n10 >> 31);                      /* 0 or ~0 mask */        \
    _nadj = _n10 + (_n1 & (d_limb));                                    \
    _x = (dinv_limb) * (_n2 - _n1) + _nadj;                             \
    _q11n = ~(_n2 + HIGH32 (_x));             /* -q1-1 */               \
    _n = ((mp_limb_t) _n2 << 32) + _n10;                                \
    _x = _n + (d_limb) * _q11n;               /* n-q1*d-d */            \
    _xh = HIGH32 (_x) - (d_limb);             /* high(n-q1*d-d) */      \
    ASSERT (_xh == 0 || _xh == ~0U);                                    \
    _r = _x + ((d_limb) & _xh);               /* addback */             \
    _q = _xh - _q11n;                         /* q1+1-addback */        \
    ASSERT (_r < (d_limb));                                             \
    ASSERT ((d_limb) * _q + _r == _n);                                  \
    (r) = _r;                                                           \
    (q) = _q;                                                           \
  } while (0)