/* UltraSPARC 64 support macros.

   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
   FUTURE GNU MP RELEASES.

Copyright 2003 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */


/* Low and high 32-bit halves of x.  x is expected to be an unsigned 64-bit
   value (a limb); HIGH32 is a plain right shift, so a signed or narrower
   argument would not give a meaningful high half.  */
#define LOW32(x)   ((x) & 0xFFFFFFFF)
#define HIGH32(x)  ((x) >> 32)


/* Halfword number i in src is accessed as src[i+HALF_ENDIAN_ADJ(i)].
   Plain src[i] would be incorrect in big endian, HALF_ENDIAN_ADJ has the
   effect of swapping the two halves in this case.  */
#if HAVE_LIMB_BIG_ENDIAN
#define HALF_ENDIAN_ADJ(i)  (1 - (((i) & 1) << 1))   /* +1 even, -1 odd */
#endif
#if HAVE_LIMB_LITTLE_ENDIAN
#define HALF_ENDIAN_ADJ(i)  0                        /* no adjust */
#endif
#ifndef HALF_ENDIAN_ADJ
#error "unknown limb endianness"
#endif


/* umul_ppmm_lowequal sets h to the high limb of q*d, assuming the low limb
   of that product is equal to l.  dh and dl are the 32-bit halves of d.

   |-----high----||----low-----|
   +------+------+
   |             |                 ph = qh * dh
   +------+------+
          +------+------+
          |             |          pm1 = ql * dh
          +------+------+
          +------+------+
          |             |          pm2 = qh * dl
          +------+------+
                 +------+------+
                 |             |   pl = ql * dl (not calculated)
                 +------+------+

   Knowing that the low 64 bits is equal to l means that LOW(pm1) + LOW(pm2)
   + HIGH(pl) == HIGH(l) (mod 2^32).  The only thing we need from those
   product parts is whether they produce a carry into the high.

   pm_l = LOW(pm1)+LOW(pm2) is done to contribute its carry, then the only
   time there's a further carry from LOW(pm_l)+HIGH(pl) is if LOW(pm_l) >
   HIGH(l).  pl is never actually calculated.

   h is an lvalue receiving the result.  The other arguments are each
   evaluated more than once, so they must be free of side effects.  They
   are parenthesized on use, so arbitrary expressions are accepted.  */

#define umul_ppmm_lowequal(h, q, d, dh, dl, l)                  \
  do {                                                          \
    mp_limb_t  _ql, _qh, _ph, _pm1, _pm2, _pm_l;                \
    ASSERT ((dh) == HIGH32 (d));                                \
    ASSERT ((dl) == LOW32 (d));                                 \
    ASSERT ((q) * (d) == (l));                                  \
                                                                \
    _ql = LOW32 (q);                                            \
    _qh = HIGH32 (q);                                           \
                                                                \
    _pm1 = _ql * (dh);                                          \
    _pm2 = _qh * (dl);                                          \
    _ph  = _qh * (dh);                                          \
                                                                \
    _pm_l = LOW32 (_pm1) + LOW32 (_pm2);                        \
                                                                \
    /* (_pm_l << 32) > l tests LOW(pm_l) > HIGH(l), the carry   \
       out of the unseen LOW(pm_l) + HIGH(pl) addition.  */     \
    (h) = _ph + HIGH32 (_pm1) + HIGH32 (_pm2)                   \
      + HIGH32 (_pm_l) + ((_pm_l << 32) > (l));                 \
                                                                \
    ASSERT_HIGH_PRODUCT (h, q, d);                              \
  } while (0)


/* Set h to the high of q*d, assuming the low limb of that product is equal
   to l, and that d fits in 32-bits.

   |-----high----||----low-----|
          +------+------+
          |             |          pm = qh * dl
          +------+------+
                 +------+------+
                 |             |   pl = ql * dl (not calculated)
                 +------+------+

   Knowing that LOW(pm) + HIGH(pl) == HIGH(l) (mod 2^32) means that the only
   time there's a carry from that sum is when LOW(pm) > HIGH(l).  There's no
   need to calculate pl to determine this.

   h is an lvalue receiving the result.  q, d and l are evaluated more than
   once and so must be free of side effects.  */

#define umul_ppmm_half_lowequal(h, q, d, l)             \
  do {                                                  \
    mp_limb_t  _pm;                                     \
    ASSERT ((q) * (d) == (l));                          \
    ASSERT (HIGH32 (d) == 0);                           \
                                                        \
    _pm = HIGH32 (q) * (d);                             \
    (h) = HIGH32 (_pm) + ((_pm << 32) > (l));           \
    ASSERT_HIGH_PRODUCT (h, q, d);                      \
  } while (0)


/* check that h is the high limb of x*y.  Expands to nothing unless
   WANT_ASSERT is set; the check itself relies on umul_ppmm being available
   at the point of use.  */
#if WANT_ASSERT
#define ASSERT_HIGH_PRODUCT(h, x, y)                    \
  do {                                                  \
    mp_limb_t  _want_h, _dummy;                         \
    umul_ppmm (_want_h, _dummy, (x), (y));              \
    ASSERT ((h) == _want_h);                            \
  } while (0)
#else
#define ASSERT_HIGH_PRODUCT(h, x, y)                    \
  do { } while (0)
#endif


/* Count the leading zeros on a limb, but assuming it fits in 32 bits.
   The count returned will be in the range 32 to 63.
   This is the 32-bit generic C count_leading_zeros from longlong.h.

   x must be non-zero.  _a is chosen as 1, 9, 17 or 25 so that _xr >> _a
   is a small index into __clz_tab; UWtype, W_TYPE_SIZE and __clz_tab are
   taken from the including context.  */
#define count_leading_zeros_32(count, x)                                      \
  do {                                                                        \
    mp_limb_t  _xr = (x);                                                     \
    unsigned   _a;                                                            \
    ASSERT ((x) != 0);                                                        \
    ASSERT ((x) <= CNST_LIMB(0xFFFFFFFF));                                    \
    _a = _xr < ((UWtype) 1 << 16) ? (_xr < ((UWtype) 1 << 8) ? 1 : 8 + 1)     \
      : (_xr < ((UWtype) 1 << 24)  ? 16 + 1 : 24 + 1);                        \
                                                                              \
    (count) = W_TYPE_SIZE + 1 - _a - __clz_tab[_xr >> _a];                    \
  } while (0)


/* Set inv to a 32-bit inverse floor((b*(b-d)-1) / d), knowing that d fits
   32 bits and is normalized (high bit set).  Here b is 2^32.

   The (unsigned) cast truncates the quotient to the width of unsigned,
   which this code takes to be 32 bits.  */
#define invert_half_limb(inv, d)                        \
  do {                                                  \
    mp_limb_t  _n;                                      \
    ASSERT ((d) <= 0xFFFFFFFF);                         \
    ASSERT ((d) & 0x80000000);                          \
    _n = (((mp_limb_t) -(d)) << 32) - 1;                \
    (inv) = (mp_limb_t) (unsigned) (_n / (d));          \
  } while (0)


/* Divide nh:nl by d, setting q to the quotient and r to the remainder.
   q, r, nh and nl are 32-bits each, d_limb is 32-bits but in an mp_limb_t,
   dinv_limb is similarly a 32-bit inverse but in an mp_limb_t.

   As the ASSERTs state, d_limb must have bit 31 set and nh must be less
   than d_limb.  dinv_limb is expected to be the inverse of d_limb in the
   form produced by invert_half_limb.  The unsigned locals are relied on to
   be exactly 32 bits wide so that their arithmetic wraps mod 2^32.  */

#define udiv_qrnnd_half_preinv(q, r, nh, nl, d_limb, dinv_limb)         \
  do {                                                                  \
    unsigned   _n2, _n10, _n1, _nadj, _q11n, _xh, _r, _q;               \
    mp_limb_t  _n, _x;                                                  \
    ASSERT ((d_limb) <= 0xFFFFFFFF);                                    \
    ASSERT ((dinv_limb) <= 0xFFFFFFFF);                                 \
    ASSERT ((d_limb) & 0x80000000);                                     \
    ASSERT ((nh) < (d_limb));                                           \
    _n10 = (nl);                                                        \
    _n2 = (nh);                                                         \
    _n1 = (int) _n10 >> 31;         /* all ones if nl bit 31 set */     \
    _nadj = _n10 + (_n1 & (d_limb));                                    \
    _x = (dinv_limb) * (_n2 - _n1) + _nadj;                             \
    _q11n = ~(_n2 + HIGH32 (_x));             /* -q1-1 */               \
    _n = ((mp_limb_t) _n2 << 32) + _n10;                                \
    _x = _n + (d_limb) * _q11n;               /* n-q1*d-d */            \
    _xh = HIGH32 (_x) - (d_limb);             /* high(n-q1*d-d) */      \
    ASSERT (_xh == 0 || _xh == ~0);                                     \
    _r = _x + ((d_limb) & _xh);               /* addback */             \
    _q = _xh - _q11n;                         /* q1+1-addback */        \
    ASSERT (_r < (d_limb));                                             \
    ASSERT ((d_limb) * _q + _r == _n);                                  \
    (r) = _r;                                                           \
    (q) = _q;                                                           \
  } while (0)