/* mpz_addmul_ui, mpz_submul_ui - add or subtract small multiple.

   THE mpz_aorsmul_1 FUNCTION IN THIS FILE IS FOR INTERNAL USE ONLY AND IS
   ALMOST CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR
   COMPLETELY IN FUTURE GNU MP RELEASES.

Copyright 2001, 2002, 2004, 2005, 2012 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */

#include "gmp-impl.h"


#if HAVE_NATIVE_mpn_mul_1c
#define MPN_MUL_1C(cout, dst, src, size, n, cin)                \
  do {                                                          \
    (cout) = mpn_mul_1c (dst, src, size, n, cin);               \
  } while (0)
#else
#define MPN_MUL_1C(cout, dst, src, size, n, cin)                \
  do {                                                          \
    mp_limb_t  __cy;                                            \
    __cy = mpn_mul_1 (dst, src, size, n);                       \
    (cout) = __cy + mpn_add_1 (dst, dst, size, cin);            \
  } while (0)
#endif


/* sub>=0 means an addmul w += x*y, sub<0 means a submul w -= x*y.

   All that's needed to account for negative w or x is to flip "sub".

   The final w will retain its sign, unless an underflow occurs in a submul
   of absolute values, in which case it's flipped.

   If x has more limbs than w, then mpn_submul_1 followed by mpn_com is
   used.  The alternative would be mpn_mul_1 into temporary space followed
   by mpn_sub_n.  Avoiding temporary space seems good, and submul+com stands
   a chance of being faster since it involves only one set of carry
   propagations, not two.  Note that doing an addmul_1 with a
   twos-complement negative y doesn't work, because it effectively adds an
   extra x * 2^GMP_LIMB_BITS.  */
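/* Worked example of the sign handling (illustrative, not from the GMP
   sources): take w = 5, x = -3, y = 2 and an addmul w += x*y, which must
   give 5 + (-6) = -1.  The negative x flips "sub", so on absolute values
   the call becomes a submul 5 - 3*2.  That submul underflows, and per the
   rule above the underflow flips the sign of w, yielding -1 as required.  */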
REGPARM_ATTR(1) void
mpz_aorsmul_1 (mpz_ptr w, mpz_srcptr x, mp_limb_t y, mp_size_t sub)
{
  mp_size_t  xsize, wsize, wsize_signed, new_wsize, min_size, dsize;
  mp_srcptr  xp;
  mp_ptr     wp;
  mp_limb_t  cy;

  /* w unaffected if x==0 or y==0 */
  xsize = SIZ (x);
  if (xsize == 0 || y == 0)
    return;

  sub ^= xsize;
  xsize = ABS (xsize);

  wsize_signed = SIZ (w);
  if (wsize_signed == 0)
    {
      /* nothing to add to, just set x*y, "sub" gives the sign */
      wp = MPZ_REALLOC (w, xsize+1);
      cy = mpn_mul_1 (wp, PTR(x), xsize, y);
      wp[xsize] = cy;
      xsize += (cy != 0);
      SIZ (w) = (sub >= 0 ? xsize : -xsize);
      return;
    }

  sub ^= wsize_signed;
  wsize = ABS (wsize_signed);

  new_wsize = MAX (wsize, xsize);
  wp = MPZ_REALLOC (w, new_wsize+1);
  xp = PTR (x);
  min_size = MIN (wsize, xsize);

  if (sub >= 0)
    {
      /* addmul of absolute values */

      cy = mpn_addmul_1 (wp, xp, min_size, y);
      wp += min_size;
      xp += min_size;

      dsize = xsize - wsize;
#if HAVE_NATIVE_mpn_mul_1c
      if (dsize > 0)
        cy = mpn_mul_1c (wp, xp, dsize, y, cy);
      else if (dsize < 0)
        {
          dsize = -dsize;
          cy = mpn_add_1 (wp, wp, dsize, cy);
        }
#else
      if (dsize != 0)
        {
          mp_limb_t  cy2;
          if (dsize > 0)
            cy2 = mpn_mul_1 (wp, xp, dsize, y);
          else
            {
              dsize = -dsize;
              cy2 = 0;
            }
          cy = cy2 + mpn_add_1 (wp, wp, dsize, cy);
        }
#endif

      wp[dsize] = cy;
      new_wsize += (cy != 0);
    }
  else
    {
      /* submul of absolute values */

      cy = mpn_submul_1 (wp, xp, min_size, y);
      if (wsize >= xsize)
        {
          /* if w bigger than x, then propagate borrow through it */
          if (wsize != xsize)
            cy = mpn_sub_1 (wp+xsize, wp+xsize, wsize-xsize, cy);

          if (cy != 0)
            {
              /* Borrow out of w, take twos complement negative to get
                 absolute value, flip sign of w.  */
              wp[new_wsize] = ~-cy;  /* extra limb is 0-cy */
              mpn_com (wp, wp, new_wsize);
              new_wsize++;
              MPN_INCR_U (wp, new_wsize, CNST_LIMB(1));
              wsize_signed = -wsize_signed;
            }
        }
      else /* wsize < xsize */
        {
          /* x bigger than w, so want x*y-w.  Submul has given w-x*y, so
             take twos complement and use an mpn_mul_1 for the rest.  */

          mp_limb_t  cy2;

          /* -(-cy*b^n + w-x*y) = (cy-1)*b^n + ~(w-x*y) + 1 */
          mpn_com (wp, wp, wsize);
          cy += mpn_add_1 (wp, wp, wsize, CNST_LIMB(1));
          cy -= 1;

          /* If cy-1 == -1 then hold that -1 for later.  mpn_submul_1 never
             returns cy==MP_LIMB_T_MAX so that value always indicates a -1.  */
          cy2 = (cy == MP_LIMB_T_MAX);
          cy += cy2;
          MPN_MUL_1C (cy, wp+wsize, xp+wsize, xsize-wsize, y, cy);
          wp[new_wsize] = cy;
          new_wsize += (cy != 0);

          /* Apply any -1 from above.  The value at wp+wsize is non-zero
             because y!=0 and the high limb of x will be non-zero.  */
          if (cy2)
            MPN_DECR_U (wp+wsize, new_wsize-wsize, CNST_LIMB(1));

          wsize_signed = -wsize_signed;
        }

      /* submul can produce high zero limbs due to cancellation, both when
         w has more limbs and when x has more */
      MPN_NORMALIZE (wp, new_wsize);
    }

  SIZ (w) = (wsize_signed >= 0 ? new_wsize : -new_wsize);

  ASSERT (new_wsize == 0 || PTR(w)[new_wsize-1] != 0);
}
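/* The entrypoints below normally pass y through as a single limb.  But
   when an unsigned long is wider than a limb (BITS_PER_ULONG >
   GMP_NUMB_BITS, e.g. with nails), y may not fit in one limb, so it is
   split as y = yhi*2^GMP_NUMB_BITS + ylo and mpz_aorsmul_1 is applied
   twice, using x*y = (x*2^GMP_NUMB_BITS)*yhi + x*ylo.  The temporary t
   below is x*2^GMP_NUMB_BITS, formed by prepending a zero limb to x.  */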
void
mpz_addmul_ui (mpz_ptr w, mpz_srcptr x, unsigned long y)
{
#if BITS_PER_ULONG > GMP_NUMB_BITS
  if (UNLIKELY (y > GMP_NUMB_MAX))
    {
      mpz_t t;
      mp_ptr tp;
      mp_size_t xn;
      TMP_DECL;
      TMP_MARK;
      xn = SIZ (x);
      if (xn == 0) return;
      MPZ_TMP_INIT (t, ABS (xn) + 1);
      tp = PTR (t);
      tp[0] = 0;
      MPN_COPY (tp + 1, PTR(x), ABS (xn));
      SIZ(t) = xn >= 0 ? xn + 1 : xn - 1;
      mpz_aorsmul_1 (w, t, (mp_limb_t) y >> GMP_NUMB_BITS, (mp_size_t) 0);
      PTR(t) = tp + 1;
      SIZ(t) = xn;
      mpz_aorsmul_1 (w, t, (mp_limb_t) y & GMP_NUMB_MASK, (mp_size_t) 0);
      TMP_FREE;
      return;
    }
#endif
  mpz_aorsmul_1 (w, x, (mp_limb_t) y, (mp_size_t) 0);
}

void
mpz_submul_ui (mpz_ptr w, mpz_srcptr x, unsigned long y)
{
#if BITS_PER_ULONG > GMP_NUMB_BITS
  if (y > GMP_NUMB_MAX)
    {
      mpz_t t;
      mp_ptr tp;
      mp_size_t xn;
      TMP_DECL;
      TMP_MARK;
      xn = SIZ (x);
      if (xn == 0) return;
      MPZ_TMP_INIT (t, ABS (xn) + 1);
      tp = PTR (t);
      tp[0] = 0;
      MPN_COPY (tp + 1, PTR(x), ABS (xn));
      SIZ(t) = xn >= 0 ? xn + 1 : xn - 1;
      mpz_aorsmul_1 (w, t, (mp_limb_t) y >> GMP_NUMB_BITS, (mp_size_t) -1);
      PTR(t) = tp + 1;
      SIZ(t) = xn;
      mpz_aorsmul_1 (w, t, (mp_limb_t) y & GMP_NUMB_MASK, (mp_size_t) -1);
      TMP_FREE;
      return;
    }
#endif
  mpz_aorsmul_1 (w, x, (mp_limb_t) y & GMP_NUMB_MASK, (mp_size_t) -1);
}
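/* Usage sketch for the public entrypoints (illustrative only, not part of
   the GMP sources):

     mpz_t w, x;
     mpz_init_set_ui (w, 10);
     mpz_init_set_ui (x, 3);
     mpz_addmul_ui (w, x, 4);      now w = 10 + 3*4 = 22
     mpz_submul_ui (w, x, 5);      now w = 22 - 3*5 = 7
     mpz_clear (w);
     mpz_clear (x);
*/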