xref: /netbsd-src/external/lgpl3/gmp/dist/mpz/aorsmul_i.c (revision 72c7faa4dbb41dbb0238d6b4a109da0d4b236dd4)
1 /* mpz_addmul_ui, mpz_submul_ui - add or subtract small multiple.
2 
3    THE mpz_aorsmul_1 FUNCTION IN THIS FILE IS FOR INTERNAL USE ONLY AND IS
4    ALMOST CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR
5    COMPLETELY IN FUTURE GNU MP RELEASES.
6 
7 Copyright 2001, 2002, 2004, 2005, 2012 Free Software Foundation, Inc.
8 
9 This file is part of the GNU MP Library.
10 
11 The GNU MP Library is free software; you can redistribute it and/or modify
12 it under the terms of either:
13 
14   * the GNU Lesser General Public License as published by the Free
15     Software Foundation; either version 3 of the License, or (at your
16     option) any later version.
17 
18 or
19 
20   * the GNU General Public License as published by the Free Software
21     Foundation; either version 2 of the License, or (at your option) any
22     later version.
23 
24 or both in parallel, as here.
25 
26 The GNU MP Library is distributed in the hope that it will be useful, but
27 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
28 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
29 for more details.
30 
31 You should have received copies of the GNU General Public License and the
32 GNU Lesser General Public License along with the GNU MP Library.  If not,
33 see https://www.gnu.org/licenses/.  */
34 
35 #include "gmp-impl.h"
36 
37 
/* MPN_MUL_1C(cout, dst, src, size, n, cin):
   dst[0..size-1] = src[0..size-1] * n + cin, with the final carry limb
   left in cout.  Uses the native mpn_mul_1c when the mpn layer provides
   one; otherwise synthesizes it from mpn_mul_1 followed by an mpn_add_1
   propagating "cin", summing the two carry-outs into cout.  */
#if HAVE_NATIVE_mpn_mul_1c
#define MPN_MUL_1C(cout, dst, src, size, n, cin)        \
  do {                                                  \
    (cout) = mpn_mul_1c (dst, src, size, n, cin);       \
  } while (0)
#else
#define MPN_MUL_1C(cout, dst, src, size, n, cin)        \
  do {                                                  \
    mp_limb_t __cy;                                     \
    __cy = mpn_mul_1 (dst, src, size, n);               \
    (cout) = __cy + mpn_add_1 (dst, dst, size, cin);    \
  } while (0)
#endif
51 
52 
53 /* sub>=0 means an addmul w += x*y, sub<0 means a submul w -= x*y.
54 
55    All that's needed to account for negative w or x is to flip "sub".
56 
57    The final w will retain its sign, unless an underflow occurs in a submul
58    of absolute values, in which case it's flipped.
59 
60    If x has more limbs than w, then mpn_submul_1 followed by mpn_com is
61    used.  The alternative would be mpn_mul_1 into temporary space followed
62    by mpn_sub_n.  Avoiding temporary space seems good, and submul+com stands
63    a chance of being faster since it involves only one set of carry
64    propagations, not two.  Note that doing an addmul_1 with a
65    twos-complement negative y doesn't work, because it effectively adds an
66    extra x * 2^GMP_LIMB_BITS.  */
67 
68 REGPARM_ATTR(1) void
mpz_aorsmul_1(mpz_ptr w,mpz_srcptr x,mp_limb_t y,mp_size_t sub)69 mpz_aorsmul_1 (mpz_ptr w, mpz_srcptr x, mp_limb_t y, mp_size_t sub)
70 {
71   mp_size_t  xsize, wsize, wsize_signed, new_wsize, min_size, dsize;
72   mp_srcptr  xp;
73   mp_ptr     wp;
74   mp_limb_t  cy;
75 
76   /* w unaffected if x==0 or y==0 */
77   xsize = SIZ (x);
78   if (xsize == 0 || y == 0)
79     return;
80 
81   sub ^= xsize;
82   xsize = ABS (xsize);
83 
84   wsize_signed = SIZ (w);
85   if (wsize_signed == 0)
86     {
87       /* nothing to add to, just set x*y, "sub" gives the sign */
88       wp = MPZ_REALLOC (w, xsize+1);
89       cy = mpn_mul_1 (wp, PTR(x), xsize, y);
90       wp[xsize] = cy;
91       xsize += (cy != 0);
92       SIZ (w) = (sub >= 0 ? xsize : -xsize);
93       return;
94     }
95 
96   sub ^= wsize_signed;
97   wsize = ABS (wsize_signed);
98 
99   new_wsize = MAX (wsize, xsize);
100   wp = MPZ_REALLOC (w, new_wsize+1);
101   xp = PTR (x);
102   min_size = MIN (wsize, xsize);
103 
104   if (sub >= 0)
105     {
106       /* addmul of absolute values */
107 
108       cy = mpn_addmul_1 (wp, xp, min_size, y);
109       wp += min_size;
110       xp += min_size;
111 
112       dsize = xsize - wsize;
113 #if HAVE_NATIVE_mpn_mul_1c
114       if (dsize > 0)
115 	cy = mpn_mul_1c (wp, xp, dsize, y, cy);
116       else if (dsize < 0)
117 	{
118 	  dsize = -dsize;
119 	  cy = mpn_add_1 (wp, wp, dsize, cy);
120 	}
121 #else
122       if (dsize != 0)
123 	{
124 	  mp_limb_t  cy2;
125 	  if (dsize > 0)
126 	    cy2 = mpn_mul_1 (wp, xp, dsize, y);
127 	  else
128 	    {
129 	      dsize = -dsize;
130 	      cy2 = 0;
131 	    }
132 	  cy = cy2 + mpn_add_1 (wp, wp, dsize, cy);
133 	}
134 #endif
135 
136       wp[dsize] = cy;
137       new_wsize += (cy != 0);
138     }
139   else
140     {
141       /* submul of absolute values */
142 
143       cy = mpn_submul_1 (wp, xp, min_size, y);
144       if (wsize >= xsize)
145 	{
146 	  /* if w bigger than x, then propagate borrow through it */
147 	  if (wsize != xsize)
148 	    cy = mpn_sub_1 (wp+xsize, wp+xsize, wsize-xsize, cy);
149 
150 	  if (cy != 0)
151 	    {
152 	      /* Borrow out of w, take twos complement negative to get
153 		 absolute value, flip sign of w.  */
154 	      wp[new_wsize] = ~-cy;  /* extra limb is 0-cy */
155 	      mpn_com (wp, wp, new_wsize);
156 	      new_wsize++;
157 	      MPN_INCR_U (wp, new_wsize, CNST_LIMB(1));
158 	      wsize_signed = -wsize_signed;
159 	    }
160 	}
161       else /* wsize < xsize */
162 	{
163 	  /* x bigger than w, so want x*y-w.  Submul has given w-x*y, so
164 	     take twos complement and use an mpn_mul_1 for the rest.  */
165 
166 	  mp_limb_t  cy2;
167 
168 	  /* -(-cy*b^n + w-x*y) = (cy-1)*b^n + ~(w-x*y) + 1 */
169 	  mpn_com (wp, wp, wsize);
170 	  cy += mpn_add_1 (wp, wp, wsize, CNST_LIMB(1));
171 	  cy -= 1;
172 
173 	  /* If cy-1 == -1 then hold that -1 for latter.  mpn_submul_1 never
174 	     returns cy==MP_LIMB_T_MAX so that value always indicates a -1. */
175 	  cy2 = (cy == MP_LIMB_T_MAX);
176 	  cy += cy2;
177 	  MPN_MUL_1C (cy, wp+wsize, xp+wsize, xsize-wsize, y, cy);
178 	  wp[new_wsize] = cy;
179 	  new_wsize += (cy != 0);
180 
181 	  /* Apply any -1 from above.  The value at wp+wsize is non-zero
182 	     because y!=0 and the high limb of x will be non-zero.  */
183 	  if (cy2)
184 	    MPN_DECR_U (wp+wsize, new_wsize-wsize, CNST_LIMB(1));
185 
186 	  wsize_signed = -wsize_signed;
187 	}
188 
189       /* submul can produce high zero limbs due to cancellation, both when w
190 	 has more limbs or x has more  */
191       MPN_NORMALIZE (wp, new_wsize);
192     }
193 
194   SIZ (w) = (wsize_signed >= 0 ? new_wsize : -new_wsize);
195 
196   ASSERT (new_wsize == 0 || PTR(w)[new_wsize-1] != 0);
197 }
198 
199 
200 void
mpz_addmul_ui(mpz_ptr w,mpz_srcptr x,unsigned long y)201 mpz_addmul_ui (mpz_ptr w, mpz_srcptr x, unsigned long y)
202 {
203 #if BITS_PER_ULONG > GMP_NUMB_BITS
204   if (UNLIKELY (y > GMP_NUMB_MAX))
205     {
206       mpz_t t;
207       mp_ptr tp;
208       mp_size_t xn;
209       TMP_DECL;
210       TMP_MARK;
211       xn = SIZ (x);
212       if (xn == 0) return;
213       MPZ_TMP_INIT (t, ABS (xn) + 1);
214       tp = PTR (t);
215       tp[0] = 0;
216       MPN_COPY (tp + 1, PTR(x), ABS (xn));
217       SIZ(t) = xn >= 0 ? xn + 1 : xn - 1;
218       mpz_aorsmul_1 (w, t, (mp_limb_t) y >> GMP_NUMB_BITS, (mp_size_t) 0);
219       PTR(t) = tp + 1;
220       SIZ(t) = xn;
221       mpz_aorsmul_1 (w, t, (mp_limb_t) y & GMP_NUMB_MASK, (mp_size_t) 0);
222       TMP_FREE;
223       return;
224     }
225 #endif
226   mpz_aorsmul_1 (w, x, (mp_limb_t) y, (mp_size_t) 0);
227 }
228 
229 void
mpz_submul_ui(mpz_ptr w,mpz_srcptr x,unsigned long y)230 mpz_submul_ui (mpz_ptr w, mpz_srcptr x, unsigned long y)
231 {
232 #if BITS_PER_ULONG > GMP_NUMB_BITS
233   if (y > GMP_NUMB_MAX)
234     {
235       mpz_t t;
236       mp_ptr tp;
237       mp_size_t xn;
238       TMP_DECL;
239       TMP_MARK;
240       xn = SIZ (x);
241       if (xn == 0) return;
242       MPZ_TMP_INIT (t, ABS (xn) + 1);
243       tp = PTR (t);
244       tp[0] = 0;
245       MPN_COPY (tp + 1, PTR(x), ABS (xn));
246       SIZ(t) = xn >= 0 ? xn + 1 : xn - 1;
247       mpz_aorsmul_1 (w, t, (mp_limb_t) y >> GMP_NUMB_BITS, (mp_size_t) -1);
248       PTR(t) = tp + 1;
249       SIZ(t) = xn;
250       mpz_aorsmul_1 (w, t, (mp_limb_t) y & GMP_NUMB_MASK, (mp_size_t) -1);
251       TMP_FREE;
252       return;
253     }
254 #endif
255   mpz_aorsmul_1 (w, x, (mp_limb_t) y & GMP_NUMB_MASK, (mp_size_t) -1);
256 }
257