/* mpz_addmul_ui, mpz_submul_ui - add or subtract small multiple.

   THE mpz_aorsmul_1 FUNCTION IN THIS FILE IS FOR INTERNAL USE ONLY AND IS
   ALMOST CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR
   COMPLETELY IN FUTURE GNU MP RELEASES.

Copyright 2001, 2002, 2004, 2005, 2012 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */
#include "gmp.h"
#include "gmp-impl.h"


#if HAVE_NATIVE_mpn_mul_1c
#define MPN_MUL_1C(cout, dst, src, size, n, cin)        \
  do {                                                  \
    (cout) = mpn_mul_1c (dst, src, size, n, cin);       \
  } while (0)
#else
#define MPN_MUL_1C(cout, dst, src, size, n, cin)        \
  do {                                                  \
    mp_limb_t __cy;                                     \
    __cy = mpn_mul_1 (dst, src, size, n);               \
    (cout) = __cy + mpn_add_1 (dst, dst, size, cin);    \
  } while (0)
#endif
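
/* In both variants MPN_MUL_1C computes {dst,size} = {src,size}*n + cin and
   leaves the carry-out limb in cout; the fallback folds the carry-in into
   the product with a separate mpn_add_1 pass.  */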


/* sub>=0 means an addmul w += x*y, sub<0 means a submul w -= x*y.

   All that's needed to account for negative w or x is to flip "sub".

   The final w will retain its sign, unless an underflow occurs in a submul
   of absolute values, in which case it's flipped.

   If x has more limbs than w, then mpn_submul_1 followed by mpn_com is
   used.  The alternative would be mpn_mul_1 into temporary space followed
   by mpn_sub_n.  Avoiding temporary space seems good, and submul+com stands
   a chance of being faster since it involves only one set of carry
   propagations, not two.  Note that doing an addmul_1 with a
   twos-complement negative y doesn't work, because it effectively adds an
   extra x * 2^GMP_LIMB_BITS.  */
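
/* For example, a submul w -= x*y with w = 10, x = -3, y = 4 flips "sub"
   because x is negative, so the operation on absolute values is an addmul,
   10 + 3*4 = 22, and w keeps its positive sign: 10 - (-3)*4 = 22.  With
   w = 5 and x = 3 instead, 5 - 3*4 underflows, giving magnitude 7 with the
   sign flipped: 5 - 3*4 = -7.  */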

REGPARM_ATTR(1) void
mpz_aorsmul_1 (mpz_ptr w, mpz_srcptr x, mp_limb_t y, mp_size_t sub)
{
  mp_size_t  xsize, wsize, wsize_signed, new_wsize, min_size, dsize;
  mp_srcptr  xp;
  mp_ptr     wp;
  mp_limb_t  cy;

  /* w unaffected if x==0 or y==0 */
  xsize = SIZ (x);
  if (xsize == 0 || y == 0)
    return;

  sub ^= xsize;
  xsize = ABS (xsize);

  wsize_signed = SIZ (w);
  if (wsize_signed == 0)
    {
      /* nothing to add to, just set x*y, "sub" gives the sign */
      wp = MPZ_REALLOC (w, xsize+1);
      cy = mpn_mul_1 (wp, PTR(x), xsize, y);
      wp[xsize] = cy;
      xsize += (cy != 0);
      SIZ (w) = (sub >= 0 ? xsize : -xsize);
      return;
    }

  sub ^= wsize_signed;
  wsize = ABS (wsize_signed);

  new_wsize = MAX (wsize, xsize);
  wp = MPZ_REALLOC (w, new_wsize+1);
  xp = PTR (x);
  min_size = MIN (wsize, xsize);

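  /* From here the sign of "sub" selects the operation on the absolute
     values: sub >= 0 adds x*y into |w|, sub < 0 subtracts it, and the sign
     of the result comes from wsize_signed (flipped below on underflow).  */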
  if (sub >= 0)
    {
      /* addmul of absolute values */

      cy = mpn_addmul_1 (wp, xp, min_size, y);
      wp += min_size;
      xp += min_size;

      dsize = xsize - wsize;
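      /* dsize > 0 means x has high limbs beyond w: multiply them by y,
	 feeding in the carry.  dsize < 0 means w has high limbs beyond x:
	 just propagate the carry through them.  */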
#if HAVE_NATIVE_mpn_mul_1c
      if (dsize > 0)
	cy = mpn_mul_1c (wp, xp, dsize, y, cy);
      else if (dsize < 0)
	{
	  dsize = -dsize;
	  cy = mpn_add_1 (wp, wp, dsize, cy);
	}
#else
      if (dsize != 0)
	{
	  mp_limb_t  cy2;
	  if (dsize > 0)
	    cy2 = mpn_mul_1 (wp, xp, dsize, y);
	  else
	    {
	      dsize = -dsize;
	      cy2 = 0;
	    }
	  cy = cy2 + mpn_add_1 (wp, wp, dsize, cy);
	}
#endif

      wp[dsize] = cy;
      new_wsize += (cy != 0);
    }
  else
    {
      /* submul of absolute values */

      cy = mpn_submul_1 (wp, xp, min_size, y);
      if (wsize >= xsize)
	{
	  /* if w bigger than x, then propagate borrow through it */
	  if (wsize != xsize)
	    cy = mpn_sub_1 (wp+xsize, wp+xsize, wsize-xsize, cy);

	  if (cy != 0)
	    {
	      /* Borrow out of w, take twos complement negative to get
		 absolute value, flip sign of w.  */
	      wp[new_wsize] = ~-cy;  /* extra limb is 0-cy */
	      mpn_com (wp, wp, new_wsize);
	      new_wsize++;
	      MPN_INCR_U (wp, new_wsize, CNST_LIMB(1));
	      wsize_signed = -wsize_signed;
	    }
	}
      else /* wsize < xsize */
	{
	  /* x bigger than w, so want x*y-w.  Submul has given w-x*y, so
	     take twos complement and use an mpn_mul_1 for the rest.  */

	  mp_limb_t  cy2;

	  /* -(-cy*b^n + w-x*y) = (cy-1)*b^n + ~(w-x*y) + 1 */
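	  /* Illustratively, with single decimal digits standing in for
	     limbs (b=10, n=1): if w held 3 and the low part of x*y was 7,
	     the submul left wp[0]=6 with borrow cy=1, since 3-7 = -1*10+6;
	     the wanted 7-3 = 4 is then (1-1)*10 + (9-6) + 1.  */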
	  mpn_com (wp, wp, wsize);
	  cy += mpn_add_1 (wp, wp, wsize, CNST_LIMB(1));
	  cy -= 1;

	  /* If cy-1 == -1 then hold that -1 for later.  mpn_submul_1 never
	     returns cy==MP_LIMB_T_MAX so that value always indicates a -1. */
	  cy2 = (cy == MP_LIMB_T_MAX);
	  cy += cy2;
	  MPN_MUL_1C (cy, wp+wsize, xp+wsize, xsize-wsize, y, cy);
	  wp[new_wsize] = cy;
	  new_wsize += (cy != 0);

	  /* Apply any -1 from above.  The value at wp+wsize is non-zero
	     because y!=0 and the high limb of x will be non-zero.  */
	  if (cy2)
	    MPN_DECR_U (wp+wsize, new_wsize-wsize, CNST_LIMB(1));

	  wsize_signed = -wsize_signed;
	}

      /* submul can produce high zero limbs due to cancellation, both when
	 w has more limbs and when x has more.  */
      MPN_NORMALIZE (wp, new_wsize);
    }

  SIZ (w) = (wsize_signed >= 0 ? new_wsize : -new_wsize);

  ASSERT (new_wsize == 0 || PTR(w)[new_wsize-1] != 0);
}


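/* When an unsigned long is wider than a limb, y can exceed GMP_NUMB_MAX.
   In that case both functions below split y as yh*2^GMP_NUMB_BITS + yl and
   make two mpz_aorsmul_1 calls: one with yh and x shifted up a limb (done
   by copying x into a temporary with a zero low limb), and one with yl and
   x itself.  */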
void
mpz_addmul_ui (mpz_ptr w, mpz_srcptr x, unsigned long y)
{
#if BITS_PER_ULONG > GMP_NUMB_BITS
  if (UNLIKELY (y > GMP_NUMB_MAX))
    {
      mpz_t t;
      mp_ptr tp;
      mp_size_t xn;
      TMP_DECL;
      TMP_MARK;
      xn = SIZ (x);
      if (xn == 0) return;
      MPZ_TMP_INIT (t, ABS (xn) + 1);
      tp = PTR (t);
      tp[0] = 0;
      MPN_COPY (tp + 1, PTR(x), ABS (xn));
      SIZ(t) = xn >= 0 ? xn + 1 : xn - 1;
      mpz_aorsmul_1 (w, t, (mp_limb_t) y >> GMP_NUMB_BITS, (mp_size_t) 0);
      PTR(t) = tp + 1;
      SIZ(t) = xn;
      mpz_aorsmul_1 (w, t, (mp_limb_t) y & GMP_NUMB_MASK, (mp_size_t) 0);
      TMP_FREE;
      return;
    }
#endif
  mpz_aorsmul_1 (w, x, (mp_limb_t) y, (mp_size_t) 0);
}

void
mpz_submul_ui (mpz_ptr w, mpz_srcptr x, unsigned long y)
{
#if BITS_PER_ULONG > GMP_NUMB_BITS
  if (y > GMP_NUMB_MAX)
    {
      mpz_t t;
      mp_ptr tp;
      mp_size_t xn;
      TMP_DECL;
      TMP_MARK;
      xn = SIZ (x);
      if (xn == 0) return;
      MPZ_TMP_INIT (t, ABS (xn) + 1);
      tp = PTR (t);
      tp[0] = 0;
      MPN_COPY (tp + 1, PTR(x), ABS (xn));
      SIZ(t) = xn >= 0 ? xn + 1 : xn - 1;
      mpz_aorsmul_1 (w, t, (mp_limb_t) y >> GMP_NUMB_BITS, (mp_size_t) -1);
      PTR(t) = tp + 1;
      SIZ(t) = xn;
      mpz_aorsmul_1 (w, t, (mp_limb_t) y & GMP_NUMB_MASK, (mp_size_t) -1);
      TMP_FREE;
      return;
    }
#endif
  mpz_aorsmul_1 (w, x, (mp_limb_t) y & GMP_NUMB_MASK, (mp_size_t) -1);
}