xref: /dflybsd-src/contrib/gmp/mpz/aorsmul_i.c (revision 86d7f5d305c6adaa56ff4582ece9859d73106103)
186d7f5d3SJohn Marino /* mpz_addmul_ui, mpz_submul_ui - add or subtract small multiple.
286d7f5d3SJohn Marino 
386d7f5d3SJohn Marino    THE mpz_aorsmul_1 FUNCTION IN THIS FILE IS FOR INTERNAL USE ONLY AND IS
486d7f5d3SJohn Marino    ALMOST CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR
586d7f5d3SJohn Marino    COMPLETELY IN FUTURE GNU MP RELEASES.
686d7f5d3SJohn Marino 
786d7f5d3SJohn Marino Copyright 2001, 2002, 2004, 2005 Free Software Foundation, Inc.
886d7f5d3SJohn Marino 
986d7f5d3SJohn Marino This file is part of the GNU MP Library.
1086d7f5d3SJohn Marino 
1186d7f5d3SJohn Marino The GNU MP Library is free software; you can redistribute it and/or modify
1286d7f5d3SJohn Marino it under the terms of the GNU Lesser General Public License as published by
1386d7f5d3SJohn Marino the Free Software Foundation; either version 3 of the License, or (at your
1486d7f5d3SJohn Marino option) any later version.
1586d7f5d3SJohn Marino 
1686d7f5d3SJohn Marino The GNU MP Library is distributed in the hope that it will be useful, but
1786d7f5d3SJohn Marino WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
1886d7f5d3SJohn Marino or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
1986d7f5d3SJohn Marino License for more details.
2086d7f5d3SJohn Marino 
2186d7f5d3SJohn Marino You should have received a copy of the GNU Lesser General Public License
2286d7f5d3SJohn Marino along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
2386d7f5d3SJohn Marino 
2486d7f5d3SJohn Marino #include "gmp.h"
2586d7f5d3SJohn Marino #include "gmp-impl.h"
2686d7f5d3SJohn Marino 
2786d7f5d3SJohn Marino 
2886d7f5d3SJohn Marino #if HAVE_NATIVE_mpn_mul_1c
2986d7f5d3SJohn Marino #define MPN_MUL_1C(cout, dst, src, size, n, cin)        \
3086d7f5d3SJohn Marino   do {                                                  \
3186d7f5d3SJohn Marino     (cout) = mpn_mul_1c (dst, src, size, n, cin);       \
3286d7f5d3SJohn Marino   } while (0)
3386d7f5d3SJohn Marino #else
3486d7f5d3SJohn Marino #define MPN_MUL_1C(cout, dst, src, size, n, cin)        \
3586d7f5d3SJohn Marino   do {                                                  \
3686d7f5d3SJohn Marino     mp_limb_t __cy;                                     \
3786d7f5d3SJohn Marino     __cy = mpn_mul_1 (dst, src, size, n);               \
3886d7f5d3SJohn Marino     (cout) = __cy + mpn_add_1 (dst, dst, size, cin);    \
3986d7f5d3SJohn Marino   } while (0)
4086d7f5d3SJohn Marino #endif
4186d7f5d3SJohn Marino 
4286d7f5d3SJohn Marino 
4386d7f5d3SJohn Marino /* sub>=0 means an addmul w += x*y, sub<0 means a submul w -= x*y.
4486d7f5d3SJohn Marino 
4586d7f5d3SJohn Marino    All that's needed to account for negative w or x is to flip "sub".
4686d7f5d3SJohn Marino 
4786d7f5d3SJohn Marino    The final w will retain its sign, unless an underflow occurs in a submul
4886d7f5d3SJohn Marino    of absolute values, in which case it's flipped.
4986d7f5d3SJohn Marino 
5086d7f5d3SJohn Marino    If x has more limbs than w, then mpn_submul_1 followed by mpn_com is
5186d7f5d3SJohn Marino    used.  The alternative would be mpn_mul_1 into temporary space followed
5286d7f5d3SJohn Marino    by mpn_sub_n.  Avoiding temporary space seem good, and submul+com stands
5386d7f5d3SJohn Marino    a chance of being faster since it involves only one set of carry
5486d7f5d3SJohn Marino    propagations, not two.  Note that doing an addmul_1 with a
5586d7f5d3SJohn Marino    twos-complement negative y doesn't work, because it effectively adds an
5686d7f5d3SJohn Marino    extra x * 2^GMP_LIMB_BITS.  */
5786d7f5d3SJohn Marino 
5886d7f5d3SJohn Marino REGPARM_ATTR(1) void
mpz_aorsmul_1(mpz_ptr w,mpz_srcptr x,mp_limb_t y,mp_size_t sub)5986d7f5d3SJohn Marino mpz_aorsmul_1 (mpz_ptr w, mpz_srcptr x, mp_limb_t y, mp_size_t sub)
6086d7f5d3SJohn Marino {
6186d7f5d3SJohn Marino   mp_size_t  xsize, wsize, wsize_signed, new_wsize, min_size, dsize;
6286d7f5d3SJohn Marino   mp_srcptr  xp;
6386d7f5d3SJohn Marino   mp_ptr     wp;
6486d7f5d3SJohn Marino   mp_limb_t  cy;
6586d7f5d3SJohn Marino 
6686d7f5d3SJohn Marino   /* w unaffected if x==0 or y==0 */
6786d7f5d3SJohn Marino   xsize = SIZ (x);
6886d7f5d3SJohn Marino   if (xsize == 0 || y == 0)
6986d7f5d3SJohn Marino     return;
7086d7f5d3SJohn Marino 
7186d7f5d3SJohn Marino   sub ^= xsize;
7286d7f5d3SJohn Marino   xsize = ABS (xsize);
7386d7f5d3SJohn Marino 
7486d7f5d3SJohn Marino   wsize_signed = SIZ (w);
7586d7f5d3SJohn Marino   if (wsize_signed == 0)
7686d7f5d3SJohn Marino     {
7786d7f5d3SJohn Marino       /* nothing to add to, just set x*y, "sub" gives the sign */
7886d7f5d3SJohn Marino       MPZ_REALLOC (w, xsize+1);
7986d7f5d3SJohn Marino       wp = PTR (w);
8086d7f5d3SJohn Marino       cy = mpn_mul_1 (wp, PTR(x), xsize, y);
8186d7f5d3SJohn Marino       wp[xsize] = cy;
8286d7f5d3SJohn Marino       xsize += (cy != 0);
8386d7f5d3SJohn Marino       SIZ (w) = (sub >= 0 ? xsize : -xsize);
8486d7f5d3SJohn Marino       return;
8586d7f5d3SJohn Marino     }
8686d7f5d3SJohn Marino 
8786d7f5d3SJohn Marino   sub ^= wsize_signed;
8886d7f5d3SJohn Marino   wsize = ABS (wsize_signed);
8986d7f5d3SJohn Marino 
9086d7f5d3SJohn Marino   new_wsize = MAX (wsize, xsize);
9186d7f5d3SJohn Marino   MPZ_REALLOC (w, new_wsize+1);
9286d7f5d3SJohn Marino   wp = PTR (w);
9386d7f5d3SJohn Marino   xp = PTR (x);
9486d7f5d3SJohn Marino   min_size = MIN (wsize, xsize);
9586d7f5d3SJohn Marino 
9686d7f5d3SJohn Marino   if (sub >= 0)
9786d7f5d3SJohn Marino     {
9886d7f5d3SJohn Marino       /* addmul of absolute values */
9986d7f5d3SJohn Marino 
10086d7f5d3SJohn Marino       cy = mpn_addmul_1 (wp, xp, min_size, y);
10186d7f5d3SJohn Marino       wp += min_size;
10286d7f5d3SJohn Marino       xp += min_size;
10386d7f5d3SJohn Marino 
10486d7f5d3SJohn Marino       dsize = xsize - wsize;
10586d7f5d3SJohn Marino #if HAVE_NATIVE_mpn_mul_1c
10686d7f5d3SJohn Marino       if (dsize > 0)
10786d7f5d3SJohn Marino         cy = mpn_mul_1c (wp, xp, dsize, y, cy);
10886d7f5d3SJohn Marino       else if (dsize < 0)
10986d7f5d3SJohn Marino         {
11086d7f5d3SJohn Marino           dsize = -dsize;
11186d7f5d3SJohn Marino           cy = mpn_add_1 (wp, wp, dsize, cy);
11286d7f5d3SJohn Marino         }
11386d7f5d3SJohn Marino #else
11486d7f5d3SJohn Marino       if (dsize != 0)
11586d7f5d3SJohn Marino         {
11686d7f5d3SJohn Marino           mp_limb_t  cy2;
11786d7f5d3SJohn Marino           if (dsize > 0)
11886d7f5d3SJohn Marino             cy2 = mpn_mul_1 (wp, xp, dsize, y);
11986d7f5d3SJohn Marino           else
12086d7f5d3SJohn Marino             {
12186d7f5d3SJohn Marino               dsize = -dsize;
12286d7f5d3SJohn Marino               cy2 = 0;
12386d7f5d3SJohn Marino             }
12486d7f5d3SJohn Marino           cy = cy2 + mpn_add_1 (wp, wp, dsize, cy);
12586d7f5d3SJohn Marino         }
12686d7f5d3SJohn Marino #endif
12786d7f5d3SJohn Marino 
12886d7f5d3SJohn Marino       wp[dsize] = cy;
12986d7f5d3SJohn Marino       new_wsize += (cy != 0);
13086d7f5d3SJohn Marino     }
13186d7f5d3SJohn Marino   else
13286d7f5d3SJohn Marino     {
13386d7f5d3SJohn Marino       /* submul of absolute values */
13486d7f5d3SJohn Marino 
13586d7f5d3SJohn Marino       cy = mpn_submul_1 (wp, xp, min_size, y);
13686d7f5d3SJohn Marino       if (wsize >= xsize)
13786d7f5d3SJohn Marino         {
13886d7f5d3SJohn Marino           /* if w bigger than x, then propagate borrow through it */
13986d7f5d3SJohn Marino           if (wsize != xsize)
14086d7f5d3SJohn Marino             cy = mpn_sub_1 (wp+xsize, wp+xsize, wsize-xsize, cy);
14186d7f5d3SJohn Marino 
14286d7f5d3SJohn Marino           if (cy != 0)
14386d7f5d3SJohn Marino             {
14486d7f5d3SJohn Marino               /* Borrow out of w, take twos complement negative to get
14586d7f5d3SJohn Marino                  absolute value, flip sign of w.  */
14686d7f5d3SJohn Marino               wp[new_wsize] = ~-cy;  /* extra limb is 0-cy */
14786d7f5d3SJohn Marino               mpn_com (wp, wp, new_wsize);
14886d7f5d3SJohn Marino               new_wsize++;
14986d7f5d3SJohn Marino               MPN_INCR_U (wp, new_wsize, CNST_LIMB(1));
15086d7f5d3SJohn Marino               wsize_signed = -wsize_signed;
15186d7f5d3SJohn Marino             }
15286d7f5d3SJohn Marino         }
15386d7f5d3SJohn Marino       else /* wsize < xsize */
15486d7f5d3SJohn Marino         {
15586d7f5d3SJohn Marino           /* x bigger than w, so want x*y-w.  Submul has given w-x*y, so
15686d7f5d3SJohn Marino              take twos complement and use an mpn_mul_1 for the rest.  */
15786d7f5d3SJohn Marino 
15886d7f5d3SJohn Marino           mp_limb_t  cy2;
15986d7f5d3SJohn Marino 
16086d7f5d3SJohn Marino           /* -(-cy*b^n + w-x*y) = (cy-1)*b^n + ~(w-x*y) + 1 */
16186d7f5d3SJohn Marino           mpn_com (wp, wp, wsize);
16286d7f5d3SJohn Marino           cy += mpn_add_1 (wp, wp, wsize, CNST_LIMB(1));
16386d7f5d3SJohn Marino           cy -= 1;
16486d7f5d3SJohn Marino 
16586d7f5d3SJohn Marino           /* If cy-1 == -1 then hold that -1 for latter.  mpn_submul_1 never
16686d7f5d3SJohn Marino              returns cy==MP_LIMB_T_MAX so that value always indicates a -1. */
16786d7f5d3SJohn Marino           cy2 = (cy == MP_LIMB_T_MAX);
16886d7f5d3SJohn Marino           cy += cy2;
16986d7f5d3SJohn Marino           MPN_MUL_1C (cy, wp+wsize, xp+wsize, xsize-wsize, y, cy);
17086d7f5d3SJohn Marino           wp[new_wsize] = cy;
17186d7f5d3SJohn Marino           new_wsize += (cy != 0);
17286d7f5d3SJohn Marino 
17386d7f5d3SJohn Marino           /* Apply any -1 from above.  The value at wp+wsize is non-zero
17486d7f5d3SJohn Marino              because y!=0 and the high limb of x will be non-zero.  */
17586d7f5d3SJohn Marino           if (cy2)
17686d7f5d3SJohn Marino             MPN_DECR_U (wp+wsize, new_wsize-wsize, CNST_LIMB(1));
17786d7f5d3SJohn Marino 
17886d7f5d3SJohn Marino           wsize_signed = -wsize_signed;
17986d7f5d3SJohn Marino         }
18086d7f5d3SJohn Marino 
18186d7f5d3SJohn Marino       /* submul can produce high zero limbs due to cancellation, both when w
18286d7f5d3SJohn Marino          has more limbs or x has more  */
18386d7f5d3SJohn Marino       MPN_NORMALIZE (wp, new_wsize);
18486d7f5d3SJohn Marino     }
18586d7f5d3SJohn Marino 
18686d7f5d3SJohn Marino   SIZ (w) = (wsize_signed >= 0 ? new_wsize : -new_wsize);
18786d7f5d3SJohn Marino 
18886d7f5d3SJohn Marino   ASSERT (new_wsize == 0 || PTR(w)[new_wsize-1] != 0);
18986d7f5d3SJohn Marino }
19086d7f5d3SJohn Marino 
19186d7f5d3SJohn Marino 
19286d7f5d3SJohn Marino void
mpz_addmul_ui(mpz_ptr w,mpz_srcptr x,unsigned long y)19386d7f5d3SJohn Marino mpz_addmul_ui (mpz_ptr w, mpz_srcptr x, unsigned long y)
19486d7f5d3SJohn Marino {
19586d7f5d3SJohn Marino #if BITS_PER_ULONG > GMP_NUMB_BITS
19686d7f5d3SJohn Marino   if (UNLIKELY (y > GMP_NUMB_MAX && SIZ(x) != 0))
19786d7f5d3SJohn Marino     {
19886d7f5d3SJohn Marino       mpz_t t;
19986d7f5d3SJohn Marino       mp_ptr tp;
20086d7f5d3SJohn Marino       mp_size_t xn;
20186d7f5d3SJohn Marino       TMP_DECL;
20286d7f5d3SJohn Marino       TMP_MARK;
20386d7f5d3SJohn Marino       xn = SIZ (x);
20486d7f5d3SJohn Marino       MPZ_TMP_INIT (t, ABS (xn) + 1);
20586d7f5d3SJohn Marino       tp = PTR (t);
20686d7f5d3SJohn Marino       tp[0] = 0;
20786d7f5d3SJohn Marino       MPN_COPY (tp + 1, PTR(x), ABS (xn));
20886d7f5d3SJohn Marino       SIZ(t) = xn >= 0 ? xn + 1 : xn - 1;
20986d7f5d3SJohn Marino       mpz_aorsmul_1 (w, t, (mp_limb_t) y >> GMP_NUMB_BITS, (mp_size_t) 0);
21086d7f5d3SJohn Marino       PTR(t) = tp + 1;
21186d7f5d3SJohn Marino       SIZ(t) = xn;
21286d7f5d3SJohn Marino       mpz_aorsmul_1 (w, t, (mp_limb_t) y & GMP_NUMB_MASK, (mp_size_t) 0);
21386d7f5d3SJohn Marino       TMP_FREE;
21486d7f5d3SJohn Marino       return;
21586d7f5d3SJohn Marino     }
21686d7f5d3SJohn Marino #endif
21786d7f5d3SJohn Marino   mpz_aorsmul_1 (w, x, (mp_limb_t) y, (mp_size_t) 0);
21886d7f5d3SJohn Marino }
21986d7f5d3SJohn Marino 
22086d7f5d3SJohn Marino void
mpz_submul_ui(mpz_ptr w,mpz_srcptr x,unsigned long y)22186d7f5d3SJohn Marino mpz_submul_ui (mpz_ptr w, mpz_srcptr x, unsigned long y)
22286d7f5d3SJohn Marino {
22386d7f5d3SJohn Marino #if BITS_PER_ULONG > GMP_NUMB_BITS
22486d7f5d3SJohn Marino   if (y > GMP_NUMB_MAX && SIZ(x) != 0)
22586d7f5d3SJohn Marino     {
22686d7f5d3SJohn Marino       mpz_t t;
22786d7f5d3SJohn Marino       mp_ptr tp;
22886d7f5d3SJohn Marino       mp_size_t xn;
22986d7f5d3SJohn Marino       TMP_DECL;
23086d7f5d3SJohn Marino       TMP_MARK;
23186d7f5d3SJohn Marino       xn = SIZ (x);
23286d7f5d3SJohn Marino       MPZ_TMP_INIT (t, ABS (xn) + 1);
23386d7f5d3SJohn Marino       tp = PTR (t);
23486d7f5d3SJohn Marino       tp[0] = 0;
23586d7f5d3SJohn Marino       MPN_COPY (tp + 1, PTR(x), ABS (xn));
23686d7f5d3SJohn Marino       SIZ(t) = xn >= 0 ? xn + 1 : xn - 1;
23786d7f5d3SJohn Marino       mpz_aorsmul_1 (w, t, (mp_limb_t) y >> GMP_NUMB_BITS, (mp_size_t) -1);
23886d7f5d3SJohn Marino       PTR(t) = tp + 1;
23986d7f5d3SJohn Marino       SIZ(t) = xn;
24086d7f5d3SJohn Marino       mpz_aorsmul_1 (w, t, (mp_limb_t) y & GMP_NUMB_MASK, (mp_size_t) -1);
24186d7f5d3SJohn Marino       TMP_FREE;
24286d7f5d3SJohn Marino       return;
24386d7f5d3SJohn Marino     }
24486d7f5d3SJohn Marino #endif
24586d7f5d3SJohn Marino   mpz_aorsmul_1 (w, x, (mp_limb_t) y & GMP_NUMB_MASK, (mp_size_t) -1);
24686d7f5d3SJohn Marino }
247