186d7f5d3SJohn Marino /* mpz_addmul_ui, mpz_submul_ui - add or subtract small multiple.
286d7f5d3SJohn Marino
386d7f5d3SJohn Marino THE mpz_aorsmul_1 FUNCTION IN THIS FILE IS FOR INTERNAL USE ONLY AND IS
486d7f5d3SJohn Marino ALMOST CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR
586d7f5d3SJohn Marino COMPLETELY IN FUTURE GNU MP RELEASES.
686d7f5d3SJohn Marino
786d7f5d3SJohn Marino Copyright 2001, 2002, 2004, 2005 Free Software Foundation, Inc.
886d7f5d3SJohn Marino
986d7f5d3SJohn Marino This file is part of the GNU MP Library.
1086d7f5d3SJohn Marino
1186d7f5d3SJohn Marino The GNU MP Library is free software; you can redistribute it and/or modify
1286d7f5d3SJohn Marino it under the terms of the GNU Lesser General Public License as published by
1386d7f5d3SJohn Marino the Free Software Foundation; either version 3 of the License, or (at your
1486d7f5d3SJohn Marino option) any later version.
1586d7f5d3SJohn Marino
1686d7f5d3SJohn Marino The GNU MP Library is distributed in the hope that it will be useful, but
1786d7f5d3SJohn Marino WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
1886d7f5d3SJohn Marino or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
1986d7f5d3SJohn Marino License for more details.
2086d7f5d3SJohn Marino
2186d7f5d3SJohn Marino You should have received a copy of the GNU Lesser General Public License
2286d7f5d3SJohn Marino along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
2386d7f5d3SJohn Marino
2486d7f5d3SJohn Marino #include "gmp.h"
2586d7f5d3SJohn Marino #include "gmp-impl.h"
2686d7f5d3SJohn Marino
2786d7f5d3SJohn Marino
2886d7f5d3SJohn Marino #if HAVE_NATIVE_mpn_mul_1c
2986d7f5d3SJohn Marino #define MPN_MUL_1C(cout, dst, src, size, n, cin) \
3086d7f5d3SJohn Marino do { \
3186d7f5d3SJohn Marino (cout) = mpn_mul_1c (dst, src, size, n, cin); \
3286d7f5d3SJohn Marino } while (0)
3386d7f5d3SJohn Marino #else
3486d7f5d3SJohn Marino #define MPN_MUL_1C(cout, dst, src, size, n, cin) \
3586d7f5d3SJohn Marino do { \
3686d7f5d3SJohn Marino mp_limb_t __cy; \
3786d7f5d3SJohn Marino __cy = mpn_mul_1 (dst, src, size, n); \
3886d7f5d3SJohn Marino (cout) = __cy + mpn_add_1 (dst, dst, size, cin); \
3986d7f5d3SJohn Marino } while (0)
4086d7f5d3SJohn Marino #endif
4186d7f5d3SJohn Marino
4286d7f5d3SJohn Marino
4386d7f5d3SJohn Marino /* sub>=0 means an addmul w += x*y, sub<0 means a submul w -= x*y.
4486d7f5d3SJohn Marino
4586d7f5d3SJohn Marino All that's needed to account for negative w or x is to flip "sub".
4686d7f5d3SJohn Marino
4786d7f5d3SJohn Marino The final w will retain its sign, unless an underflow occurs in a submul
4886d7f5d3SJohn Marino of absolute values, in which case it's flipped.
4986d7f5d3SJohn Marino
5086d7f5d3SJohn Marino If x has more limbs than w, then mpn_submul_1 followed by mpn_com is
5186d7f5d3SJohn Marino used. The alternative would be mpn_mul_1 into temporary space followed
5286d7f5d3SJohn Marino by mpn_sub_n. Avoiding temporary space seem good, and submul+com stands
5386d7f5d3SJohn Marino a chance of being faster since it involves only one set of carry
5486d7f5d3SJohn Marino propagations, not two. Note that doing an addmul_1 with a
5586d7f5d3SJohn Marino twos-complement negative y doesn't work, because it effectively adds an
5686d7f5d3SJohn Marino extra x * 2^GMP_LIMB_BITS. */
5786d7f5d3SJohn Marino
5886d7f5d3SJohn Marino REGPARM_ATTR(1) void
mpz_aorsmul_1(mpz_ptr w,mpz_srcptr x,mp_limb_t y,mp_size_t sub)5986d7f5d3SJohn Marino mpz_aorsmul_1 (mpz_ptr w, mpz_srcptr x, mp_limb_t y, mp_size_t sub)
6086d7f5d3SJohn Marino {
6186d7f5d3SJohn Marino mp_size_t xsize, wsize, wsize_signed, new_wsize, min_size, dsize;
6286d7f5d3SJohn Marino mp_srcptr xp;
6386d7f5d3SJohn Marino mp_ptr wp;
6486d7f5d3SJohn Marino mp_limb_t cy;
6586d7f5d3SJohn Marino
6686d7f5d3SJohn Marino /* w unaffected if x==0 or y==0 */
6786d7f5d3SJohn Marino xsize = SIZ (x);
6886d7f5d3SJohn Marino if (xsize == 0 || y == 0)
6986d7f5d3SJohn Marino return;
7086d7f5d3SJohn Marino
7186d7f5d3SJohn Marino sub ^= xsize;
7286d7f5d3SJohn Marino xsize = ABS (xsize);
7386d7f5d3SJohn Marino
7486d7f5d3SJohn Marino wsize_signed = SIZ (w);
7586d7f5d3SJohn Marino if (wsize_signed == 0)
7686d7f5d3SJohn Marino {
7786d7f5d3SJohn Marino /* nothing to add to, just set x*y, "sub" gives the sign */
7886d7f5d3SJohn Marino MPZ_REALLOC (w, xsize+1);
7986d7f5d3SJohn Marino wp = PTR (w);
8086d7f5d3SJohn Marino cy = mpn_mul_1 (wp, PTR(x), xsize, y);
8186d7f5d3SJohn Marino wp[xsize] = cy;
8286d7f5d3SJohn Marino xsize += (cy != 0);
8386d7f5d3SJohn Marino SIZ (w) = (sub >= 0 ? xsize : -xsize);
8486d7f5d3SJohn Marino return;
8586d7f5d3SJohn Marino }
8686d7f5d3SJohn Marino
8786d7f5d3SJohn Marino sub ^= wsize_signed;
8886d7f5d3SJohn Marino wsize = ABS (wsize_signed);
8986d7f5d3SJohn Marino
9086d7f5d3SJohn Marino new_wsize = MAX (wsize, xsize);
9186d7f5d3SJohn Marino MPZ_REALLOC (w, new_wsize+1);
9286d7f5d3SJohn Marino wp = PTR (w);
9386d7f5d3SJohn Marino xp = PTR (x);
9486d7f5d3SJohn Marino min_size = MIN (wsize, xsize);
9586d7f5d3SJohn Marino
9686d7f5d3SJohn Marino if (sub >= 0)
9786d7f5d3SJohn Marino {
9886d7f5d3SJohn Marino /* addmul of absolute values */
9986d7f5d3SJohn Marino
10086d7f5d3SJohn Marino cy = mpn_addmul_1 (wp, xp, min_size, y);
10186d7f5d3SJohn Marino wp += min_size;
10286d7f5d3SJohn Marino xp += min_size;
10386d7f5d3SJohn Marino
10486d7f5d3SJohn Marino dsize = xsize - wsize;
10586d7f5d3SJohn Marino #if HAVE_NATIVE_mpn_mul_1c
10686d7f5d3SJohn Marino if (dsize > 0)
10786d7f5d3SJohn Marino cy = mpn_mul_1c (wp, xp, dsize, y, cy);
10886d7f5d3SJohn Marino else if (dsize < 0)
10986d7f5d3SJohn Marino {
11086d7f5d3SJohn Marino dsize = -dsize;
11186d7f5d3SJohn Marino cy = mpn_add_1 (wp, wp, dsize, cy);
11286d7f5d3SJohn Marino }
11386d7f5d3SJohn Marino #else
11486d7f5d3SJohn Marino if (dsize != 0)
11586d7f5d3SJohn Marino {
11686d7f5d3SJohn Marino mp_limb_t cy2;
11786d7f5d3SJohn Marino if (dsize > 0)
11886d7f5d3SJohn Marino cy2 = mpn_mul_1 (wp, xp, dsize, y);
11986d7f5d3SJohn Marino else
12086d7f5d3SJohn Marino {
12186d7f5d3SJohn Marino dsize = -dsize;
12286d7f5d3SJohn Marino cy2 = 0;
12386d7f5d3SJohn Marino }
12486d7f5d3SJohn Marino cy = cy2 + mpn_add_1 (wp, wp, dsize, cy);
12586d7f5d3SJohn Marino }
12686d7f5d3SJohn Marino #endif
12786d7f5d3SJohn Marino
12886d7f5d3SJohn Marino wp[dsize] = cy;
12986d7f5d3SJohn Marino new_wsize += (cy != 0);
13086d7f5d3SJohn Marino }
13186d7f5d3SJohn Marino else
13286d7f5d3SJohn Marino {
13386d7f5d3SJohn Marino /* submul of absolute values */
13486d7f5d3SJohn Marino
13586d7f5d3SJohn Marino cy = mpn_submul_1 (wp, xp, min_size, y);
13686d7f5d3SJohn Marino if (wsize >= xsize)
13786d7f5d3SJohn Marino {
13886d7f5d3SJohn Marino /* if w bigger than x, then propagate borrow through it */
13986d7f5d3SJohn Marino if (wsize != xsize)
14086d7f5d3SJohn Marino cy = mpn_sub_1 (wp+xsize, wp+xsize, wsize-xsize, cy);
14186d7f5d3SJohn Marino
14286d7f5d3SJohn Marino if (cy != 0)
14386d7f5d3SJohn Marino {
14486d7f5d3SJohn Marino /* Borrow out of w, take twos complement negative to get
14586d7f5d3SJohn Marino absolute value, flip sign of w. */
14686d7f5d3SJohn Marino wp[new_wsize] = ~-cy; /* extra limb is 0-cy */
14786d7f5d3SJohn Marino mpn_com (wp, wp, new_wsize);
14886d7f5d3SJohn Marino new_wsize++;
14986d7f5d3SJohn Marino MPN_INCR_U (wp, new_wsize, CNST_LIMB(1));
15086d7f5d3SJohn Marino wsize_signed = -wsize_signed;
15186d7f5d3SJohn Marino }
15286d7f5d3SJohn Marino }
15386d7f5d3SJohn Marino else /* wsize < xsize */
15486d7f5d3SJohn Marino {
15586d7f5d3SJohn Marino /* x bigger than w, so want x*y-w. Submul has given w-x*y, so
15686d7f5d3SJohn Marino take twos complement and use an mpn_mul_1 for the rest. */
15786d7f5d3SJohn Marino
15886d7f5d3SJohn Marino mp_limb_t cy2;
15986d7f5d3SJohn Marino
16086d7f5d3SJohn Marino /* -(-cy*b^n + w-x*y) = (cy-1)*b^n + ~(w-x*y) + 1 */
16186d7f5d3SJohn Marino mpn_com (wp, wp, wsize);
16286d7f5d3SJohn Marino cy += mpn_add_1 (wp, wp, wsize, CNST_LIMB(1));
16386d7f5d3SJohn Marino cy -= 1;
16486d7f5d3SJohn Marino
16586d7f5d3SJohn Marino /* If cy-1 == -1 then hold that -1 for latter. mpn_submul_1 never
16686d7f5d3SJohn Marino returns cy==MP_LIMB_T_MAX so that value always indicates a -1. */
16786d7f5d3SJohn Marino cy2 = (cy == MP_LIMB_T_MAX);
16886d7f5d3SJohn Marino cy += cy2;
16986d7f5d3SJohn Marino MPN_MUL_1C (cy, wp+wsize, xp+wsize, xsize-wsize, y, cy);
17086d7f5d3SJohn Marino wp[new_wsize] = cy;
17186d7f5d3SJohn Marino new_wsize += (cy != 0);
17286d7f5d3SJohn Marino
17386d7f5d3SJohn Marino /* Apply any -1 from above. The value at wp+wsize is non-zero
17486d7f5d3SJohn Marino because y!=0 and the high limb of x will be non-zero. */
17586d7f5d3SJohn Marino if (cy2)
17686d7f5d3SJohn Marino MPN_DECR_U (wp+wsize, new_wsize-wsize, CNST_LIMB(1));
17786d7f5d3SJohn Marino
17886d7f5d3SJohn Marino wsize_signed = -wsize_signed;
17986d7f5d3SJohn Marino }
18086d7f5d3SJohn Marino
18186d7f5d3SJohn Marino /* submul can produce high zero limbs due to cancellation, both when w
18286d7f5d3SJohn Marino has more limbs or x has more */
18386d7f5d3SJohn Marino MPN_NORMALIZE (wp, new_wsize);
18486d7f5d3SJohn Marino }
18586d7f5d3SJohn Marino
18686d7f5d3SJohn Marino SIZ (w) = (wsize_signed >= 0 ? new_wsize : -new_wsize);
18786d7f5d3SJohn Marino
18886d7f5d3SJohn Marino ASSERT (new_wsize == 0 || PTR(w)[new_wsize-1] != 0);
18986d7f5d3SJohn Marino }
19086d7f5d3SJohn Marino
19186d7f5d3SJohn Marino
19286d7f5d3SJohn Marino void
mpz_addmul_ui(mpz_ptr w,mpz_srcptr x,unsigned long y)19386d7f5d3SJohn Marino mpz_addmul_ui (mpz_ptr w, mpz_srcptr x, unsigned long y)
19486d7f5d3SJohn Marino {
19586d7f5d3SJohn Marino #if BITS_PER_ULONG > GMP_NUMB_BITS
19686d7f5d3SJohn Marino if (UNLIKELY (y > GMP_NUMB_MAX && SIZ(x) != 0))
19786d7f5d3SJohn Marino {
19886d7f5d3SJohn Marino mpz_t t;
19986d7f5d3SJohn Marino mp_ptr tp;
20086d7f5d3SJohn Marino mp_size_t xn;
20186d7f5d3SJohn Marino TMP_DECL;
20286d7f5d3SJohn Marino TMP_MARK;
20386d7f5d3SJohn Marino xn = SIZ (x);
20486d7f5d3SJohn Marino MPZ_TMP_INIT (t, ABS (xn) + 1);
20586d7f5d3SJohn Marino tp = PTR (t);
20686d7f5d3SJohn Marino tp[0] = 0;
20786d7f5d3SJohn Marino MPN_COPY (tp + 1, PTR(x), ABS (xn));
20886d7f5d3SJohn Marino SIZ(t) = xn >= 0 ? xn + 1 : xn - 1;
20986d7f5d3SJohn Marino mpz_aorsmul_1 (w, t, (mp_limb_t) y >> GMP_NUMB_BITS, (mp_size_t) 0);
21086d7f5d3SJohn Marino PTR(t) = tp + 1;
21186d7f5d3SJohn Marino SIZ(t) = xn;
21286d7f5d3SJohn Marino mpz_aorsmul_1 (w, t, (mp_limb_t) y & GMP_NUMB_MASK, (mp_size_t) 0);
21386d7f5d3SJohn Marino TMP_FREE;
21486d7f5d3SJohn Marino return;
21586d7f5d3SJohn Marino }
21686d7f5d3SJohn Marino #endif
21786d7f5d3SJohn Marino mpz_aorsmul_1 (w, x, (mp_limb_t) y, (mp_size_t) 0);
21886d7f5d3SJohn Marino }
21986d7f5d3SJohn Marino
22086d7f5d3SJohn Marino void
mpz_submul_ui(mpz_ptr w,mpz_srcptr x,unsigned long y)22186d7f5d3SJohn Marino mpz_submul_ui (mpz_ptr w, mpz_srcptr x, unsigned long y)
22286d7f5d3SJohn Marino {
22386d7f5d3SJohn Marino #if BITS_PER_ULONG > GMP_NUMB_BITS
22486d7f5d3SJohn Marino if (y > GMP_NUMB_MAX && SIZ(x) != 0)
22586d7f5d3SJohn Marino {
22686d7f5d3SJohn Marino mpz_t t;
22786d7f5d3SJohn Marino mp_ptr tp;
22886d7f5d3SJohn Marino mp_size_t xn;
22986d7f5d3SJohn Marino TMP_DECL;
23086d7f5d3SJohn Marino TMP_MARK;
23186d7f5d3SJohn Marino xn = SIZ (x);
23286d7f5d3SJohn Marino MPZ_TMP_INIT (t, ABS (xn) + 1);
23386d7f5d3SJohn Marino tp = PTR (t);
23486d7f5d3SJohn Marino tp[0] = 0;
23586d7f5d3SJohn Marino MPN_COPY (tp + 1, PTR(x), ABS (xn));
23686d7f5d3SJohn Marino SIZ(t) = xn >= 0 ? xn + 1 : xn - 1;
23786d7f5d3SJohn Marino mpz_aorsmul_1 (w, t, (mp_limb_t) y >> GMP_NUMB_BITS, (mp_size_t) -1);
23886d7f5d3SJohn Marino PTR(t) = tp + 1;
23986d7f5d3SJohn Marino SIZ(t) = xn;
24086d7f5d3SJohn Marino mpz_aorsmul_1 (w, t, (mp_limb_t) y & GMP_NUMB_MASK, (mp_size_t) -1);
24186d7f5d3SJohn Marino TMP_FREE;
24286d7f5d3SJohn Marino return;
24386d7f5d3SJohn Marino }
24486d7f5d3SJohn Marino #endif
24586d7f5d3SJohn Marino mpz_aorsmul_1 (w, x, (mp_limb_t) y & GMP_NUMB_MASK, (mp_size_t) -1);
24686d7f5d3SJohn Marino }
247