/* mpf_mul_ui -- Multiply a float and an unsigned integer.

Copyright 1993, 1994, 1996, 2001, 2003, 2004 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */

#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"


/* The core operation is a multiply of PREC(r) limbs from u by v, producing
   either PREC(r) or PREC(r)+1 result limbs.  If u is shorter than PREC(r),
   then we take only as much as it has.  If u is longer we incorporate a
   carry from the lower limbs.

   If u has just 1 extra limb, then the carry to add is high(up[0]*v).  That
   is of course what mpn_mul_1 would do if it were called with PREC(r)+1
   limbs of input.

   If u has more than 1 extra limb, then there can be a further carry bit
   out of lower uncalculated limbs (the way the low of one product adds to
   the high of the product below it).  This is of course what an mpn_mul_1
   would do if it were called with the full u operand.  But we instead work
   downwards explicitly, until a carry occurs or until a value other than
   GMP_NUMB_MAX occurs (that being the only value a carry bit can propagate
   across).

   The carry determination normally requires two umul_ppmm's; only rarely
   will GMP_NUMB_MAX occur and require further products.

   The carry limb is conveniently added into the mul_1 using mpn_mul_1c when
   that function exists, otherwise a subsequent mpn_add_1 is needed.

   Clearly when mpn_mul_1c is used the carry must be calculated first.  But
   this is also the case when add_1 is used, since if r==u and ABSIZ(r) >
   PREC(r) then the mpn_mul_1 overwrites the low part of the input.

   A reuse r==u with size > prec can occur from a size PREC(r)+1 in the
   usual way, or it can occur from an mpf_set_prec_raw leaving a bigger
   sized value.  In both cases we can end up calling mpn_mul_1 with
   overlapping src and dst regions, but this will be with dst < src and such
   an overlap is permitted.

   Not done:

   No attempt is made to determine in advance whether the result will be
   PREC(r) or PREC(r)+1 limbs.  If it's going to be PREC(r)+1 then we could
   take one less limb from u and generate just PREC(r), which of course
   still satisfies the application's requested precision.  But any test
   counting bits or forming the high product would almost certainly take
   longer than the incremental cost of an extra limb in mpn_mul_1.

   Enhancements:

   Repeated mpf_mul_ui's with an even v will accumulate low zero bits on the
   result, leaving low zero limbs after a while, which it might be nice to
   strip to save work in subsequent operations.  Calculating the low limb
   explicitly would let us direct mpn_mul_1 to put the balance at rp when
   the low is zero (instead of normally rp+1).  But it's not clear whether
   this would be worthwhile.  Explicit code for the low limb will probably
   be slower than having it done in mpn_mul_1, so we need to consider how
   often a zero will be stripped and how much that's likely to save
   later.  */

void
mpf_mul_ui (mpf_ptr r, mpf_srcptr u, unsigned long int v)
{
  mp_srcptr up;
  mp_size_t usize;
  mp_size_t size;
  mp_size_t prec, excess;
  mp_limb_t cy_limb, vl, cbit, cin;
  mp_ptr rp;

  usize = u->_mp_size;
  if (UNLIKELY (v == 0) || UNLIKELY (usize == 0))
    {
      r->_mp_size = 0;
      r->_mp_exp = 0;
      return;
    }

#if BITS_PER_ULONG > GMP_NUMB_BITS  /* avoid warnings about shift amount */
  if (v > GMP_NUMB_MAX)
    {
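      /* v does not fit in a single limb: represent it as a two-limb
         temporary mpf and let the general mpf_mul do the work.  */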
      mpf_t     vf;
      mp_limb_t vp[2];
      vp[0] = v & GMP_NUMB_MASK;
      vp[1] = v >> GMP_NUMB_BITS;
      PTR(vf) = vp;
      SIZ(vf) = 2;
      ASSERT_CODE (PREC(vf) = 2);
      EXP(vf) = 2;
      mpf_mul (r, u, vf);
      return;
    }
#endif

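  /* Work with the absolute size of u.  When u has more limbs than the
     destination precision, the low "excess" limbs contribute only a
     carry into the multiply of the top prec limbs.  */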
  size = ABS (usize);
  prec = r->_mp_prec;
  up = u->_mp_d;
  vl = v;
  excess = size - prec;
  cin = 0;

  if (excess > 0)
    {
      /* up is bigger than desired rp, shorten it to prec limbs and
         determine a carry-in */

      mp_limb_t  vl_shifted = vl << GMP_NAIL_BITS;
      mp_limb_t  hi, lo, next_lo, sum;
      mp_size_t  i;

      /* high limb of top product */
      i = excess - 1;
      umul_ppmm (cin, lo, up[i], vl_shifted);

      /* and carry bit out of products below that, if any */
      for (;;)
        {
          i--;
          if (i < 0)
            break;

          umul_ppmm (hi, next_lo, up[i], vl_shifted);
          lo >>= GMP_NAIL_BITS;
          ADDC_LIMB (cbit, sum, hi, lo);
          cin += cbit;
          lo = next_lo;

          /* Continue only if the sum is GMP_NUMB_MAX.  GMP_NUMB_MAX is the
             only value a carry from below can propagate across.  If we've
             just seen the carry out (ie. cbit!=0) then sum!=GMP_NUMB_MAX,
             so this test stops us for that case too.  */
          if (LIKELY (sum != GMP_NUMB_MAX))
            break;
        }

      up += excess;
      size = prec;
    }

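  /* Multiply the (possibly shortened) operand by v, folding in the
     carry-in from the discarded low limbs.  A native mpn_mul_1c takes the
     carry directly, otherwise it is added to the product afterwards.  */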
  rp = r->_mp_d;
#if HAVE_NATIVE_mpn_mul_1c
  cy_limb = mpn_mul_1c (rp, up, size, vl, cin);
#else
  cy_limb = mpn_mul_1 (rp, up, size, vl);
  __GMPN_ADD_1 (cbit, rp, rp, size, cin);
  cy_limb += cbit;
#endif
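  /* A non-zero carry out becomes a new high limb, in which case the
     exponent and limb count each grow by one.  */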
  rp[size] = cy_limb;
  cy_limb = cy_limb != 0;
  r->_mp_exp = u->_mp_exp + cy_limb;
  size += cy_limb;
  r->_mp_size = usize >= 0 ? size : -size;
}