xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/generic/mulmid.c (revision 72c7faa4dbb41dbb0238d6b4a109da0d4b236dd4)
1 /* mpn_mulmid -- middle product
2 
3    Contributed by David Harvey.
4 
5    THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
6    SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
7    GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
8 
9 Copyright 2011 Free Software Foundation, Inc.
10 
11 This file is part of the GNU MP Library.
12 
13 The GNU MP Library is free software; you can redistribute it and/or modify
14 it under the terms of either:
15 
16   * the GNU Lesser General Public License as published by the Free
17     Software Foundation; either version 3 of the License, or (at your
18     option) any later version.
19 
20 or
21 
22   * the GNU General Public License as published by the Free Software
23     Foundation; either version 2 of the License, or (at your option) any
24     later version.
25 
26 or both in parallel, as here.
27 
28 The GNU MP Library is distributed in the hope that it will be useful, but
29 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
30 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
31 for more details.
32 
33 You should have received copies of the GNU General Public License and the
34 GNU Lesser General Public License along with the GNU MP Library.  If not,
35 see https://www.gnu.org/licenses/.  */
36 
37 
38 #include "gmp-impl.h"
39 
40 
41 #define CHUNK (200 + MULMID_TOOM42_THRESHOLD)
42 
43 
44 void
mpn_mulmid(mp_ptr rp,mp_srcptr ap,mp_size_t an,mp_srcptr bp,mp_size_t bn)45 mpn_mulmid (mp_ptr rp,
46             mp_srcptr ap, mp_size_t an,
47             mp_srcptr bp, mp_size_t bn)
48 {
49   mp_size_t rn, k;
50   mp_ptr scratch, temp;
51 
52   ASSERT (an >= bn);
53   ASSERT (bn >= 1);
54   ASSERT (! MPN_OVERLAP_P (rp, an - bn + 3, ap, an));
55   ASSERT (! MPN_OVERLAP_P (rp, an - bn + 3, bp, bn));
56 
57   if (bn < MULMID_TOOM42_THRESHOLD)
58     {
59       /* region not tall enough to make toom42 worthwhile for any portion */
60 
61       if (an < CHUNK)
62 	{
63 	  /* region not too wide either, just call basecase directly */
64 	  mpn_mulmid_basecase (rp, ap, an, bp, bn);
65 	  return;
66 	}
67 
68       /* Region quite wide. For better locality, use basecase on chunks:
69 
70 	 AAABBBCC..
71 	 .AAABBBCC.
72 	 ..AAABBBCC
73       */
74 
75       k = CHUNK - bn + 1;    /* number of diagonals per chunk */
76 
77       /* first chunk (marked A in the above diagram) */
78       mpn_mulmid_basecase (rp, ap, CHUNK, bp, bn);
79 
80       /* remaining chunks (B, C, etc) */
81       an -= k;
82 
83       while (an >= CHUNK)
84 	{
85 	  mp_limb_t t0, t1, cy;
86 	  ap += k, rp += k;
87 	  t0 = rp[0], t1 = rp[1];
88 	  mpn_mulmid_basecase (rp, ap, CHUNK, bp, bn);
89 	  ADDC_LIMB (cy, rp[0], rp[0], t0);    /* add back saved limbs */
90 	  MPN_INCR_U (rp + 1, k + 1, t1 + cy);
91 	  an -= k;
92 	}
93 
94       if (an >= bn)
95 	{
96 	  /* last remaining chunk */
97 	  mp_limb_t t0, t1, cy;
98 	  ap += k, rp += k;
99 	  t0 = rp[0], t1 = rp[1];
100 	  mpn_mulmid_basecase (rp, ap, an, bp, bn);
101 	  ADDC_LIMB (cy, rp[0], rp[0], t0);
102 	  MPN_INCR_U (rp + 1, an - bn + 2, t1 + cy);
103 	}
104 
105       return;
106     }
107 
108   /* region is tall enough for toom42 */
109 
110   rn = an - bn + 1;
111 
112   if (rn < MULMID_TOOM42_THRESHOLD)
113     {
114       /* region not wide enough to make toom42 worthwhile for any portion */
115 
116       TMP_DECL;
117 
118       if (bn < CHUNK)
119 	{
120 	  /* region not too tall either, just call basecase directly */
121 	  mpn_mulmid_basecase (rp, ap, an, bp, bn);
122 	  return;
123 	}
124 
125       /* Region quite tall. For better locality, use basecase on chunks:
126 
127 	 AAAAA....
128 	 .AAAAA...
129 	 ..BBBBB..
130 	 ...BBBBB.
131 	 ....CCCCC
132       */
133 
134       TMP_MARK;
135 
136       temp = TMP_ALLOC_LIMBS (rn + 2);
137 
138       /* first chunk (marked A in the above diagram) */
139       bp += bn - CHUNK, an -= bn - CHUNK;
140       mpn_mulmid_basecase (rp, ap, an, bp, CHUNK);
141 
142       /* remaining chunks (B, C, etc) */
143       bn -= CHUNK;
144 
145       while (bn >= CHUNK)
146 	{
147 	  ap += CHUNK, bp -= CHUNK;
148 	  mpn_mulmid_basecase (temp, ap, an, bp, CHUNK);
149 	  mpn_add_n (rp, rp, temp, rn + 2);
150 	  bn -= CHUNK;
151 	}
152 
153       if (bn)
154 	{
155 	  /* last remaining chunk */
156 	  ap += CHUNK, bp -= bn;
157 	  mpn_mulmid_basecase (temp, ap, rn + bn - 1, bp, bn);
158 	  mpn_add_n (rp, rp, temp, rn + 2);
159 	}
160 
161       TMP_FREE;
162       return;
163     }
164 
165   /* we're definitely going to use toom42 somewhere */
166 
167   if (bn > rn)
168     {
169       /* slice region into chunks, use toom42 on all chunks except possibly
170 	 the last:
171 
172          AA....
173          .AA...
174          ..BB..
175          ...BB.
176          ....CC
177       */
178 
179       TMP_DECL;
180       TMP_MARK;
181 
182       temp = TMP_ALLOC_LIMBS (rn + 2 + mpn_toom42_mulmid_itch (rn));
183       scratch = temp + rn + 2;
184 
185       /* first chunk (marked A in the above diagram) */
186       bp += bn - rn;
187       mpn_toom42_mulmid (rp, ap, bp, rn, scratch);
188 
189       /* remaining chunks (B, C, etc) */
190       bn -= rn;
191 
192       while (bn >= rn)
193         {
194           ap += rn, bp -= rn;
195 	  mpn_toom42_mulmid (temp, ap, bp, rn, scratch);
196           mpn_add_n (rp, rp, temp, rn + 2);
197           bn -= rn;
198         }
199 
200       if (bn)
201         {
202           /* last remaining chunk */
203           ap += rn, bp -= bn;
204 	  mpn_mulmid (temp, ap, rn + bn - 1, bp, bn);
205           mpn_add_n (rp, rp, temp, rn + 2);
206         }
207 
208       TMP_FREE;
209     }
210   else
211     {
212       /* slice region into chunks, use toom42 on all chunks except possibly
213 	 the last:
214 
215          AAABBBCC..
216          .AAABBBCC.
217          ..AAABBBCC
218       */
219 
220       TMP_DECL;
221       TMP_MARK;
222 
223       scratch = TMP_ALLOC_LIMBS (mpn_toom42_mulmid_itch (bn));
224 
225       /* first chunk (marked A in the above diagram) */
226       mpn_toom42_mulmid (rp, ap, bp, bn, scratch);
227 
228       /* remaining chunks (B, C, etc) */
229       rn -= bn;
230 
231       while (rn >= bn)
232         {
233 	  mp_limb_t t0, t1, cy;
234           ap += bn, rp += bn;
235           t0 = rp[0], t1 = rp[1];
236           mpn_toom42_mulmid (rp, ap, bp, bn, scratch);
237 	  ADDC_LIMB (cy, rp[0], rp[0], t0);     /* add back saved limbs */
238 	  MPN_INCR_U (rp + 1, bn + 1, t1 + cy);
239 	  rn -= bn;
240         }
241 
242       TMP_FREE;
243 
244       if (rn)
245         {
246           /* last remaining chunk */
247 	  mp_limb_t t0, t1, cy;
248           ap += bn, rp += bn;
249           t0 = rp[0], t1 = rp[1];
250           mpn_mulmid (rp, ap, rn + bn - 1, bp, bn);
251 	  ADDC_LIMB (cy, rp[0], rp[0], t0);
252 	  MPN_INCR_U (rp + 1, rn + 1, t1 + cy);
253         }
254     }
255 }
256