/* mpn_toom22_mul -- Multiply {ap,an} and {bp,bn} where an >= bn.  Or more
   accurately, bn <= an < 2bn.

   Contributed to the GNU project by Torbjorn Granlund.

   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 2006-2010, 2012, 2014, 2018 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */


#include "gmp-impl.h"

/* Evaluate in: -1, 0, +inf

  <-s--><--n-->
   ____ ______
  |_a1_|___a0_|
   |b1_|___b0_|
   <-t-><--n-->

  v0  =  a0     * b0       #   A(0)*B(0)
  vm1 = (a0- a1)*(b0- b1)  #  A(-1)*B(-1)
  vinf=      a1 *     b1   # A(inf)*B(inf)
*/
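
/* Interpolation then needs no division: with W = 2^(n*GMP_NUMB_BITS),

     A * B = vinf*W^2 + (v0 + vinf - vm1)*W + v0,

   where vm1 enters with the sign recorded in vm1_neg below.  This identity
   is what the recomposition code at the end of the file implements.  */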

#if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
#define MAYBE_mul_toom22   1
#else
#define MAYBE_mul_toom22						\
  (MUL_TOOM33_THRESHOLD >= 2 * MUL_TOOM22_THRESHOLD)
#endif
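
/* For operands below MUL_TOOM33_THRESHOLD (the usual route into this file),
   the halves recursed on are below MUL_TOOM33_THRESHOLD / 2.  So whenever
   MUL_TOOM33_THRESHOLD < 2 * MUL_TOOM22_THRESHOLD, every recursive call
   falls back to mpn_mul_basecase, and MAYBE_mul_toom22 == 0 lets the
   compiler discard the then-unreachable mpn_toom22_mul branches below.  */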

#define TOOM22_MUL_N_REC(p, a, b, n, ws)				\
  do {									\
    if (! MAYBE_mul_toom22						\
	|| BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))			\
      mpn_mul_basecase (p, a, n, b, n);					\
    else								\
      mpn_toom22_mul (p, a, n, b, n, ws);				\
  } while (0)

/* Normally, this calls mul_basecase or toom22_mul.  But when the ratio
   MUL_TOOM33_THRESHOLD / MUL_TOOM22_THRESHOLD is large, an initially small
   relative unbalance will become a larger and larger relative unbalance with
   each recursion (the difference s-t is invariant over recursive calls).
   Therefore, we need to call toom32_mul.  FIXME: Suppress depending on
   MUL_TOOM33_THRESHOLD / MUL_TOOM22_THRESHOLD and on MUL_TOOM22_THRESHOLD.  */
#define TOOM22_MUL_REC(p, a, an, b, bn, ws)				\
  do {									\
    if (! MAYBE_mul_toom22						\
	|| BELOW_THRESHOLD (bn, MUL_TOOM22_THRESHOLD))			\
      mpn_mul_basecase (p, a, an, b, bn);				\
    else if (4 * an < 5 * bn)						\
      mpn_toom22_mul (p, a, an, b, bn, ws);				\
    else								\
      mpn_toom32_mul (p, a, an, b, bn, ws);				\
  } while (0)
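
/* The 4 * an < 5 * bn test keeps toom22 only for mildly unbalanced operands
   (an/bn < 5/4) and sends anything more skewed to toom32.  For example,
   an = 10, bn = 8 gives 4*10 < 5*8 false (40 is not < 40), so that call
   goes to mpn_toom32_mul.  */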

void
mpn_toom22_mul (mp_ptr pp,
		mp_srcptr ap, mp_size_t an,
		mp_srcptr bp, mp_size_t bn,
		mp_ptr scratch)
{
  /* Shadow the global __gmpn_cpuvec_initialized: in fat-binary builds the
     threshold macros test that flag before each use, and this function is
     only reached after cpuvec setup, so a local constant 1 lets the
     compiler elide those checks.  */
  const int __gmpn_cpuvec_initialized = 1;
  mp_size_t n, s, t;
  int vm1_neg;
  mp_limb_t cy, cy2;
  mp_ptr asm1;
  mp_ptr bsm1;

#define a0  ap
#define a1  (ap + n)
#define b0  bp
#define b1  (bp + n)

  s = an >> 1;
  n = an - s;
  t = bn - n;
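
  /* Worked split, for concreteness: an = 7, bn = 5 gives s = 3, n = 4,
     t = 1, so a1 and b1 are the short high parts and a0, b0 the n-limb
     low parts, matching the diagram at the top of the file.  */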

  ASSERT (an >= bn);

  ASSERT (0 < s && s <= n && s >= n - 1);
  ASSERT (0 < t && t <= s);

  asm1 = pp;
  bsm1 = pp + n;
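
  /* asm1 and bsm1 are formed in the low 2n limbs of pp; they are consumed
     by the vm1 product below before v0 is written over the same area.  */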

  vm1_neg = 0;

  /* Compute asm1 = |a0 - a1|; record the sign in vm1_neg.  */
  if (s == n)
    {
      if (mpn_cmp (a0, a1, n) < 0)
	{
	  mpn_sub_n (asm1, a1, a0, n);
	  vm1_neg = 1;
	}
      else
	{
	  mpn_sub_n (asm1, a0, a1, n);
	}
    }
  else /* n - s == 1 */
    {
      if (a0[s] == 0 && mpn_cmp (a0, a1, s) < 0)
	{
	  mpn_sub_n (asm1, a1, a0, s);
	  asm1[s] = 0;
	  vm1_neg = 1;
	}
      else
	{
	  asm1[s] = a0[s] - mpn_sub_n (asm1, a0, a1, s);
	}
    }

  /* Compute bsm1 = |b0 - b1|; fold its sign into vm1_neg.  */
  if (t == n)
    {
      if (mpn_cmp (b0, b1, n) < 0)
	{
	  mpn_sub_n (bsm1, b1, b0, n);
	  vm1_neg ^= 1;
	}
      else
	{
	  mpn_sub_n (bsm1, b0, b1, n);
	}
    }
  else
    {
      if (mpn_zero_p (b0 + t, n - t) && mpn_cmp (b0, b1, t) < 0)
	{
	  mpn_sub_n (bsm1, b1, b0, t);
	  MPN_ZERO (bsm1 + t, n - t);
	  vm1_neg ^= 1;
	}
      else
	{
	  mpn_sub (bsm1, b0, n, b1, t);
	}
    }

#define v0	pp				/* 2n */
#define vinf	(pp + 2 * n)			/* s+t */
#define vm1	scratch				/* 2n */
#define scratch_out	scratch + 2 * n
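
/* vm1 occupies the first 2n limbs of scratch; scratch_out is what the
   recursive calls use.  The caller is expected to size scratch via the
   mpn_toom22_mul_itch macro in gmp-impl.h, which at this writing allows
   2*(an + GMP_NUMB_BITS) limbs.  */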

  /* vm1, 2n limbs */
  TOOM22_MUL_N_REC (vm1, asm1, bsm1, n, scratch_out);

  /* vinf, s+t limbs */
  if (s > t)  TOOM22_MUL_REC (vinf, a1, s, b1, t, scratch_out);
  else        TOOM22_MUL_N_REC (vinf, a1, b1, s, scratch_out);

  /* v0, 2n limbs */
  TOOM22_MUL_N_REC (v0, ap, bp, n, scratch_out);

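  /* pp now holds v0 in pp[0..2n) and vinf in pp[2n..2n+s+t).  The three
     adds below accumulate the middle term v0 + vinf at offset n, in place;
     cy2 ends up as the carry out of the middle's low half, cy as the carry
     out of its high half, before vm1 is folded in.  */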
  /* H(v0) + L(vinf) */
  cy = mpn_add_n (pp + 2 * n, v0 + n, vinf, n);

  /* L(v0) + (H(v0) + L(vinf)) */
  cy2 = cy + mpn_add_n (pp + n, pp + 2 * n, v0, n);

  /* L(vinf) + H(vinf) */
  cy += mpn_add (pp + 2 * n, pp + 2 * n, n, vinf + n, s + t - n);

  if (vm1_neg)
    cy += mpn_add_n (pp + n, pp + n, vm1, 2 * n);
  else {
    cy -= mpn_sub_n (pp + n, pp + n, vm1, 2 * n);
    if (UNLIKELY (cy + 1 == 0)) { /* cy is negative */
      /* The total contribution of v0+vinf-vm1 cannot be negative.  */
#if WANT_ASSERT
      /* The area must be all ones: adding cy2 turns it to zeros, and the
	 resulting carry cancels the borrow in cy.  */
      ASSERT (cy2 == 1);
      cy += mpn_add_1 (pp + 2 * n, pp + 2 * n, n, cy2);
      ASSERT (cy == 0);
#else
      /* We simply fill the area with zeros.  */
      MPN_FILL (pp + 2 * n, n, 0);
#endif
      return;
    }
  }

  ASSERT (cy  <= 2);
  ASSERT (cy2 <= 2);

  MPN_INCR_U (pp + 2 * n, s + t, cy2);
  /* If s+t == n, cy is zero, but we should not access pp[3*n] at all.  */
  MPN_INCR_U (pp + 3 * n, s + t - n, cy);
}