1 /* mpn_mulmid -- middle product 2 3 Contributed by David Harvey. 4 5 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY 6 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST 7 GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. 8 9 Copyright 2011 Free Software Foundation, Inc. 10 11 This file is part of the GNU MP Library. 12 13 The GNU MP Library is free software; you can redistribute it and/or modify 14 it under the terms of the GNU Lesser General Public License as published by 15 the Free Software Foundation; either version 3 of the License, or (at your 16 option) any later version. 17 18 The GNU MP Library is distributed in the hope that it will be useful, but 19 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 20 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 21 License for more details. 22 23 You should have received a copy of the GNU Lesser General Public License 24 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ 25 26 27 #include "gmp.h" 28 #include "gmp-impl.h" 29 30 31 #define CHUNK (200 + MULMID_TOOM42_THRESHOLD) 32 33 34 void 35 mpn_mulmid (mp_ptr rp, 36 mp_srcptr ap, mp_size_t an, 37 mp_srcptr bp, mp_size_t bn) 38 { 39 mp_size_t rn, k; 40 mp_ptr scratch, temp; 41 42 ASSERT (an >= bn); 43 ASSERT (bn >= 1); 44 ASSERT (! MPN_OVERLAP_P (rp, an - bn + 3, ap, an)); 45 ASSERT (! MPN_OVERLAP_P (rp, an - bn + 3, bp, bn)); 46 47 if (bn < MULMID_TOOM42_THRESHOLD) 48 { 49 /* region not tall enough to make toom42 worthwhile for any portion */ 50 51 if (an < CHUNK) 52 { 53 /* region not too wide either, just call basecase directly */ 54 mpn_mulmid_basecase (rp, ap, an, bp, bn); 55 return; 56 } 57 58 /* Region quite wide. For better locality, use basecase on chunks: 59 60 AAABBBCC.. 61 .AAABBBCC. 62 ..AAABBBCC 63 */ 64 65 k = CHUNK - bn + 1; /* number of diagonals per chunk */ 66 67 /* first chunk (marked A in the above diagram) */ 68 mpn_mulmid_basecase (rp, ap, CHUNK, bp, bn); 69 70 /* remaining chunks (B, C, etc) */ 71 an -= k; 72 73 while (an >= CHUNK) 74 { 75 mp_limb_t t0, t1, cy; 76 ap += k, rp += k; 77 t0 = rp[0], t1 = rp[1]; 78 mpn_mulmid_basecase (rp, ap, CHUNK, bp, bn); 79 ADDC_LIMB (cy, rp[0], rp[0], t0); /* add back saved limbs */ 80 MPN_INCR_U (rp + 1, k + 1, t1 + cy); 81 an -= k; 82 } 83 84 if (an >= bn) 85 { 86 /* last remaining chunk */ 87 mp_limb_t t0, t1, cy; 88 ap += k, rp += k; 89 t0 = rp[0], t1 = rp[1]; 90 mpn_mulmid_basecase (rp, ap, an, bp, bn); 91 ADDC_LIMB (cy, rp[0], rp[0], t0); 92 MPN_INCR_U (rp + 1, an - bn + 2, t1 + cy); 93 } 94 95 return; 96 } 97 98 /* region is tall enough for toom42 */ 99 100 rn = an - bn + 1; 101 102 if (rn < MULMID_TOOM42_THRESHOLD) 103 { 104 /* region not wide enough to make toom42 worthwhile for any portion */ 105 106 TMP_DECL; 107 108 if (bn < CHUNK) 109 { 110 /* region not too tall either, just call basecase directly */ 111 mpn_mulmid_basecase (rp, ap, an, bp, bn); 112 return; 113 } 114 115 /* Region quite tall. For better locality, use basecase on chunks: 116 117 AAAAA.... 118 .AAAAA... 119 ..BBBBB.. 120 ...BBBBB. 121 ....CCCCC 122 */ 123 124 TMP_MARK; 125 126 temp = TMP_ALLOC_LIMBS (rn + 2); 127 128 /* first chunk (marked A in the above diagram) */ 129 bp += bn - CHUNK, an -= bn - CHUNK; 130 mpn_mulmid_basecase (rp, ap, an, bp, CHUNK); 131 132 /* remaining chunks (B, C, etc) */ 133 bn -= CHUNK; 134 135 while (bn >= CHUNK) 136 { 137 ap += CHUNK, bp -= CHUNK; 138 mpn_mulmid_basecase (temp, ap, an, bp, CHUNK); 139 mpn_add_n (rp, rp, temp, rn + 2); 140 bn -= CHUNK; 141 } 142 143 if (bn) 144 { 145 /* last remaining chunk */ 146 ap += CHUNK, bp -= bn; 147 mpn_mulmid_basecase (temp, ap, rn + bn - 1, bp, bn); 148 mpn_add_n (rp, rp, temp, rn + 2); 149 } 150 151 TMP_FREE; 152 return; 153 } 154 155 /* we're definitely going to use toom42 somewhere */ 156 157 if (bn > rn) 158 { 159 /* slice region into chunks, use toom42 on all chunks except possibly 160 the last: 161 162 AA.... 163 .AA... 164 ..BB.. 165 ...BB. 166 ....CC 167 */ 168 169 TMP_DECL; 170 TMP_MARK; 171 172 temp = TMP_ALLOC_LIMBS (rn + 2 + mpn_toom42_mulmid_itch (rn)); 173 scratch = temp + rn + 2; 174 175 /* first chunk (marked A in the above diagram) */ 176 bp += bn - rn; 177 mpn_toom42_mulmid (rp, ap, bp, rn, scratch); 178 179 /* remaining chunks (B, C, etc) */ 180 bn -= rn; 181 182 while (bn >= rn) 183 { 184 ap += rn, bp -= rn; 185 mpn_toom42_mulmid (temp, ap, bp, rn, scratch); 186 mpn_add_n (rp, rp, temp, rn + 2); 187 bn -= rn; 188 } 189 190 if (bn) 191 { 192 /* last remaining chunk */ 193 ap += rn, bp -= bn; 194 mpn_mulmid (temp, ap, rn + bn - 1, bp, bn); 195 mpn_add_n (rp, rp, temp, rn + 2); 196 } 197 198 TMP_FREE; 199 } 200 else 201 { 202 /* slice region into chunks, use toom42 on all chunks except possibly 203 the last: 204 205 AAABBBCC.. 206 .AAABBBCC. 207 ..AAABBBCC 208 */ 209 210 TMP_DECL; 211 TMP_MARK; 212 213 scratch = TMP_ALLOC_LIMBS (mpn_toom42_mulmid_itch (bn)); 214 215 /* first chunk (marked A in the above diagram) */ 216 mpn_toom42_mulmid (rp, ap, bp, bn, scratch); 217 218 /* remaining chunks (B, C, etc) */ 219 rn -= bn; 220 221 while (rn >= bn) 222 { 223 mp_limb_t t0, t1, cy; 224 ap += bn, rp += bn; 225 t0 = rp[0], t1 = rp[1]; 226 mpn_toom42_mulmid (rp, ap, bp, bn, scratch); 227 ADDC_LIMB (cy, rp[0], rp[0], t0); /* add back saved limbs */ 228 MPN_INCR_U (rp + 1, bn + 1, t1 + cy); 229 rn -= bn; 230 } 231 232 TMP_FREE; 233 234 if (rn) 235 { 236 /* last remaining chunk */ 237 mp_limb_t t0, t1, cy; 238 ap += bn, rp += bn; 239 t0 = rp[0], t1 = rp[1]; 240 mpn_mulmid (rp, ap, rn + bn - 1, bp, bn); 241 ADDC_LIMB (cy, rp[0], rp[0], t0); 242 MPN_INCR_U (rp + 1, rn + 1, t1 + cy); 243 } 244 } 245 } 246