1 /* hgcd_reduce.c. 2 3 THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY 4 SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST 5 GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. 6 7 Copyright 2011, 2012 Free Software Foundation, Inc. 8 9 This file is part of the GNU MP Library. 10 11 The GNU MP Library is free software; you can redistribute it and/or modify 12 it under the terms of the GNU Lesser General Public License as published by 13 the Free Software Foundation; either version 3 of the License, or (at your 14 option) any later version. 15 16 The GNU MP Library is distributed in the hope that it will be useful, but 17 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 18 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 19 License for more details. 20 21 You should have received a copy of the GNU Lesser General Public License 22 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ 23 24 #include "gmp.h" 25 #include "gmp-impl.h" 26 #include "longlong.h" 27 28 /* Computes R -= A * B. Result must be non-negative. Normalized down 29 to size an, and resulting size is returned. */ 30 static mp_size_t 31 submul (mp_ptr rp, mp_size_t rn, 32 mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn) 33 { 34 mp_ptr tp; 35 TMP_DECL; 36 37 ASSERT (bn > 0); 38 ASSERT (an >= bn); 39 ASSERT (rn >= an); 40 ASSERT (an + bn <= rn + 1); 41 42 TMP_MARK; 43 tp = TMP_ALLOC_LIMBS (an + bn); 44 45 mpn_mul (tp, ap, an, bp, bn); 46 if (an + bn > rn) 47 { 48 ASSERT (tp[rn] == 0); 49 bn--; 50 } 51 ASSERT_NOCARRY (mpn_sub (rp, rp, rn, tp, an + bn)); 52 TMP_FREE; 53 54 while (rn > an && (rp[rn-1] == 0)) 55 rn--; 56 57 return rn; 58 } 59 60 /* Computes (a, b) <-- M^{-1} (a; b) */ 61 /* FIXME: 62 x Take scratch parameter, and figure out scratch need. 63 64 x Use some fallback for small M->n? 65 */ 66 static mp_size_t 67 hgcd_matrix_apply (const struct hgcd_matrix *M, 68 mp_ptr ap, mp_ptr bp, 69 mp_size_t n) 70 { 71 mp_size_t an, bn, un, vn, nn; 72 mp_size_t mn[2][2]; 73 mp_size_t modn; 74 mp_ptr tp, sp, scratch; 75 mp_limb_t cy; 76 unsigned i, j; 77 78 TMP_DECL; 79 80 ASSERT ( (ap[n-1] | bp[n-1]) > 0); 81 82 an = n; 83 MPN_NORMALIZE (ap, an); 84 bn = n; 85 MPN_NORMALIZE (bp, bn); 86 87 for (i = 0; i < 2; i++) 88 for (j = 0; j < 2; j++) 89 { 90 mp_size_t k; 91 k = M->n; 92 MPN_NORMALIZE (M->p[i][j], k); 93 mn[i][j] = k; 94 } 95 96 ASSERT (mn[0][0] > 0); 97 ASSERT (mn[1][1] > 0); 98 ASSERT ( (mn[0][1] | mn[1][0]) > 0); 99 100 TMP_MARK; 101 102 if (mn[0][1] == 0) 103 { 104 /* A unchanged, M = (1, 0; q, 1) */ 105 ASSERT (mn[0][0] == 1); 106 ASSERT (M->p[0][0][0] == 1); 107 ASSERT (mn[1][1] == 1); 108 ASSERT (M->p[1][1][0] == 1); 109 110 /* Put B <-- B - q A */ 111 nn = submul (bp, bn, ap, an, M->p[1][0], mn[1][0]); 112 } 113 else if (mn[1][0] == 0) 114 { 115 /* B unchanged, M = (1, q; 0, 1) */ 116 ASSERT (mn[0][0] == 1); 117 ASSERT (M->p[0][0][0] == 1); 118 ASSERT (mn[1][1] == 1); 119 ASSERT (M->p[1][1][0] == 1); 120 121 /* Put A <-- A - q * B */ 122 nn = submul (ap, an, bp, bn, M->p[0][1], mn[0][1]); 123 } 124 else 125 { 126 /* A = m00 a + m01 b ==> a <= A / m00, b <= A / m01. 127 B = m10 a + m11 b ==> a <= B / m10, b <= B / m11. */ 128 un = MIN (an - mn[0][0], bn - mn[1][0]) + 1; 129 vn = MIN (an - mn[0][1], bn - mn[1][1]) + 1; 130 131 nn = MAX (un, vn); 132 /* In the range of interest, mulmod_bnm1 should always beat mullo. */ 133 modn = mpn_mulmod_bnm1_next_size (nn + 1); 134 135 scratch = TMP_ALLOC_LIMBS (mpn_mulmod_bnm1_itch (modn, modn, M->n)); 136 tp = TMP_ALLOC_LIMBS (modn); 137 sp = TMP_ALLOC_LIMBS (modn); 138 139 ASSERT (n <= 2*modn); 140 141 if (n > modn) 142 { 143 cy = mpn_add (ap, ap, modn, ap + modn, n - modn); 144 MPN_INCR_U (ap, modn, cy); 145 146 cy = mpn_add (bp, bp, modn, bp + modn, n - modn); 147 MPN_INCR_U (bp, modn, cy); 148 149 n = modn; 150 } 151 152 mpn_mulmod_bnm1 (tp, modn, ap, n, M->p[1][1], mn[1][1], scratch); 153 mpn_mulmod_bnm1 (sp, modn, bp, n, M->p[0][1], mn[0][1], scratch); 154 155 /* FIXME: Handle the small n case in some better way. */ 156 if (n + mn[1][1] < modn) 157 MPN_ZERO (tp + n + mn[1][1], modn - n - mn[1][1]); 158 if (n + mn[0][1] < modn) 159 MPN_ZERO (sp + n + mn[0][1], modn - n - mn[0][1]); 160 161 cy = mpn_sub_n (tp, tp, sp, modn); 162 MPN_DECR_U (tp, modn, cy); 163 164 ASSERT (mpn_zero_p (tp + nn, modn - nn)); 165 166 mpn_mulmod_bnm1 (sp, modn, ap, n, M->p[1][0], mn[1][0], scratch); 167 MPN_COPY (ap, tp, nn); 168 mpn_mulmod_bnm1 (tp, modn, bp, n, M->p[0][0], mn[0][0], scratch); 169 170 if (n + mn[1][0] < modn) 171 MPN_ZERO (sp + n + mn[1][0], modn - n - mn[1][0]); 172 if (n + mn[0][0] < modn) 173 MPN_ZERO (tp + n + mn[0][0], modn - n - mn[0][0]); 174 175 cy = mpn_sub_n (tp, tp, sp, modn); 176 MPN_DECR_U (tp, modn, cy); 177 178 ASSERT (mpn_zero_p (tp + nn, modn - nn)); 179 MPN_COPY (bp, tp, nn); 180 181 while ( (ap[nn-1] | bp[nn-1]) == 0) 182 { 183 nn--; 184 ASSERT (nn > 0); 185 } 186 } 187 TMP_FREE; 188 189 return nn; 190 } 191 192 mp_size_t 193 mpn_hgcd_reduce_itch (mp_size_t n, mp_size_t p) 194 { 195 mp_size_t itch; 196 if (BELOW_THRESHOLD (n, HGCD_REDUCE_THRESHOLD)) 197 { 198 itch = mpn_hgcd_itch (n-p); 199 200 /* For arbitrary p, the storage for _adjust is 2*(p + M->n) = 2 * 201 (p + ceil((n-p)/2) - 1 <= n + p - 1 */ 202 if (itch < n + p - 1) 203 itch = n + p - 1; 204 } 205 else 206 { 207 itch = 2*(n-p) + mpn_hgcd_itch (n-p); 208 /* Currently, hgcd_matrix_apply allocates its own storage. */ 209 } 210 return itch; 211 } 212 213 /* FIXME: Document storage need. */ 214 mp_size_t 215 mpn_hgcd_reduce (struct hgcd_matrix *M, 216 mp_ptr ap, mp_ptr bp, mp_size_t n, mp_size_t p, 217 mp_ptr tp) 218 { 219 mp_size_t nn; 220 if (BELOW_THRESHOLD (n, HGCD_REDUCE_THRESHOLD)) 221 { 222 nn = mpn_hgcd (ap + p, bp + p, n - p, M, tp); 223 if (nn > 0) 224 /* Needs 2*(p + M->n) <= 2*(floor(n/2) + ceil(n/2) - 1) 225 = 2 (n - 1) */ 226 return mpn_hgcd_matrix_adjust (M, p + nn, ap, bp, p, tp); 227 } 228 else 229 { 230 MPN_COPY (tp, ap + p, n - p); 231 MPN_COPY (tp + n - p, bp + p, n - p); 232 if (mpn_hgcd_appr (tp, tp + n - p, n - p, M, tp + 2*(n-p))) 233 return hgcd_matrix_apply (M, ap, bp, n); 234 } 235 return 0; 236 } 237