1 /* mpn_brootinv, compute r such that r^k * y = 1 (mod 2^b). 2 3 Contributed to the GNU project by Martin Boij (as part of perfpow.c). 4 5 Copyright 2009, 2010, 2012 Free Software Foundation, Inc. 6 7 This file is part of the GNU MP Library. 8 9 The GNU MP Library is free software; you can redistribute it and/or modify 10 it under the terms of the GNU Lesser General Public License as published by 11 the Free Software Foundation; either version 3 of the License, or (at your 12 option) any later version. 13 14 The GNU MP Library is distributed in the hope that it will be useful, but 15 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 17 License for more details. 18 19 You should have received a copy of the GNU Lesser General Public License 20 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ 21 22 #include "gmp.h" 23 #include "gmp-impl.h" 24 25 /* Computes a^e (mod B). Uses right-to-left binary algorithm, since 26 typical use will have e small. */ 27 static mp_limb_t 28 powlimb (mp_limb_t a, mp_limb_t e) 29 { 30 mp_limb_t r = 1; 31 mp_limb_t s = a; 32 33 for (r = 1, s = a; e > 0; e >>= 1, s *= s) 34 if (e & 1) 35 r *= s; 36 37 return r; 38 } 39 40 /* Compute r such that r^k * y = 1 (mod B^n). 41 42 Iterates 43 r' <-- k^{-1} ((k+1) r - r^{k+1} y) (mod 2^b) 44 using Hensel lifting, each time doubling the number of known bits in r. 45 46 Works just for odd k. Else the Hensel lifting degenerates. 47 48 FIXME: 49 50 (1) Make it work for k == GMP_LIMB_MAX (k+1 below overflows). 51 52 (2) Rewrite iteration as 53 r' <-- r - k^{-1} r (r^k y - 1) 54 and take advantage of the zero low part of r^k y - 1. 55 56 (3) Use wrap-around trick. 57 58 (4) Use a small table to get starting value. 59 60 Scratch need: 5*bn, where bn = ceil (bnb / GMP_NUMB_BITS). 61 */ 62 63 void 64 mpn_brootinv (mp_ptr rp, mp_srcptr yp, mp_size_t bn, mp_limb_t k, mp_ptr tp) 65 { 66 mp_ptr tp2, tp3; 67 mp_limb_t kinv, k2, r0, y0; 68 mp_size_t order[GMP_LIMB_BITS + 1]; 69 int i, d; 70 71 ASSERT (bn > 0); 72 ASSERT ((k & 1) != 0); 73 74 tp2 = tp + bn; 75 tp3 = tp + 2 * bn; 76 k2 = k + 1; 77 78 binvert_limb (kinv, k); 79 80 /* 4-bit initial approximation: 81 82 y%16 | 1 3 5 7 9 11 13 15, 83 k%4 +----------------------------- 84 1 | 1 11 13 7 9 3 5 15 85 3 | 1 3 5 7 9 11 13 15 86 87 */ 88 y0 = yp[0]; 89 90 r0 = y0 ^ (((y0 << 1) ^ (y0 << 2)) & ~(k << 2) & 8); /* 4 bits */ 91 r0 = kinv * (k2 * r0 - y0 * powlimb(r0, k2 & 0x7f)); /* 8 bits */ 92 r0 = kinv * (k2 * r0 - y0 * powlimb(r0, k2 & 0xffff)); /* 16 bits */ 93 r0 = kinv * (k2 * r0 - y0 * powlimb(r0, k2)); /* 32 bits */ 94 #if GMP_NUMB_BITS > 32 95 { 96 unsigned prec = 32; 97 do 98 { 99 r0 = kinv * (k2 * r0 - y0 * powlimb(r0, k2)); 100 prec *= 2; 101 } 102 while (prec < GMP_NUMB_BITS); 103 } 104 #endif 105 106 rp[0] = r0; 107 if (bn == 1) 108 return; 109 110 /* This initialization doesn't matter for the result (any garbage is 111 cancelled in the iteration), but proper initialization makes 112 valgrind happier. */ 113 MPN_ZERO (rp+1, bn-1); 114 115 d = 0; 116 for (; bn > 1; bn = (bn + 1) >> 1) 117 order[d++] = bn; 118 119 for (i = d - 1; i >= 0; i--) 120 { 121 bn = order[i]; 122 123 mpn_mul_1 (tp, rp, bn, k2); 124 125 mpn_powlo (tp2, rp, &k2, 1, bn, tp3); 126 mpn_mullo_n (rp, yp, tp2, bn); 127 128 mpn_sub_n (tp2, tp, rp, bn); 129 mpn_pi1_bdiv_q_1 (rp, tp2, bn, k, kinv, 0); 130 } 131 } 132