xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/generic/brootinv.c (revision 48fb7bfab72acd4281a53bbee5ccf3f809019e75)
1 /* mpn_brootinv, compute r such that r^k * y = 1 (mod 2^b).
2 
3    Contributed to the GNU project by Martin Boij (as part of perfpow.c).
4 
5 Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
6 
7 This file is part of the GNU MP Library.
8 
9 The GNU MP Library is free software; you can redistribute it and/or modify
10 it under the terms of the GNU Lesser General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or (at your
12 option) any later version.
13 
14 The GNU MP Library is distributed in the hope that it will be useful, but
15 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
17 License for more details.
18 
19 You should have received a copy of the GNU Lesser General Public License
20 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
21 
22 #include "gmp.h"
23 #include "gmp-impl.h"
24 
25 /* Computes a^e (mod B). Uses right-to-left binary algorithm, since
26    typical use will have e small. */
27 static mp_limb_t
28 powlimb (mp_limb_t a, mp_limb_t e)
29 {
30   mp_limb_t r = 1;
31   mp_limb_t s = a;
32 
33   for (r = 1, s = a; e > 0; e >>= 1, s *= s)
34     if (e & 1)
35       r *= s;
36 
37   return r;
38 }
39 
40 /* Compute r such that r^k * y = 1 (mod B^n).
41 
42    Iterates
43      r' <-- k^{-1} ((k+1) r - r^{k+1} y) (mod 2^b)
44    using Hensel lifting, each time doubling the number of known bits in r.
45 
46    Works just for odd k.  Else the Hensel lifting degenerates.
47 
48    FIXME:
49 
50      (1) Make it work for k == GMP_LIMB_MAX (k+1 below overflows).
51 
52      (2) Rewrite iteration as
53 	   r' <-- r - k^{-1} r (r^k y - 1)
54 	 and take advantage of the zero low part of r^k y - 1.
55 
56      (3) Use wrap-around trick.
57 
58      (4) Use a small table to get starting value.
59 
60    Scratch need: 5*bn, where bn = ceil (bnb / GMP_NUMB_BITS).
61 */
62 
63 void
64 mpn_brootinv (mp_ptr rp, mp_srcptr yp, mp_size_t bn, mp_limb_t k, mp_ptr tp)
65 {
66   mp_ptr tp2, tp3;
67   mp_limb_t kinv, k2, r0, y0;
68   mp_size_t order[GMP_LIMB_BITS + 1];
69   int i, d;
70 
71   ASSERT (bn > 0);
72   ASSERT ((k & 1) != 0);
73 
74   tp2 = tp + bn;
75   tp3 = tp + 2 * bn;
76   k2 = k + 1;
77 
78   binvert_limb (kinv, k);
79 
80   /* 4-bit initial approximation:
81 
82    y%16 | 1  3  5  7  9 11 13 15,
83     k%4 +-----------------------------
84      1  | 1 11 13  7  9  3  5 15
85      3  | 1  3  5  7  9 11 13 15
86 
87   */
88   y0 = yp[0];
89 
90   r0 = y0 ^ (((y0 << 1) ^ (y0 << 2)) & ~(k << 2) & 8);		/* 4 bits */
91   r0 = kinv * (k2 * r0 - y0 * powlimb(r0, k2 & 0x7f));		/* 8 bits */
92   r0 = kinv * (k2 * r0 - y0 * powlimb(r0, k2 & 0xffff));	/* 16 bits */
93   r0 = kinv * (k2 * r0 - y0 * powlimb(r0, k2));			/* 32 bits */
94 #if GMP_NUMB_BITS > 32
95   {
96     unsigned prec = 32;
97     do
98       {
99 	r0 = kinv * (k2 * r0 - y0 * powlimb(r0, k2));
100 	prec *= 2;
101       }
102     while (prec < GMP_NUMB_BITS);
103   }
104 #endif
105 
106   rp[0] = r0;
107   if (bn == 1)
108     return;
109 
110   /* This initialization doesn't matter for the result (any garbage is
111      cancelled in the iteration), but proper initialization makes
112      valgrind happier. */
113   MPN_ZERO (rp+1, bn-1);
114 
115   d = 0;
116   for (; bn > 1; bn = (bn + 1) >> 1)
117     order[d++] = bn;
118 
119   for (i = d - 1; i >= 0; i--)
120     {
121       bn = order[i];
122 
123       mpn_mul_1 (tp, rp, bn, k2);
124 
125       mpn_powlo (tp2, rp, &k2, 1, bn, tp3);
126       mpn_mullo_n (rp, yp, tp2, bn);
127 
128       mpn_sub_n (tp2, tp, rp, bn);
129       mpn_pi1_bdiv_q_1 (rp, tp2, bn, k, kinv, 0);
130     }
131 }
132