xref: /netbsd-src/external/lgpl3/gmp/dist/tune/modlinv.c (revision 16dce51364ebe8aeafbae46bc5aa167b8115bc45)
1 /* Alternate implementations of binvert_limb to compare speeds. */
2 
3 /*
4 Copyright 2000, 2002 Free Software Foundation, Inc.
5 
6 This file is part of the GNU MP Library.
7 
8 The GNU MP Library is free software; you can redistribute it and/or modify
9 it under the terms of either:
10 
11   * the GNU Lesser General Public License as published by the Free
12     Software Foundation; either version 3 of the License, or (at your
13     option) any later version.
14 
15 or
16 
17   * the GNU General Public License as published by the Free Software
18     Foundation; either version 2 of the License, or (at your option) any
19     later version.
20 
21 or both in parallel, as here.
22 
23 The GNU MP Library is distributed in the hope that it will be useful, but
24 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
25 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
26 for more details.
27 
28 You should have received copies of the GNU General Public License and the
29 GNU Lesser General Public License along with the GNU MP Library.  If not,
30 see https://www.gnu.org/licenses/.  */
31 
32 #include <stdio.h>
33 #include "gmp.h"
34 #include "gmp-impl.h"
35 #include "longlong.h"
36 #include "speed.h"
37 
38 
39 /* Like the standard version in gmp-impl.h, but with the expressions using a
40    "1-" form.  This has the same number of steps, but "1-" is on the
41    dependent chain, whereas the "2*" in the standard version isn't.
42    Depending on the CPU this should be the same or a touch slower.  */
43 
44 #if GMP_LIMB_BITS <= 32
45 #define binvert_limb_mul1(inv,n)                                \
46   do {                                                          \
47     mp_limb_t  __n = (n);                                       \
48     mp_limb_t  __inv;                                           \
49     ASSERT ((__n & 1) == 1);                                    \
50     __inv = binvert_limb_table[(__n&0xFF)/2]; /*  8 */          \
51     __inv = (1 - __n * __inv) * __inv + __inv;  /* 16 */        \
52     __inv = (1 - __n * __inv) * __inv + __inv;  /* 32 */        \
53     ASSERT (__inv * __n == 1);                                  \
54     (inv) = __inv;                                              \
55   } while (0)
56 #endif
57 
58 #if GMP_LIMB_BITS > 32 && GMP_LIMB_BITS <= 64
59 #define binvert_limb_mul1(inv,n)                                \
60   do {                                                          \
61     mp_limb_t  __n = (n);                                       \
62     mp_limb_t  __inv;                                           \
63     ASSERT ((__n & 1) == 1);                                    \
64     __inv = binvert_limb_table[(__n&0xFF)/2]; /*  8 */          \
65     __inv = (1 - __n * __inv) * __inv + __inv;  /* 16 */        \
66     __inv = (1 - __n * __inv) * __inv + __inv;  /* 32 */        \
67     __inv = (1 - __n * __inv) * __inv + __inv;  /* 64 */        \
68     ASSERT (__inv * __n == 1);                                  \
69     (inv) = __inv;                                              \
70   } while (0)
71 #endif
72 
73 
74 /* The loop based version used in GMP 3.0 and earlier.  Usually slower than
75    multiplying, due to the number of steps that must be performed.  Much
76    slower when the processor has a good multiply.  */
77 
78 #define binvert_limb_loop(inv,n)                \
79   do {                                          \
80     mp_limb_t  __v = (n);                       \
81     mp_limb_t  __v_orig = __v;                  \
82     mp_limb_t  __make_zero = 1;                 \
83     mp_limb_t  __two_i = 1;                     \
84     mp_limb_t  __v_inv = 0;                     \
85                                                 \
86     ASSERT ((__v & 1) == 1);                    \
87                                                 \
88     do                                          \
89       {                                         \
90         while ((__two_i & __make_zero) == 0)    \
91           __two_i <<= 1, __v <<= 1;             \
92         __v_inv += __two_i;                     \
93         __make_zero -= __v;                     \
94       }                                         \
95     while (__make_zero);                        \
96                                                 \
97     ASSERT (__v_orig * __v_inv == 1);           \
98     (inv) = __v_inv;                            \
99   } while (0)
100 
101 
102 /* Another loop based version with conditionals, but doing a fixed number of
103    steps. */
104 
105 #define binvert_limb_cond(inv,n)                \
106   do {                                          \
107     mp_limb_t  __n = (n);                       \
108     mp_limb_t  __rem = (1 - __n) >> 1;          \
109     mp_limb_t  __inv = GMP_LIMB_HIGHBIT;        \
110     int        __count;                         \
111                                                 \
112     ASSERT ((__n & 1) == 1);                    \
113                                                 \
114     __count = GMP_LIMB_BITS-1;               \
115     do                                          \
116       {                                         \
117         __inv >>= 1;                            \
118         if (__rem & 1)                          \
119           {                                     \
120             __inv |= GMP_LIMB_HIGHBIT;          \
121             __rem -= __n;                       \
122           }                                     \
123         __rem >>= 1;                            \
124       }                                         \
125     while (-- __count);                         \
126                                                 \
127     ASSERT (__inv * __n == 1);                  \
128     (inv) = __inv;                              \
129   } while (0)
130 
131 
132 /* Another loop based bitwise version, but purely arithmetic, no
133    conditionals. */
134 
135 #define binvert_limb_arith(inv,n)                                       \
136   do {                                                                  \
137     mp_limb_t  __n = (n);                                               \
138     mp_limb_t  __rem = (1 - __n) >> 1;                                  \
139     mp_limb_t  __inv = GMP_LIMB_HIGHBIT;                                \
140     mp_limb_t  __lowbit;                                                \
141     int        __count;                                                 \
142                                                                         \
143     ASSERT ((__n & 1) == 1);                                            \
144                                                                         \
145     __count = GMP_LIMB_BITS-1;                                       \
146     do                                                                  \
147       {                                                                 \
148         __lowbit = __rem & 1;                                           \
149         __inv = (__inv >> 1) | (__lowbit << (GMP_LIMB_BITS-1));      \
150         __rem = (__rem - (__n & -__lowbit)) >> 1;                       \
151       }                                                                 \
152     while (-- __count);                                                 \
153                                                                         \
154     ASSERT (__inv * __n == 1);                                          \
155     (inv) = __inv;                                                      \
156   } while (0)
157 
158 
159 double
160 speed_binvert_limb_mul1 (struct speed_params *s)
161 {
162   SPEED_ROUTINE_MODLIMB_INVERT (binvert_limb_mul1);
163 }
164 double
165 speed_binvert_limb_loop (struct speed_params *s)
166 {
167   SPEED_ROUTINE_MODLIMB_INVERT (binvert_limb_loop);
168 }
169 double
170 speed_binvert_limb_cond (struct speed_params *s)
171 {
172   SPEED_ROUTINE_MODLIMB_INVERT (binvert_limb_cond);
173 }
174 double
175 speed_binvert_limb_arith (struct speed_params *s)
176 {
177   SPEED_ROUTINE_MODLIMB_INVERT (binvert_limb_arith);
178 }
179