xref: /netbsd-src/external/lgpl3/gmp/dist/tune/speed-ext.c (revision f14316bcbc544b96a93e884bc5c2b15fd60e22ae)
1 /* An example of extending the speed program to measure routines not in GMP.
2 
3 Copyright 1999, 2000, 2002, 2003, 2005 Free Software Foundation, Inc.
4 
5 This file is part of the GNU MP Library.
6 
7 The GNU MP Library is free software; you can redistribute it and/or modify
8 it under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or (at your
10 option) any later version.
11 
12 The GNU MP Library is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
15 License for more details.
16 
17 You should have received a copy of the GNU Lesser General Public License
18 along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
19 
20 
21 /* The extension here is three versions of an mpn arithmetic mean.  These
22    aren't meant to be particularly useful, just examples.
23 
24    You can run something like the following to compare their speeds.
25 
26            ./speed-ext -s 1-20 -c mean_calls mean_open mean_open2
27 
28    On RISC chips, mean_open() might be fastest if the compiler is doing a
29    good job.  On the register starved x86s, mean_calls will be fastest.
30 
31 
32    Notes:
33 
34    SPEED_EXTRA_PROTOS and SPEED_EXTRA_ROUTINES are macros that get expanded
35    by speed.c in useful places.  SPEED_EXTRA_PROTOS goes after the header
36    files, and SPEED_EXTRA_ROUTINES goes in the array of available routines.
37 
38    The advantage of this #include "speed.c" scheme is that there's no
39    editing of a copy of that file, and new features in new versions of it
40    will be immediately available.
41 
42    In a real program the routines mean_calls() etc would probably be in
43    separate C or assembler source files, and just the measuring
44    speed_mean_calls() etc would be here.  Linking against other libraries
45    for things to measure is perfectly possible too.
46 
47    When attempting to compare two versions of the same named routine, say
48    like the generic and assembler versions of mpn_add_n(), creative use of
49    cc -D or #define is suggested, so one or both can be renamed and linked
50    into the same program.  It'll be much easier to compare them side by side
51    than with separate programs for each.
52 
53    common.c has notes on writing speed measuring routines.
54 
55    Remember to link against tune/libspeed.la (or tune/.libs/libspeed.a if
56    not using libtool) to get common.o and other objects needed by speed.c.  */
57 
58 
59 #define SPEED_EXTRA_PROTOS                                              \
60   double speed_mean_calls (struct speed_params *s);			\
61   double speed_mean_open  (struct speed_params *s);			\
62   double speed_mean_open2 (struct speed_params *s);
63 
64 #define SPEED_EXTRA_ROUTINES            \
65   { "mean_calls",  speed_mean_calls  }, \
66   { "mean_open",   speed_mean_open   }, \
67   { "mean_open2",  speed_mean_open2  },
68 
69 #include "speed.c"
70 
71 
72 /* A straightforward implementation calling mpn subroutines.
73 
74    wp,size is set to (xp,size + yp,size) / 2.  The return value is the
75    remainder from the division.  The other versions are the same.  */
76 
77 mp_limb_t
78 mean_calls (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)
79 {
80   mp_limb_t  c, ret;
81 
82   ASSERT (size >= 1);
83 
84   c = mpn_add_n (wp, xp, yp, size);
85   ret = mpn_rshift (wp, wp, size, 1) >> (GMP_LIMB_BITS-1);
86   wp[size-1] |= (c << (GMP_LIMB_BITS-1));
87   return ret;
88 }
89 
90 
91 /* An open-coded version, making one pass over the data.  The right shift is
92    done as the added limbs are produced.  The addition code follows
93    mpn/generic/add_n.c. */
94 
95 mp_limb_t
96 mean_open (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)
97 {
98   mp_limb_t  w, wprev, x, y, c, ret;
99   mp_size_t  i;
100 
101   ASSERT (size >= 1);
102 
103   x = xp[0];
104   y = yp[0];
105 
106   wprev = x + y;
107   c = (wprev < x);
108   ret = (wprev & 1);
109 
110 #define RSHIFT(hi,lo)   (((lo) >> 1) | ((hi) << (GMP_LIMB_BITS-1)))
111 
112   for (i = 1; i < size; i++)
113     {
114       x = xp[i];
115       y = yp[i];
116 
117       w = x + c;
118       c = (w < x);
119       w += y;
120       c += (w < y);
121 
122       wp[i-1] = RSHIFT (w, wprev);
123       wprev = w;
124     }
125 
126   wp[i-1] = RSHIFT (c, wprev);
127 
128   return ret;
129 }
130 
131 
132 /* Another one-pass version, but right shifting the source limbs rather than
133    the result limbs.  There's not much chance of this being better than the
134    above, but it's an alternative at least. */
135 
136 mp_limb_t
137 mean_open2 (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)
138 {
139   mp_limb_t  w, x, y, xnext, ynext, c, ret;
140   mp_size_t  i;
141 
142   ASSERT (size >= 1);
143 
144   x = xp[0];
145   y = yp[0];
146 
147   /* ret is the low bit of x+y, c is the carry out of that low bit add */
148   ret = (x ^ y) & 1;
149   c   = (x & y) & 1;
150 
151   for (i = 0; i < size-1; i++)
152     {
153       xnext = xp[i+1];
154       ynext = yp[i+1];
155       x = RSHIFT (xnext, x);
156       y = RSHIFT (ynext, y);
157 
158       w = x + c;
159       c = (w < x);
160       w += y;
161       c += (w < y);
162       wp[i] = w;
163 
164       x = xnext;
165       y = ynext;
166     }
167 
168   wp[i] = (x >> 1) + (y >> 1) + c;
169 
170   return ret;
171 }
172 
173 
174 /* The speed measuring routines are the same apart from which function they
175    run, so a macro is used.  Actually this macro is the same as
176    SPEED_ROUTINE_MPN_BINARY_N.  */
177 
178 #define SPEED_ROUTINE_MEAN(mean_fun)                    \
179   {                                                     \
180     unsigned  i;                                        \
181     mp_ptr    wp;                                       \
182     double    t;                                        \
183     TMP_DECL;                                  \
184                                                         \
185     SPEED_RESTRICT_COND (s->size >= 1);                 \
186                                                         \
187     TMP_MARK;                                  \
188     SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);   \
189                                                         \
190     speed_operand_src (s, s->xp, s->size);              \
191     speed_operand_src (s, s->yp, s->size);              \
192     speed_operand_dst (s, wp, s->size);                 \
193     speed_cache_fill (s);                               \
194                                                         \
195     speed_starttime ();                                 \
196     i = s->reps;                                        \
197     do                                                  \
198       mean_fun (wp, s->xp, s->yp, s->size);             \
199     while (--i != 0);                                   \
200     t = speed_endtime ();                               \
201                                                         \
202     TMP_FREE;                                  \
203     return t;                                           \
204   }
205 
206 double
207 speed_mean_calls (struct speed_params *s)
208 {
209   SPEED_ROUTINE_MEAN (mean_calls);
210 }
211 
212 double
213 speed_mean_open (struct speed_params *s)
214 {
215   SPEED_ROUTINE_MEAN (mean_open);
216 }
217 
218 double
219 speed_mean_open2 (struct speed_params *s)
220 {
221   SPEED_ROUTINE_MEAN (mean_open2);
222 }
223