1 /* $NetBSD: fpu_cordic.c,v 1.4 2016/12/06 05:58:19 isaki Exp $ */
2
3 /*
4 * Copyright (c) 2013 Tetsuya Isaki. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
20 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
22 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
23 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 #include <sys/cdefs.h>
29 __KERNEL_RCSID(0, "$NetBSD: fpu_cordic.c,v 1.4 2016/12/06 05:58:19 isaki Exp $");
30
31 #include <machine/ieee.h>
32
33 #include "fpu_emulate.h"
34
35 /*
36 * sfpn = shoftened fp number; the idea is from fpu_log.c but not the same.
37 * The most significant byte of sp_m0 is EXP (signed byte) and the rest
38 * of sp_m0 is fp_mant[0].
39 */
40 struct sfpn {
41 uint32_t sp_m0;
42 uint32_t sp_m1;
43 uint32_t sp_m2;
44 };
45
46 #if defined(CORDIC_BOOTSTRAP)
47 /*
48 * This is a bootstrap code to generate a pre-calculated tables such as
49 * atan_table[]. However, it's just for reference.
50 * If you want to run the bootstrap, you will define CORDIC_BOOTSTRAP
51 * and modify these files as a userland application.
52 */
53
54 #include <stdio.h>
55 #include <stdlib.h>
56 #include <string.h>
57 #include <float.h>
58
59 static void prepare_cordic_const(struct fpemu *);
60 static struct fpn *fpu_gain1_cordic(struct fpemu *);
61 static struct fpn *fpu_atan_taylor(struct fpemu *);
62 static void printf_fpn(const struct fpn *);
63 static void printf_sfpn(const struct sfpn *);
64 static void fpn_to_sfpn(struct sfpn *, const struct fpn *);
65
66 static struct sfpn atan_table[EXT_FRACBITS];
67 static struct fpn inv_gain1;
68
69 int
main(int argc,char * argv[])70 main(int argc, char *argv[])
71 {
72 struct fpemu dummyfe;
73 int i;
74 struct fpn fp;
75
76 memset(&dummyfe, 0, sizeof(dummyfe));
77 prepare_cordic_const(&dummyfe);
78
79 /* output as source code */
80 printf("static const struct sfpn atan_table[] = {\n");
81 for (i = 0; i < EXT_FRACBITS; i++) {
82 printf("\t");
83 printf_sfpn(&atan_table[i]);
84 printf(",\n");
85 }
86 printf("};\n\n");
87
88 printf("const struct fpn fpu_cordic_inv_gain1 =\n\t");
89 printf_fpn(&inv_gain1);
90 printf(";\n\n");
91 }
92
93 /*
94 * This routine uses fpu_const(), fpu_add(), fpu_div(), fpu_logn()
95 * and fpu_atan_taylor() as bootstrap.
96 */
97 static void
prepare_cordic_const(struct fpemu * fe)98 prepare_cordic_const(struct fpemu *fe)
99 {
100 struct fpn t;
101 struct fpn x;
102 struct fpn *r;
103 int i;
104
105 /* atan_table */
106 fpu_const(&t, FPU_CONST_1);
107 for (i = 0; i < EXT_FRACBITS; i++) {
108 /* atan(t) */
109 CPYFPN(&fe->fe_f2, &t);
110 r = fpu_atan_taylor(fe);
111 fpn_to_sfpn(&atan_table[i], r);
112
113 /* t /= 2 */
114 t.fp_exp--;
115 }
116
117 /* inv_gain1 = 1 / gain1cordic() */
118 r = fpu_gain1_cordic(fe);
119 CPYFPN(&fe->fe_f2, r);
120 fpu_const(&fe->fe_f1, FPU_CONST_1);
121 r = fpu_div(fe);
122 CPYFPN(&inv_gain1, r);
123 }
124
125 static struct fpn *
fpu_gain1_cordic(struct fpemu * fe)126 fpu_gain1_cordic(struct fpemu *fe)
127 {
128 struct fpn x;
129 struct fpn y;
130 struct fpn z;
131 struct fpn v;
132
133 fpu_const(&x, FPU_CONST_1);
134 fpu_const(&y, FPU_CONST_0);
135 fpu_const(&z, FPU_CONST_0);
136 CPYFPN(&v, &x);
137 v.fp_sign = !v.fp_sign;
138
139 fpu_cordit1(fe, &x, &y, &z, &v);
140 CPYFPN(&fe->fe_f2, &x);
141 return &fe->fe_f2;
142 }
143
144 /*
145 * arctan(x) = pi/4 (for |x| = 1)
146 *
147 * x^3 x^5 x^7
148 * arctan(x) = x - --- + --- - --- + ... (for |x| < 1)
149 * 3 5 7
150 */
151 static struct fpn *
fpu_atan_taylor(struct fpemu * fe)152 fpu_atan_taylor(struct fpemu *fe)
153 {
154 struct fpn res;
155 struct fpn x2;
156 struct fpn s0;
157 struct fpn *s1;
158 struct fpn *r;
159 uint32_t k;
160
161 /* arctan(1) is pi/4 */
162 if (fe->fe_f2.fp_exp == 0) {
163 fpu_const(&fe->fe_f2, FPU_CONST_PI);
164 fe->fe_f2.fp_exp -= 2;
165 return &fe->fe_f2;
166 }
167
168 /* s0 := x */
169 CPYFPN(&s0, &fe->fe_f2);
170
171 /* res := x */
172 CPYFPN(&res, &fe->fe_f2);
173
174 /* x2 := x * x */
175 CPYFPN(&fe->fe_f1, &fe->fe_f2);
176 r = fpu_mul(fe);
177 CPYFPN(&x2, r);
178
179 k = 3;
180 for (;;) {
181 /* s1 := -s0 * x2 */
182 CPYFPN(&fe->fe_f1, &s0);
183 CPYFPN(&fe->fe_f2, &x2);
184 s1 = fpu_mul(fe);
185 s1->fp_sign ^= 1;
186 CPYFPN(&fe->fe_f1, s1);
187
188 /* s0 := s1 for next loop */
189 CPYFPN(&s0, s1);
190
191 /* s1 := s1 / k */
192 fpu_explode(fe, &fe->fe_f2, FTYPE_LNG, &k);
193 s1 = fpu_div(fe);
194
195 /* break if s1 is enough small */
196 if (ISZERO(s1))
197 break;
198 if (res.fp_exp - s1->fp_exp >= FP_NMANT)
199 break;
200
201 /* res += s1 */
202 CPYFPN(&fe->fe_f2, s1);
203 CPYFPN(&fe->fe_f1, &res);
204 r = fpu_add(fe);
205 CPYFPN(&res, r);
206
207 k += 2;
208 }
209
210 CPYFPN(&fe->fe_f2, &res);
211 return &fe->fe_f2;
212 }
213
214 static void
printf_fpn(const struct fpn * fp)215 printf_fpn(const struct fpn *fp)
216 {
217 printf("{ %d, %d, %3d, %d, { 0x%08x, 0x%08x, 0x%08x, }, }",
218 fp->fp_class, fp->fp_sign, fp->fp_exp, fp->fp_sticky ? 1 : 0,
219 fp->fp_mant[0], fp->fp_mant[1], fp->fp_mant[2]);
220 }
221
222 static void
printf_sfpn(const struct sfpn * sp)223 printf_sfpn(const struct sfpn *sp)
224 {
225 printf("{ 0x%08x, 0x%08x, 0x%08x, }",
226 sp->sp_m0, sp->sp_m1, sp->sp_m2);
227 }
228
229 static void
fpn_to_sfpn(struct sfpn * sp,const struct fpn * fp)230 fpn_to_sfpn(struct sfpn *sp, const struct fpn *fp)
231 {
232 sp->sp_m0 = (fp->fp_exp << 24) | fp->fp_mant[0];
233 sp->sp_m1 = fp->fp_mant[1];
234 sp->sp_m2 = fp->fp_mant[2];
235 }
236
237 #else /* CORDIC_BOOTSTRAP */
238
239 static const struct sfpn atan_table[] = {
240 { 0xff06487e, 0xd5110b46, 0x11a80000, },
241 { 0xfe076b19, 0xc1586ed3, 0xda2b7f0d, },
242 { 0xfd07d6dd, 0x7e4b2037, 0x58ab6e33, },
243 { 0xfc07f56e, 0xa6ab0bdb, 0x719644b5, },
244 { 0xfb07fd56, 0xedcb3f7a, 0x71b65937, },
245 { 0xfa07ff55, 0x6eea5d89, 0x2a13bce7, },
246 { 0xf907ffd5, 0x56eedca6, 0xaddf3c5f, },
247 { 0xf807fff5, 0x556eeea5, 0xcb403117, },
248 { 0xf707fffd, 0x5556eeed, 0xca5d8956, },
249 { 0xf607ffff, 0x55556eee, 0xea5ca6ab, },
250 { 0xf507ffff, 0xd55556ee, 0xeedca5c8, },
251 { 0xf407ffff, 0xf555556e, 0xeeeea5c8, },
252 { 0xf307ffff, 0xfd555556, 0xeeeeedc8, },
253 { 0xf207ffff, 0xff555555, 0x6eeeeee8, },
254 { 0xf107ffff, 0xffd55555, 0x56eeeeed, },
255 { 0xf007ffff, 0xfff55555, 0x556eeeed, },
256 { 0xef07ffff, 0xfffd5555, 0x5556eeed, },
257 { 0xee07ffff, 0xffff5555, 0x55556eed, },
258 { 0xed07ffff, 0xffffd555, 0x555556ed, },
259 { 0xec07ffff, 0xfffff555, 0x5555556d, },
260 { 0xeb07ffff, 0xfffffd55, 0x55555555, },
261 { 0xea07ffff, 0xffffff55, 0x55555554, },
262 { 0xe907ffff, 0xffffffd5, 0x55555554, },
263 { 0xe807ffff, 0xfffffff5, 0x55555554, },
264 { 0xe707ffff, 0xfffffffd, 0x55555554, },
265 { 0xe607ffff, 0xffffffff, 0x55555554, },
266 { 0xe507ffff, 0xffffffff, 0xd5555554, },
267 { 0xe407ffff, 0xffffffff, 0xf5555554, },
268 { 0xe307ffff, 0xffffffff, 0xfd555554, },
269 { 0xe207ffff, 0xffffffff, 0xff555554, },
270 { 0xe107ffff, 0xffffffff, 0xffd55554, },
271 { 0xe007ffff, 0xffffffff, 0xfff55554, },
272 { 0xdf07ffff, 0xffffffff, 0xfffd5554, },
273 { 0xde07ffff, 0xffffffff, 0xffff5554, },
274 { 0xdd07ffff, 0xffffffff, 0xffffd554, },
275 { 0xdc07ffff, 0xffffffff, 0xfffff554, },
276 { 0xdb07ffff, 0xffffffff, 0xfffffd54, },
277 { 0xda07ffff, 0xffffffff, 0xffffff54, },
278 { 0xd907ffff, 0xffffffff, 0xffffffd4, },
279 { 0xd807ffff, 0xffffffff, 0xfffffff4, },
280 { 0xd707ffff, 0xffffffff, 0xfffffffc, },
281 { 0xd7040000, 0x00000000, 0x00000000, },
282 { 0xd6040000, 0x00000000, 0x00000000, },
283 { 0xd5040000, 0x00000000, 0x00000000, },
284 { 0xd4040000, 0x00000000, 0x00000000, },
285 { 0xd3040000, 0x00000000, 0x00000000, },
286 { 0xd2040000, 0x00000000, 0x00000000, },
287 { 0xd1040000, 0x00000000, 0x00000000, },
288 { 0xd0040000, 0x00000000, 0x00000000, },
289 { 0xcf040000, 0x00000000, 0x00000000, },
290 { 0xce040000, 0x00000000, 0x00000000, },
291 { 0xcd040000, 0x00000000, 0x00000000, },
292 { 0xcc040000, 0x00000000, 0x00000000, },
293 { 0xcb040000, 0x00000000, 0x00000000, },
294 { 0xca040000, 0x00000000, 0x00000000, },
295 { 0xc9040000, 0x00000000, 0x00000000, },
296 { 0xc8040000, 0x00000000, 0x00000000, },
297 { 0xc7040000, 0x00000000, 0x00000000, },
298 { 0xc6040000, 0x00000000, 0x00000000, },
299 { 0xc5040000, 0x00000000, 0x00000000, },
300 { 0xc4040000, 0x00000000, 0x00000000, },
301 { 0xc3040000, 0x00000000, 0x00000000, },
302 { 0xc2040000, 0x00000000, 0x00000000, },
303 { 0xc1040000, 0x00000000, 0x00000000, },
304 };
305
306 const struct fpn fpu_cordic_inv_gain1 =
307 { 1, 0, -1, 1, { 0x0004dba7, 0x6d421af2, 0xd33fafd1, }, };
308
309 #endif /* CORDIC_BOOTSTRAP */
310
311 static inline void
sfpn_to_fpn(struct fpn * fp,const struct sfpn * s)312 sfpn_to_fpn(struct fpn *fp, const struct sfpn *s)
313 {
314 fp->fp_class = FPC_NUM;
315 fp->fp_sign = 0;
316 fp->fp_sticky = 0;
317 fp->fp_exp = s->sp_m0 >> 24;
318 if (fp->fp_exp & 0x80) {
319 fp->fp_exp |= 0xffffff00;
320 }
321 fp->fp_mant[0] = s->sp_m0 & 0x000fffff;
322 fp->fp_mant[1] = s->sp_m1;
323 fp->fp_mant[2] = s->sp_m2;
324 }
325
326 void
fpu_cordit1(struct fpemu * fe,struct fpn * x0,struct fpn * y0,struct fpn * z0,const struct fpn * vecmode)327 fpu_cordit1(struct fpemu *fe, struct fpn *x0, struct fpn *y0, struct fpn *z0,
328 const struct fpn *vecmode)
329 {
330 struct fpn t;
331 struct fpn x;
332 struct fpn y;
333 struct fpn z;
334 struct fpn *r;
335 int i;
336 int sign;
337
338 fpu_const(&t, FPU_CONST_1);
339 CPYFPN(&x, x0);
340 CPYFPN(&y, y0);
341 CPYFPN(&z, z0);
342
343 for (i = 0; i < EXT_FRACBITS; i++) {
344 struct fpn x1;
345
346 /* y < vecmode */
347 CPYFPN(&fe->fe_f1, &y);
348 CPYFPN(&fe->fe_f2, vecmode);
349 fe->fe_f2.fp_sign = !fe->fe_f2.fp_sign;
350 r = fpu_add(fe);
351
352 if ((vecmode->fp_sign == 0 && r->fp_sign) ||
353 (vecmode->fp_sign && z.fp_sign == 0)) {
354 sign = 1;
355 } else {
356 sign = 0;
357 }
358
359 /* y * t */
360 CPYFPN(&fe->fe_f1, &y);
361 CPYFPN(&fe->fe_f2, &t);
362 r = fpu_mul(fe);
363
364 /*
365 * x1 = x - y*t (if sign)
366 * x1 = x + y*t
367 */
368 CPYFPN(&fe->fe_f2, r);
369 if (sign)
370 fe->fe_f2.fp_sign = !fe->fe_f2.fp_sign;
371 CPYFPN(&fe->fe_f1, &x);
372 r = fpu_add(fe);
373 CPYFPN(&x1, r);
374
375 /* x * t */
376 CPYFPN(&fe->fe_f1, &x);
377 CPYFPN(&fe->fe_f2, &t);
378 r = fpu_mul(fe);
379
380 /*
381 * y = y + x*t (if sign)
382 * y = y - x*t
383 */
384 CPYFPN(&fe->fe_f2, r);
385 if (!sign)
386 fe->fe_f2.fp_sign = !fe->fe_f2.fp_sign;
387 CPYFPN(&fe->fe_f1, &y);
388 r = fpu_add(fe);
389 CPYFPN(&y, r);
390
391 /*
392 * z = z - atan_table[i] (if sign)
393 * z = z + atan_table[i]
394 */
395 CPYFPN(&fe->fe_f1, &z);
396 sfpn_to_fpn(&fe->fe_f2, &atan_table[i]);
397 if (sign)
398 fe->fe_f2.fp_sign = !fe->fe_f2.fp_sign;
399 r = fpu_add(fe);
400 CPYFPN(&z, r);
401
402 /* x = x1 */
403 CPYFPN(&x, &x1);
404
405 /* t /= 2 */
406 t.fp_exp--;
407 }
408
409 CPYFPN(x0, &x);
410 CPYFPN(y0, &y);
411 CPYFPN(z0, &z);
412 }
413