xref: /netbsd-src/sys/arch/m68k/fpe/fpu_cordic.c (revision 67d39dde58d563ec5abbf213be925b711213ffd4)
1 /*	$NetBSD: fpu_cordic.c,v 1.4 2016/12/06 05:58:19 isaki Exp $	*/
2 
3 /*
4  * Copyright (c) 2013 Tetsuya Isaki. All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
20  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
22  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
23  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __KERNEL_RCSID(0, "$NetBSD: fpu_cordic.c,v 1.4 2016/12/06 05:58:19 isaki Exp $");
30 
31 #include <machine/ieee.h>
32 
33 #include "fpu_emulate.h"
34 
35 /*
36  * sfpn = shoftened fp number; the idea is from fpu_log.c but not the same.
37  * The most significant byte of sp_m0 is EXP (signed byte) and the rest
38  * of sp_m0 is fp_mant[0].
39  */
40 struct sfpn {
41 	uint32_t sp_m0;
42 	uint32_t sp_m1;
43 	uint32_t sp_m2;
44 };
45 
46 #if defined(CORDIC_BOOTSTRAP)
47 /*
48  * This is a bootstrap code to generate a pre-calculated tables such as
49  * atan_table[].  However, it's just for reference.
50  * If you want to run the bootstrap, you will define CORDIC_BOOTSTRAP
51  * and modify these files as a userland application.
52  */
53 
54 #include <stdio.h>
55 #include <stdlib.h>
56 #include <string.h>
57 #include <float.h>
58 
59 static void prepare_cordic_const(struct fpemu *);
60 static struct fpn *fpu_gain1_cordic(struct fpemu *);
61 static struct fpn *fpu_atan_taylor(struct fpemu *);
62 static void printf_fpn(const struct fpn *);
63 static void printf_sfpn(const struct sfpn *);
64 static void fpn_to_sfpn(struct sfpn *, const struct fpn *);
65 
66 static struct sfpn atan_table[EXT_FRACBITS];
67 static struct fpn inv_gain1;
68 
69 int
main(int argc,char * argv[])70 main(int argc, char *argv[])
71 {
72 	struct fpemu dummyfe;
73 	int i;
74 	struct fpn fp;
75 
76 	memset(&dummyfe, 0, sizeof(dummyfe));
77 	prepare_cordic_const(&dummyfe);
78 
79 	/* output as source code */
80 	printf("static const struct sfpn atan_table[] = {\n");
81 	for (i = 0; i < EXT_FRACBITS; i++) {
82 		printf("\t");
83 		printf_sfpn(&atan_table[i]);
84 		printf(",\n");
85 	}
86 	printf("};\n\n");
87 
88 	printf("const struct fpn fpu_cordic_inv_gain1 =\n\t");
89 	printf_fpn(&inv_gain1);
90 	printf(";\n\n");
91 }
92 
93 /*
94  * This routine uses fpu_const(), fpu_add(), fpu_div(), fpu_logn()
95  * and fpu_atan_taylor() as bootstrap.
96  */
97 static void
prepare_cordic_const(struct fpemu * fe)98 prepare_cordic_const(struct fpemu *fe)
99 {
100 	struct fpn t;
101 	struct fpn x;
102 	struct fpn *r;
103 	int i;
104 
105 	/* atan_table */
106 	fpu_const(&t, FPU_CONST_1);
107 	for (i = 0; i < EXT_FRACBITS; i++) {
108 		/* atan(t) */
109 		CPYFPN(&fe->fe_f2, &t);
110 		r = fpu_atan_taylor(fe);
111 		fpn_to_sfpn(&atan_table[i], r);
112 
113 		/* t /= 2 */
114 		t.fp_exp--;
115 	}
116 
117 	/* inv_gain1 = 1 / gain1cordic() */
118 	r = fpu_gain1_cordic(fe);
119 	CPYFPN(&fe->fe_f2, r);
120 	fpu_const(&fe->fe_f1, FPU_CONST_1);
121 	r = fpu_div(fe);
122 	CPYFPN(&inv_gain1, r);
123 }
124 
125 static struct fpn *
fpu_gain1_cordic(struct fpemu * fe)126 fpu_gain1_cordic(struct fpemu *fe)
127 {
128 	struct fpn x;
129 	struct fpn y;
130 	struct fpn z;
131 	struct fpn v;
132 
133 	fpu_const(&x, FPU_CONST_1);
134 	fpu_const(&y, FPU_CONST_0);
135 	fpu_const(&z, FPU_CONST_0);
136 	CPYFPN(&v, &x);
137 	v.fp_sign = !v.fp_sign;
138 
139 	fpu_cordit1(fe, &x, &y, &z, &v);
140 	CPYFPN(&fe->fe_f2, &x);
141 	return &fe->fe_f2;
142 }
143 
144 /*
145  * arctan(x) = pi/4 (for |x| = 1)
146  *
147  *                 x^3   x^5   x^7
148  * arctan(x) = x - --- + --- - --- + ...   (for |x| < 1)
149  *                  3     5     7
150  */
151 static struct fpn *
fpu_atan_taylor(struct fpemu * fe)152 fpu_atan_taylor(struct fpemu *fe)
153 {
154 	struct fpn res;
155 	struct fpn x2;
156 	struct fpn s0;
157 	struct fpn *s1;
158 	struct fpn *r;
159 	uint32_t k;
160 
161 	/* arctan(1) is pi/4 */
162 	if (fe->fe_f2.fp_exp == 0) {
163 		fpu_const(&fe->fe_f2, FPU_CONST_PI);
164 		fe->fe_f2.fp_exp -= 2;
165 		return &fe->fe_f2;
166 	}
167 
168 	/* s0 := x */
169 	CPYFPN(&s0, &fe->fe_f2);
170 
171 	/* res := x */
172 	CPYFPN(&res, &fe->fe_f2);
173 
174 	/* x2 := x * x */
175 	CPYFPN(&fe->fe_f1, &fe->fe_f2);
176 	r = fpu_mul(fe);
177 	CPYFPN(&x2, r);
178 
179 	k = 3;
180 	for (;;) {
181 		/* s1 := -s0 * x2 */
182 		CPYFPN(&fe->fe_f1, &s0);
183 		CPYFPN(&fe->fe_f2, &x2);
184 		s1 = fpu_mul(fe);
185 		s1->fp_sign ^= 1;
186 		CPYFPN(&fe->fe_f1, s1);
187 
188 		/* s0 := s1 for next loop */
189 		CPYFPN(&s0, s1);
190 
191 		/* s1 := s1 / k */
192 		fpu_explode(fe, &fe->fe_f2, FTYPE_LNG, &k);
193 		s1 = fpu_div(fe);
194 
195 		/* break if s1 is enough small */
196 		if (ISZERO(s1))
197 			break;
198 		if (res.fp_exp - s1->fp_exp >= FP_NMANT)
199 			break;
200 
201 		/* res += s1 */
202 		CPYFPN(&fe->fe_f2, s1);
203 		CPYFPN(&fe->fe_f1, &res);
204 		r = fpu_add(fe);
205 		CPYFPN(&res, r);
206 
207 		k += 2;
208 	}
209 
210 	CPYFPN(&fe->fe_f2, &res);
211 	return &fe->fe_f2;
212 }
213 
214 static void
printf_fpn(const struct fpn * fp)215 printf_fpn(const struct fpn *fp)
216 {
217 	printf("{ %d, %d, %3d, %d, { 0x%08x, 0x%08x, 0x%08x, }, }",
218 		fp->fp_class, fp->fp_sign, fp->fp_exp, fp->fp_sticky ? 1 : 0,
219 		fp->fp_mant[0], fp->fp_mant[1], fp->fp_mant[2]);
220 }
221 
222 static void
printf_sfpn(const struct sfpn * sp)223 printf_sfpn(const struct sfpn *sp)
224 {
225 	printf("{ 0x%08x, 0x%08x, 0x%08x, }",
226 		sp->sp_m0, sp->sp_m1, sp->sp_m2);
227 }
228 
229 static void
fpn_to_sfpn(struct sfpn * sp,const struct fpn * fp)230 fpn_to_sfpn(struct sfpn *sp, const struct fpn *fp)
231 {
232 	sp->sp_m0 = (fp->fp_exp << 24) | fp->fp_mant[0];
233 	sp->sp_m1 = fp->fp_mant[1];
234 	sp->sp_m2 = fp->fp_mant[2];
235 }
236 
237 #else /* CORDIC_BOOTSTRAP */
238 
239 static const struct sfpn atan_table[] = {
240 	{ 0xff06487e, 0xd5110b46, 0x11a80000, },
241 	{ 0xfe076b19, 0xc1586ed3, 0xda2b7f0d, },
242 	{ 0xfd07d6dd, 0x7e4b2037, 0x58ab6e33, },
243 	{ 0xfc07f56e, 0xa6ab0bdb, 0x719644b5, },
244 	{ 0xfb07fd56, 0xedcb3f7a, 0x71b65937, },
245 	{ 0xfa07ff55, 0x6eea5d89, 0x2a13bce7, },
246 	{ 0xf907ffd5, 0x56eedca6, 0xaddf3c5f, },
247 	{ 0xf807fff5, 0x556eeea5, 0xcb403117, },
248 	{ 0xf707fffd, 0x5556eeed, 0xca5d8956, },
249 	{ 0xf607ffff, 0x55556eee, 0xea5ca6ab, },
250 	{ 0xf507ffff, 0xd55556ee, 0xeedca5c8, },
251 	{ 0xf407ffff, 0xf555556e, 0xeeeea5c8, },
252 	{ 0xf307ffff, 0xfd555556, 0xeeeeedc8, },
253 	{ 0xf207ffff, 0xff555555, 0x6eeeeee8, },
254 	{ 0xf107ffff, 0xffd55555, 0x56eeeeed, },
255 	{ 0xf007ffff, 0xfff55555, 0x556eeeed, },
256 	{ 0xef07ffff, 0xfffd5555, 0x5556eeed, },
257 	{ 0xee07ffff, 0xffff5555, 0x55556eed, },
258 	{ 0xed07ffff, 0xffffd555, 0x555556ed, },
259 	{ 0xec07ffff, 0xfffff555, 0x5555556d, },
260 	{ 0xeb07ffff, 0xfffffd55, 0x55555555, },
261 	{ 0xea07ffff, 0xffffff55, 0x55555554, },
262 	{ 0xe907ffff, 0xffffffd5, 0x55555554, },
263 	{ 0xe807ffff, 0xfffffff5, 0x55555554, },
264 	{ 0xe707ffff, 0xfffffffd, 0x55555554, },
265 	{ 0xe607ffff, 0xffffffff, 0x55555554, },
266 	{ 0xe507ffff, 0xffffffff, 0xd5555554, },
267 	{ 0xe407ffff, 0xffffffff, 0xf5555554, },
268 	{ 0xe307ffff, 0xffffffff, 0xfd555554, },
269 	{ 0xe207ffff, 0xffffffff, 0xff555554, },
270 	{ 0xe107ffff, 0xffffffff, 0xffd55554, },
271 	{ 0xe007ffff, 0xffffffff, 0xfff55554, },
272 	{ 0xdf07ffff, 0xffffffff, 0xfffd5554, },
273 	{ 0xde07ffff, 0xffffffff, 0xffff5554, },
274 	{ 0xdd07ffff, 0xffffffff, 0xffffd554, },
275 	{ 0xdc07ffff, 0xffffffff, 0xfffff554, },
276 	{ 0xdb07ffff, 0xffffffff, 0xfffffd54, },
277 	{ 0xda07ffff, 0xffffffff, 0xffffff54, },
278 	{ 0xd907ffff, 0xffffffff, 0xffffffd4, },
279 	{ 0xd807ffff, 0xffffffff, 0xfffffff4, },
280 	{ 0xd707ffff, 0xffffffff, 0xfffffffc, },
281 	{ 0xd7040000, 0x00000000, 0x00000000, },
282 	{ 0xd6040000, 0x00000000, 0x00000000, },
283 	{ 0xd5040000, 0x00000000, 0x00000000, },
284 	{ 0xd4040000, 0x00000000, 0x00000000, },
285 	{ 0xd3040000, 0x00000000, 0x00000000, },
286 	{ 0xd2040000, 0x00000000, 0x00000000, },
287 	{ 0xd1040000, 0x00000000, 0x00000000, },
288 	{ 0xd0040000, 0x00000000, 0x00000000, },
289 	{ 0xcf040000, 0x00000000, 0x00000000, },
290 	{ 0xce040000, 0x00000000, 0x00000000, },
291 	{ 0xcd040000, 0x00000000, 0x00000000, },
292 	{ 0xcc040000, 0x00000000, 0x00000000, },
293 	{ 0xcb040000, 0x00000000, 0x00000000, },
294 	{ 0xca040000, 0x00000000, 0x00000000, },
295 	{ 0xc9040000, 0x00000000, 0x00000000, },
296 	{ 0xc8040000, 0x00000000, 0x00000000, },
297 	{ 0xc7040000, 0x00000000, 0x00000000, },
298 	{ 0xc6040000, 0x00000000, 0x00000000, },
299 	{ 0xc5040000, 0x00000000, 0x00000000, },
300 	{ 0xc4040000, 0x00000000, 0x00000000, },
301 	{ 0xc3040000, 0x00000000, 0x00000000, },
302 	{ 0xc2040000, 0x00000000, 0x00000000, },
303 	{ 0xc1040000, 0x00000000, 0x00000000, },
304 };
305 
306 const struct fpn fpu_cordic_inv_gain1 =
307 	{ 1, 0,  -1, 1, { 0x0004dba7, 0x6d421af2, 0xd33fafd1, }, };
308 
309 #endif /* CORDIC_BOOTSTRAP */
310 
311 static inline void
sfpn_to_fpn(struct fpn * fp,const struct sfpn * s)312 sfpn_to_fpn(struct fpn *fp, const struct sfpn *s)
313 {
314 	fp->fp_class = FPC_NUM;
315 	fp->fp_sign = 0;
316 	fp->fp_sticky = 0;
317 	fp->fp_exp = s->sp_m0 >> 24;
318 	if (fp->fp_exp & 0x80) {
319 		fp->fp_exp |= 0xffffff00;
320 	}
321 	fp->fp_mant[0] = s->sp_m0 & 0x000fffff;
322 	fp->fp_mant[1] = s->sp_m1;
323 	fp->fp_mant[2] = s->sp_m2;
324 }
325 
326 void
fpu_cordit1(struct fpemu * fe,struct fpn * x0,struct fpn * y0,struct fpn * z0,const struct fpn * vecmode)327 fpu_cordit1(struct fpemu *fe, struct fpn *x0, struct fpn *y0, struct fpn *z0,
328 	const struct fpn *vecmode)
329 {
330 	struct fpn t;
331 	struct fpn x;
332 	struct fpn y;
333 	struct fpn z;
334 	struct fpn *r;
335 	int i;
336 	int sign;
337 
338 	fpu_const(&t, FPU_CONST_1);
339 	CPYFPN(&x, x0);
340 	CPYFPN(&y, y0);
341 	CPYFPN(&z, z0);
342 
343 	for (i = 0; i < EXT_FRACBITS; i++) {
344 		struct fpn x1;
345 
346 		/* y < vecmode */
347 		CPYFPN(&fe->fe_f1, &y);
348 		CPYFPN(&fe->fe_f2, vecmode);
349 		fe->fe_f2.fp_sign = !fe->fe_f2.fp_sign;
350 		r = fpu_add(fe);
351 
352 		if ((vecmode->fp_sign == 0 && r->fp_sign) ||
353 		    (vecmode->fp_sign && z.fp_sign == 0)) {
354 			sign = 1;
355 		} else {
356 			sign = 0;
357 		}
358 
359 		/* y * t */
360 		CPYFPN(&fe->fe_f1, &y);
361 		CPYFPN(&fe->fe_f2, &t);
362 		r = fpu_mul(fe);
363 
364 		/*
365 		 * x1 = x - y*t (if sign)
366 		 * x1 = x + y*t
367 		 */
368 		CPYFPN(&fe->fe_f2, r);
369 		if (sign)
370 			fe->fe_f2.fp_sign = !fe->fe_f2.fp_sign;
371 		CPYFPN(&fe->fe_f1, &x);
372 		r = fpu_add(fe);
373 		CPYFPN(&x1, r);
374 
375 		/* x * t */
376 		CPYFPN(&fe->fe_f1, &x);
377 		CPYFPN(&fe->fe_f2, &t);
378 		r = fpu_mul(fe);
379 
380 		/*
381 		 * y = y + x*t (if sign)
382 		 * y = y - x*t
383 		 */
384 		CPYFPN(&fe->fe_f2, r);
385 		if (!sign)
386 			fe->fe_f2.fp_sign = !fe->fe_f2.fp_sign;
387 		CPYFPN(&fe->fe_f1, &y);
388 		r = fpu_add(fe);
389 		CPYFPN(&y, r);
390 
391 		/*
392 		 * z = z - atan_table[i] (if sign)
393 		 * z = z + atan_table[i]
394 		 */
395 		CPYFPN(&fe->fe_f1, &z);
396 		sfpn_to_fpn(&fe->fe_f2, &atan_table[i]);
397 		if (sign)
398 			fe->fe_f2.fp_sign = !fe->fe_f2.fp_sign;
399 		r = fpu_add(fe);
400 		CPYFPN(&z, r);
401 
402 		/* x = x1 */
403 		CPYFPN(&x, &x1);
404 
405 		/* t /= 2 */
406 		t.fp_exp--;
407 	}
408 
409 	CPYFPN(x0, &x);
410 	CPYFPN(y0, &y);
411 	CPYFPN(z0, &z);
412 }
413