xref: /netbsd-src/sys/arch/hppa/spmath/dfmpy.c (revision b1c86f5f087524e68db12794ee9c3e3da1ab17a0)
1 /*	$NetBSD: dfmpy.c,v 1.4 2007/02/22 05:46:29 thorpej Exp $	*/
2 
3 /*	$OpenBSD: dfmpy.c,v 1.4 2001/03/29 03:58:17 mickey Exp $	*/
4 
5 /*
6  * Copyright 1996 1995 by Open Software Foundation, Inc.
7  *              All Rights Reserved
8  *
9  * Permission to use, copy, modify, and distribute this software and
10  * its documentation for any purpose and without fee is hereby granted,
11  * provided that the above copyright notice appears in all copies and
12  * that both the copyright notice and this permission notice appear in
13  * supporting documentation.
14  *
15  * OSF DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE
16  * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
17  * FOR A PARTICULAR PURPOSE.
18  *
19  * IN NO EVENT SHALL OSF BE LIABLE FOR ANY SPECIAL, INDIRECT, OR
20  * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
21  * LOSS OF USE, DATA OR PROFITS, WHETHER IN ACTION OF CONTRACT,
22  * NEGLIGENCE, OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
23  * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
24  *
25  */
26 /*
27  * pmk1.1
28  */
29 /*
30  * (c) Copyright 1986 HEWLETT-PACKARD COMPANY
31  *
32  * To anyone who acknowledges that this file is provided "AS IS"
33  * without any express or implied warranty:
34  *     permission to use, copy, modify, and distribute this file
35  * for any purpose is hereby granted without fee, provided that
36  * the above copyright notice and this notice appears in all
37  * copies, and that the name of Hewlett-Packard Company not be
38  * used in advertising or publicity pertaining to distribution
39  * of the software without specific, written prior permission.
40  * Hewlett-Packard Company makes no representations about the
41  * suitability of this software for any purpose.
42  */
43 
44 #include <sys/cdefs.h>
45 __KERNEL_RCSID(0, "$NetBSD: dfmpy.c,v 1.4 2007/02/22 05:46:29 thorpej Exp $");
46 
47 #include "../spmath/float.h"
48 #include "../spmath/dbl_float.h"
49 
50 /*
51  *  Double Precision Floating-point Multiply
52  */
53 
54 int
55 dbl_fmpy(srcptr1,srcptr2,dstptr,status)
56 
57 dbl_floating_point *srcptr1, *srcptr2, *dstptr;
58 unsigned int *status;
59 {
60 	register unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2;
61 	register unsigned int opnd3p1, opnd3p2, resultp1, resultp2;
62 	register int dest_exponent, count;
63 	register int inexact = false, guardbit = false, stickybit = false;
64 	int is_tiny;
65 
66 	Dbl_copyfromptr(srcptr1,opnd1p1,opnd1p2);
67 	Dbl_copyfromptr(srcptr2,opnd2p1,opnd2p2);
68 
69 	/*
70 	 * set sign bit of result
71 	 */
72 	if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1))
73 		Dbl_setnegativezerop1(resultp1);
74 	else Dbl_setzerop1(resultp1);
75 	/*
76 	 * check first operand for NaN's or infinity
77 	 */
78 	if (Dbl_isinfinity_exponent(opnd1p1)) {
79 		if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
80 			if (Dbl_isnotnan(opnd2p1,opnd2p2)) {
81 				if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) {
82 					/*
83 					 * invalid since operands are infinity
84 					 * and zero
85 					 */
86 					if (Is_invalidtrap_enabled())
87 						return(INVALIDEXCEPTION);
88 					Set_invalidflag();
89 					Dbl_makequietnan(resultp1,resultp2);
90 					Dbl_copytoptr(resultp1,resultp2,dstptr);
91 					return(NOEXCEPTION);
92 				}
93 				/*
94 				 * return infinity
95 				 */
96 				Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
97 				Dbl_copytoptr(resultp1,resultp2,dstptr);
98 				return(NOEXCEPTION);
99 			}
100 		}
101 		else {
102 			/*
103 			 * is NaN; signaling or quiet?
104 			 */
105 			if (Dbl_isone_signaling(opnd1p1)) {
106 				/* trap if INVALIDTRAP enabled */
107 				if (Is_invalidtrap_enabled())
108 					return(INVALIDEXCEPTION);
109 				/* make NaN quiet */
110 				Set_invalidflag();
111 				Dbl_set_quiet(opnd1p1);
112 			}
113 			/*
114 			 * is second operand a signaling NaN?
115 			 */
116 			else if (Dbl_is_signalingnan(opnd2p1)) {
117 				/* trap if INVALIDTRAP enabled */
118 				if (Is_invalidtrap_enabled())
119 					return(INVALIDEXCEPTION);
120 				/* make NaN quiet */
121 				Set_invalidflag();
122 				Dbl_set_quiet(opnd2p1);
123 				Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
124 				return(NOEXCEPTION);
125 			}
126 			/*
127 			 * return quiet NaN
128 			 */
129 			Dbl_copytoptr(opnd1p1,opnd1p2,dstptr);
130 			return(NOEXCEPTION);
131 		}
132 	}
133 	/*
134 	 * check second operand for NaN's or infinity
135 	 */
136 	if (Dbl_isinfinity_exponent(opnd2p1)) {
137 		if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
138 			if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) {
139 				/* invalid since operands are zero & infinity */
140 				if (Is_invalidtrap_enabled())
141 					return(INVALIDEXCEPTION);
142 				Set_invalidflag();
143 				Dbl_makequietnan(opnd2p1,opnd2p2);
144 				Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
145 				return(NOEXCEPTION);
146 			}
147 			/*
148 			 * return infinity
149 			 */
150 			Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
151 			Dbl_copytoptr(resultp1,resultp2,dstptr);
152 			return(NOEXCEPTION);
153 		}
154 		/*
155 		 * is NaN; signaling or quiet?
156 		 */
157 		if (Dbl_isone_signaling(opnd2p1)) {
158 			/* trap if INVALIDTRAP enabled */
159 			if (Is_invalidtrap_enabled()) return(INVALIDEXCEPTION);
160 			/* make NaN quiet */
161 			Set_invalidflag();
162 			Dbl_set_quiet(opnd2p1);
163 		}
164 		/*
165 		 * return quiet NaN
166 		 */
167 		Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
168 		return(NOEXCEPTION);
169 	}
170 	/*
171 	 * Generate exponent
172 	 */
173 	dest_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) -DBL_BIAS;
174 
175 	/*
176 	 * Generate mantissa
177 	 */
178 	if (Dbl_isnotzero_exponent(opnd1p1)) {
179 		/* set hidden bit */
180 		Dbl_clear_signexponent_set_hidden(opnd1p1);
181 	}
182 	else {
183 		/* check for zero */
184 		if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
185 			Dbl_setzero_exponentmantissa(resultp1,resultp2);
186 			Dbl_copytoptr(resultp1,resultp2,dstptr);
187 			return(NOEXCEPTION);
188 		}
189 		/* is denormalized, adjust exponent */
190 		Dbl_clear_signexponent(opnd1p1);
191 		Dbl_leftshiftby1(opnd1p1,opnd1p2);
192 		Dbl_normalize(opnd1p1,opnd1p2,dest_exponent);
193 	}
194 	/* opnd2 needs to have hidden bit set with msb in hidden bit */
195 	if (Dbl_isnotzero_exponent(opnd2p1)) {
196 		Dbl_clear_signexponent_set_hidden(opnd2p1);
197 	}
198 	else {
199 		/* check for zero */
200 		if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
201 			Dbl_setzero_exponentmantissa(resultp1,resultp2);
202 			Dbl_copytoptr(resultp1,resultp2,dstptr);
203 			return(NOEXCEPTION);
204 		}
205 		/* is denormalized; want to normalize */
206 		Dbl_clear_signexponent(opnd2p1);
207 		Dbl_leftshiftby1(opnd2p1,opnd2p2);
208 		Dbl_normalize(opnd2p1,opnd2p2,dest_exponent);
209 	}
210 
211 	/* Multiply two source mantissas together */
212 
213 	/* make room for guard bits */
214 	Dbl_leftshiftby7(opnd2p1,opnd2p2);
215 	Dbl_setzero(opnd3p1,opnd3p2);
216 	/*
217 	 * Four bits at a time are inspected in each loop, and a
218 	 * simple shift and add multiply algorithm is used.
219 	 */
220 	for (count=1;count<=DBL_P;count+=4) {
221 		stickybit |= Dlow4p2(opnd3p2);
222 		Dbl_rightshiftby4(opnd3p1,opnd3p2);
223 		if (Dbit28p2(opnd1p2)) {
224 			/* Twoword_add should be an ADDC followed by an ADD. */
225 			Twoword_add(opnd3p1, opnd3p2, opnd2p1<<3 | opnd2p2>>29,
226 				    opnd2p2<<3);
227 		}
228 		if (Dbit29p2(opnd1p2)) {
229 			Twoword_add(opnd3p1, opnd3p2, opnd2p1<<2 | opnd2p2>>30,
230 				    opnd2p2<<2);
231 		}
232 		if (Dbit30p2(opnd1p2)) {
233 			Twoword_add(opnd3p1, opnd3p2, opnd2p1<<1 | opnd2p2>>31,
234 				    opnd2p2<<1);
235 		}
236 		if (Dbit31p2(opnd1p2)) {
237 			Twoword_add(opnd3p1, opnd3p2, opnd2p1, opnd2p2);
238 		}
239 		Dbl_rightshiftby4(opnd1p1,opnd1p2);
240 	}
241 	if (Dbit3p1(opnd3p1)==0) {
242 		Dbl_leftshiftby1(opnd3p1,opnd3p2);
243 	}
244 	else {
245 		/* result mantissa >= 2. */
246 		dest_exponent++;
247 	}
248 	/* check for denormalized result */
249 	while (Dbit3p1(opnd3p1)==0) {
250 		Dbl_leftshiftby1(opnd3p1,opnd3p2);
251 		dest_exponent--;
252 	}
253 	/*
254 	 * check for guard, sticky and inexact bits
255 	 */
256 	stickybit |= Dallp2(opnd3p2) << 25;
257 	guardbit = (Dallp2(opnd3p2) << 24) >> 31;
258 	inexact = guardbit | stickybit;
259 
260 	/* align result mantissa */
261 	Dbl_rightshiftby8(opnd3p1,opnd3p2);
262 
263 	/*
264 	 * round result
265 	 */
266 	if (inexact && (dest_exponent>0 || Is_underflowtrap_enabled())) {
267 		Dbl_clear_signexponent(opnd3p1);
268 		switch (Rounding_mode()) {
269 			case ROUNDPLUS:
270 				if (Dbl_iszero_sign(resultp1))
271 					Dbl_increment(opnd3p1,opnd3p2);
272 				break;
273 			case ROUNDMINUS:
274 				if (Dbl_isone_sign(resultp1))
275 					Dbl_increment(opnd3p1,opnd3p2);
276 				break;
277 			case ROUNDNEAREST:
278 				if (guardbit &&
279 				    (stickybit || Dbl_isone_lowmantissap2(opnd3p2)))
280 					Dbl_increment(opnd3p1,opnd3p2);
281 				break;
282 		}
283 		if (Dbl_isone_hidden(opnd3p1)) dest_exponent++;
284 	}
285 	Dbl_set_mantissa(resultp1,resultp2,opnd3p1,opnd3p2);
286 
287 	/*
288 	 * Test for overflow
289 	 */
290 	if (dest_exponent >= DBL_INFINITY_EXPONENT) {
291 		/* trap if OVERFLOWTRAP enabled */
292 		if (Is_overflowtrap_enabled()) {
293 			/*
294 			 * Adjust bias of result
295 			 */
296 			Dbl_setwrapped_exponent(resultp1,dest_exponent,ovfl);
297 			Dbl_copytoptr(resultp1,resultp2,dstptr);
298 			if (inexact) {
299 			    if (Is_inexacttrap_enabled())
300 				return (OVERFLOWEXCEPTION | INEXACTEXCEPTION);
301 			    else
302 				Set_inexactflag();
303 			}
304 			return (OVERFLOWEXCEPTION);
305 		}
306 		inexact = true;
307 		Set_overflowflag();
308 		/* set result to infinity or largest number */
309 		Dbl_setoverflow(resultp1,resultp2);
310 	}
311 	/*
312 	 * Test for underflow
313 	 */
314 	else if (dest_exponent <= 0) {
315 		/* trap if UNDERFLOWTRAP enabled */
316 		if (Is_underflowtrap_enabled()) {
317 			/*
318 			 * Adjust bias of result
319 			 */
320 			Dbl_setwrapped_exponent(resultp1,dest_exponent,unfl);
321 			Dbl_copytoptr(resultp1,resultp2,dstptr);
322 			if (inexact) {
323 			    if (Is_inexacttrap_enabled())
324 				return (UNDERFLOWEXCEPTION | INEXACTEXCEPTION);
325 			    else
326 				Set_inexactflag();
327 			}
328 			return (UNDERFLOWEXCEPTION);
329 		}
330 
331 		/* Determine if should set underflow flag */
332 		is_tiny = true;
333 		if (dest_exponent == 0 && inexact) {
334 			switch (Rounding_mode()) {
335 			case ROUNDPLUS:
336 				if (Dbl_iszero_sign(resultp1)) {
337 					Dbl_increment(opnd3p1,opnd3p2);
338 					if (Dbl_isone_hiddenoverflow(opnd3p1))
339 						is_tiny = false;
340 					Dbl_decrement(opnd3p1,opnd3p2);
341 				}
342 				break;
343 			case ROUNDMINUS:
344 				if (Dbl_isone_sign(resultp1)) {
345 					Dbl_increment(opnd3p1,opnd3p2);
346 					if (Dbl_isone_hiddenoverflow(opnd3p1))
347 						is_tiny = false;
348 					Dbl_decrement(opnd3p1,opnd3p2);
349 				}
350 				break;
351 			case ROUNDNEAREST:
352 				if (guardbit && (stickybit ||
353 				    Dbl_isone_lowmantissap2(opnd3p2))) {
354 					Dbl_increment(opnd3p1,opnd3p2);
355 					if (Dbl_isone_hiddenoverflow(opnd3p1))
356 					is_tiny = false;
357 					Dbl_decrement(opnd3p1,opnd3p2);
358 				}
359 				break;
360 			}
361 		}
362 
363 		/*
364 		 * denormalize result or set to signed zero
365 		 */
366 		stickybit = inexact;
367 		Dbl_denormalize(opnd3p1,opnd3p2,dest_exponent,guardbit,
368 		 stickybit,inexact);
369 
370 		/* return zero or smallest number */
371 		if (inexact) {
372 			switch (Rounding_mode()) {
373 			case ROUNDPLUS:
374 				if (Dbl_iszero_sign(resultp1)) {
375 					Dbl_increment(opnd3p1,opnd3p2);
376 				}
377 				break;
378 			case ROUNDMINUS:
379 				if (Dbl_isone_sign(resultp1)) {
380 					Dbl_increment(opnd3p1,opnd3p2);
381 				}
382 				break;
383 			case ROUNDNEAREST:
384 				if (guardbit && (stickybit ||
385 				    Dbl_isone_lowmantissap2(opnd3p2))) {
386 					Dbl_increment(opnd3p1,opnd3p2);
387 				}
388 				break;
389 			}
390 			if (is_tiny) Set_underflowflag();
391 		}
392 		Dbl_set_exponentmantissa(resultp1,resultp2,opnd3p1,opnd3p2);
393 	}
394 	else Dbl_set_exponent(resultp1,dest_exponent);
395 	/* check for inexact */
396 	Dbl_copytoptr(resultp1,resultp2,dstptr);
397 	if (inexact) {
398 		if (Is_inexacttrap_enabled()) return(INEXACTEXCEPTION);
399 		else Set_inexactflag();
400 	}
401 	return(NOEXCEPTION);
402 }
403