xref: /openbsd-src/sys/arch/hppa/spmath/dfmpy.c (revision b2ea75c1b17e1a9a339660e7ed45cd24946b230e)
1 /*	$OpenBSD: dfmpy.c,v 1.4 2001/03/29 03:58:17 mickey Exp $	*/
2 
3 /*
4  * Copyright 1996 1995 by Open Software Foundation, Inc.
5  *              All Rights Reserved
6  *
7  * Permission to use, copy, modify, and distribute this software and
8  * its documentation for any purpose and without fee is hereby granted,
9  * provided that the above copyright notice appears in all copies and
10  * that both the copyright notice and this permission notice appear in
11  * supporting documentation.
12  *
13  * OSF DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE
14  * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
15  * FOR A PARTICULAR PURPOSE.
16  *
17  * IN NO EVENT SHALL OSF BE LIABLE FOR ANY SPECIAL, INDIRECT, OR
18  * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
19  * LOSS OF USE, DATA OR PROFITS, WHETHER IN ACTION OF CONTRACT,
20  * NEGLIGENCE, OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
21  * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
22  *
23  */
24 /*
25  * pmk1.1
26  */
27 /*
28  * (c) Copyright 1986 HEWLETT-PACKARD COMPANY
29  *
30  * To anyone who acknowledges that this file is provided "AS IS"
31  * without any express or implied warranty:
32  *     permission to use, copy, modify, and distribute this file
33  * for any purpose is hereby granted without fee, provided that
34  * the above copyright notice and this notice appears in all
35  * copies, and that the name of Hewlett-Packard Company not be
36  * used in advertising or publicity pertaining to distribution
37  * of the software without specific, written prior permission.
38  * Hewlett-Packard Company makes no representations about the
39  * suitability of this software for any purpose.
40  */
41 
42 #include "../spmath/float.h"
43 #include "../spmath/dbl_float.h"
44 
45 /*
46  *  Double Precision Floating-point Multiply
47  */
48 
49 int
50 dbl_fmpy(srcptr1,srcptr2,dstptr,status)
51 
52 dbl_floating_point *srcptr1, *srcptr2, *dstptr;
53 unsigned int *status;
54 {
55 	register unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2;
56 	register unsigned int opnd3p1, opnd3p2, resultp1, resultp2;
57 	register int dest_exponent, count;
58 	register int inexact = FALSE, guardbit = FALSE, stickybit = FALSE;
59 	int is_tiny;
60 
61 	Dbl_copyfromptr(srcptr1,opnd1p1,opnd1p2);
62 	Dbl_copyfromptr(srcptr2,opnd2p1,opnd2p2);
63 
64 	/*
65 	 * set sign bit of result
66 	 */
67 	if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1))
68 		Dbl_setnegativezerop1(resultp1);
69 	else Dbl_setzerop1(resultp1);
70 	/*
71 	 * check first operand for NaN's or infinity
72 	 */
73 	if (Dbl_isinfinity_exponent(opnd1p1)) {
74 		if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
75 			if (Dbl_isnotnan(opnd2p1,opnd2p2)) {
76 				if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) {
77 					/*
78 					 * invalid since operands are infinity
79 					 * and zero
80 					 */
81 					if (Is_invalidtrap_enabled())
82 						return(INVALIDEXCEPTION);
83 					Set_invalidflag();
84 					Dbl_makequietnan(resultp1,resultp2);
85 					Dbl_copytoptr(resultp1,resultp2,dstptr);
86 					return(NOEXCEPTION);
87 				}
88 				/*
89 				 * return infinity
90 				 */
91 				Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
92 				Dbl_copytoptr(resultp1,resultp2,dstptr);
93 				return(NOEXCEPTION);
94 			}
95 		}
96 		else {
97 			/*
98 			 * is NaN; signaling or quiet?
99 			 */
100 			if (Dbl_isone_signaling(opnd1p1)) {
101 				/* trap if INVALIDTRAP enabled */
102 				if (Is_invalidtrap_enabled())
103 					return(INVALIDEXCEPTION);
104 				/* make NaN quiet */
105 				Set_invalidflag();
106 				Dbl_set_quiet(opnd1p1);
107 			}
108 			/*
109 			 * is second operand a signaling NaN?
110 			 */
111 			else if (Dbl_is_signalingnan(opnd2p1)) {
112 				/* trap if INVALIDTRAP enabled */
113 				if (Is_invalidtrap_enabled())
114 					return(INVALIDEXCEPTION);
115 				/* make NaN quiet */
116 				Set_invalidflag();
117 				Dbl_set_quiet(opnd2p1);
118 				Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
119 				return(NOEXCEPTION);
120 			}
121 			/*
122 			 * return quiet NaN
123 			 */
124 			Dbl_copytoptr(opnd1p1,opnd1p2,dstptr);
125 			return(NOEXCEPTION);
126 		}
127 	}
128 	/*
129 	 * check second operand for NaN's or infinity
130 	 */
131 	if (Dbl_isinfinity_exponent(opnd2p1)) {
132 		if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
133 			if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) {
134 				/* invalid since operands are zero & infinity */
135 				if (Is_invalidtrap_enabled())
136 					return(INVALIDEXCEPTION);
137 				Set_invalidflag();
138 				Dbl_makequietnan(opnd2p1,opnd2p2);
139 				Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
140 				return(NOEXCEPTION);
141 			}
142 			/*
143 			 * return infinity
144 			 */
145 			Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
146 			Dbl_copytoptr(resultp1,resultp2,dstptr);
147 			return(NOEXCEPTION);
148 		}
149 		/*
150 		 * is NaN; signaling or quiet?
151 		 */
152 		if (Dbl_isone_signaling(opnd2p1)) {
153 			/* trap if INVALIDTRAP enabled */
154 			if (Is_invalidtrap_enabled()) return(INVALIDEXCEPTION);
155 			/* make NaN quiet */
156 			Set_invalidflag();
157 			Dbl_set_quiet(opnd2p1);
158 		}
159 		/*
160 		 * return quiet NaN
161 		 */
162 		Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
163 		return(NOEXCEPTION);
164 	}
165 	/*
166 	 * Generate exponent
167 	 */
168 	dest_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) -DBL_BIAS;
169 
170 	/*
171 	 * Generate mantissa
172 	 */
173 	if (Dbl_isnotzero_exponent(opnd1p1)) {
174 		/* set hidden bit */
175 		Dbl_clear_signexponent_set_hidden(opnd1p1);
176 	}
177 	else {
178 		/* check for zero */
179 		if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
180 			Dbl_setzero_exponentmantissa(resultp1,resultp2);
181 			Dbl_copytoptr(resultp1,resultp2,dstptr);
182 			return(NOEXCEPTION);
183 		}
184 		/* is denormalized, adjust exponent */
185 		Dbl_clear_signexponent(opnd1p1);
186 		Dbl_leftshiftby1(opnd1p1,opnd1p2);
187 		Dbl_normalize(opnd1p1,opnd1p2,dest_exponent);
188 	}
189 	/* opnd2 needs to have hidden bit set with msb in hidden bit */
190 	if (Dbl_isnotzero_exponent(opnd2p1)) {
191 		Dbl_clear_signexponent_set_hidden(opnd2p1);
192 	}
193 	else {
194 		/* check for zero */
195 		if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
196 			Dbl_setzero_exponentmantissa(resultp1,resultp2);
197 			Dbl_copytoptr(resultp1,resultp2,dstptr);
198 			return(NOEXCEPTION);
199 		}
200 		/* is denormalized; want to normalize */
201 		Dbl_clear_signexponent(opnd2p1);
202 		Dbl_leftshiftby1(opnd2p1,opnd2p2);
203 		Dbl_normalize(opnd2p1,opnd2p2,dest_exponent);
204 	}
205 
206 	/* Multiply two source mantissas together */
207 
208 	/* make room for guard bits */
209 	Dbl_leftshiftby7(opnd2p1,opnd2p2);
210 	Dbl_setzero(opnd3p1,opnd3p2);
211 	/*
212 	 * Four bits at a time are inspected in each loop, and a
213 	 * simple shift and add multiply algorithm is used.
214 	 */
215 	for (count=1;count<=DBL_P;count+=4) {
216 		stickybit |= Dlow4p2(opnd3p2);
217 		Dbl_rightshiftby4(opnd3p1,opnd3p2);
218 		if (Dbit28p2(opnd1p2)) {
219 			/* Twoword_add should be an ADDC followed by an ADD. */
220 			Twoword_add(opnd3p1, opnd3p2, opnd2p1<<3 | opnd2p2>>29,
221 				    opnd2p2<<3);
222 		}
223 		if (Dbit29p2(opnd1p2)) {
224 			Twoword_add(opnd3p1, opnd3p2, opnd2p1<<2 | opnd2p2>>30,
225 				    opnd2p2<<2);
226 		}
227 		if (Dbit30p2(opnd1p2)) {
228 			Twoword_add(opnd3p1, opnd3p2, opnd2p1<<1 | opnd2p2>>31,
229 				    opnd2p2<<1);
230 		}
231 		if (Dbit31p2(opnd1p2)) {
232 			Twoword_add(opnd3p1, opnd3p2, opnd2p1, opnd2p2);
233 		}
234 		Dbl_rightshiftby4(opnd1p1,opnd1p2);
235 	}
236 	if (Dbit3p1(opnd3p1)==0) {
237 		Dbl_leftshiftby1(opnd3p1,opnd3p2);
238 	}
239 	else {
240 		/* result mantissa >= 2. */
241 		dest_exponent++;
242 	}
243 	/* check for denormalized result */
244 	while (Dbit3p1(opnd3p1)==0) {
245 		Dbl_leftshiftby1(opnd3p1,opnd3p2);
246 		dest_exponent--;
247 	}
248 	/*
249 	 * check for guard, sticky and inexact bits
250 	 */
251 	stickybit |= Dallp2(opnd3p2) << 25;
252 	guardbit = (Dallp2(opnd3p2) << 24) >> 31;
253 	inexact = guardbit | stickybit;
254 
255 	/* align result mantissa */
256 	Dbl_rightshiftby8(opnd3p1,opnd3p2);
257 
258 	/*
259 	 * round result
260 	 */
261 	if (inexact && (dest_exponent>0 || Is_underflowtrap_enabled())) {
262 		Dbl_clear_signexponent(opnd3p1);
263 		switch (Rounding_mode()) {
264 			case ROUNDPLUS:
265 				if (Dbl_iszero_sign(resultp1))
266 					Dbl_increment(opnd3p1,opnd3p2);
267 				break;
268 			case ROUNDMINUS:
269 				if (Dbl_isone_sign(resultp1))
270 					Dbl_increment(opnd3p1,opnd3p2);
271 				break;
272 			case ROUNDNEAREST:
273 				if (guardbit &&
274 				    (stickybit || Dbl_isone_lowmantissap2(opnd3p2)))
275 					Dbl_increment(opnd3p1,opnd3p2);
276 				break;
277 		}
278 		if (Dbl_isone_hidden(opnd3p1)) dest_exponent++;
279 	}
280 	Dbl_set_mantissa(resultp1,resultp2,opnd3p1,opnd3p2);
281 
282 	/*
283 	 * Test for overflow
284 	 */
285 	if (dest_exponent >= DBL_INFINITY_EXPONENT) {
286 		/* trap if OVERFLOWTRAP enabled */
287 		if (Is_overflowtrap_enabled()) {
288 			/*
289 			 * Adjust bias of result
290 			 */
291 			Dbl_setwrapped_exponent(resultp1,dest_exponent,ovfl);
292 			Dbl_copytoptr(resultp1,resultp2,dstptr);
293 			if (inexact) {
294 			    if (Is_inexacttrap_enabled())
295 				return (OVERFLOWEXCEPTION | INEXACTEXCEPTION);
296 			    else
297 				Set_inexactflag();
298 			}
299 			return (OVERFLOWEXCEPTION);
300 		}
301 		inexact = TRUE;
302 		Set_overflowflag();
303 		/* set result to infinity or largest number */
304 		Dbl_setoverflow(resultp1,resultp2);
305 	}
306 	/*
307 	 * Test for underflow
308 	 */
309 	else if (dest_exponent <= 0) {
310 		/* trap if UNDERFLOWTRAP enabled */
311 		if (Is_underflowtrap_enabled()) {
312 			/*
313 			 * Adjust bias of result
314 			 */
315 			Dbl_setwrapped_exponent(resultp1,dest_exponent,unfl);
316 			Dbl_copytoptr(resultp1,resultp2,dstptr);
317 			if (inexact) {
318 			    if (Is_inexacttrap_enabled())
319 				return (UNDERFLOWEXCEPTION | INEXACTEXCEPTION);
320 			    else
321 				Set_inexactflag();
322 			}
323 			return (UNDERFLOWEXCEPTION);
324 		}
325 
326 		/* Determine if should set underflow flag */
327 		is_tiny = TRUE;
328 		if (dest_exponent == 0 && inexact) {
329 			switch (Rounding_mode()) {
330 			case ROUNDPLUS:
331 				if (Dbl_iszero_sign(resultp1)) {
332 					Dbl_increment(opnd3p1,opnd3p2);
333 					if (Dbl_isone_hiddenoverflow(opnd3p1))
334 						is_tiny = FALSE;
335 					Dbl_decrement(opnd3p1,opnd3p2);
336 				}
337 				break;
338 			case ROUNDMINUS:
339 				if (Dbl_isone_sign(resultp1)) {
340 					Dbl_increment(opnd3p1,opnd3p2);
341 					if (Dbl_isone_hiddenoverflow(opnd3p1))
342 						is_tiny = FALSE;
343 					Dbl_decrement(opnd3p1,opnd3p2);
344 				}
345 				break;
346 			case ROUNDNEAREST:
347 				if (guardbit && (stickybit ||
348 				    Dbl_isone_lowmantissap2(opnd3p2))) {
349 					Dbl_increment(opnd3p1,opnd3p2);
350 					if (Dbl_isone_hiddenoverflow(opnd3p1))
351 					is_tiny = FALSE;
352 					Dbl_decrement(opnd3p1,opnd3p2);
353 				}
354 				break;
355 			}
356 		}
357 
358 		/*
359 		 * denormalize result or set to signed zero
360 		 */
361 		stickybit = inexact;
362 		Dbl_denormalize(opnd3p1,opnd3p2,dest_exponent,guardbit,
363 		 stickybit,inexact);
364 
365 		/* return zero or smallest number */
366 		if (inexact) {
367 			switch (Rounding_mode()) {
368 			case ROUNDPLUS:
369 				if (Dbl_iszero_sign(resultp1)) {
370 					Dbl_increment(opnd3p1,opnd3p2);
371 				}
372 				break;
373 			case ROUNDMINUS:
374 				if (Dbl_isone_sign(resultp1)) {
375 					Dbl_increment(opnd3p1,opnd3p2);
376 				}
377 				break;
378 			case ROUNDNEAREST:
379 				if (guardbit && (stickybit ||
380 				    Dbl_isone_lowmantissap2(opnd3p2))) {
381 					Dbl_increment(opnd3p1,opnd3p2);
382 				}
383 				break;
384 			}
385 			if (is_tiny) Set_underflowflag();
386 		}
387 		Dbl_set_exponentmantissa(resultp1,resultp2,opnd3p1,opnd3p2);
388 	}
389 	else Dbl_set_exponent(resultp1,dest_exponent);
390 	/* check for inexact */
391 	Dbl_copytoptr(resultp1,resultp2,dstptr);
392 	if (inexact) {
393 		if (Is_inexacttrap_enabled()) return(INEXACTEXCEPTION);
394 		else Set_inexactflag();
395 	}
396 	return(NOEXCEPTION);
397 }
398