sparc/fp/_D_cplx_div.c

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
*0Sstevel@tonic-gate
*0Sstevel@tonic-gate#pragma ident	"%Z%%M%	%I%	%E% SMI"
*0Sstevel@tonic-gate
/*
 * _D_cplx_div(z, w) returns z / w with infinities handled according
 * to C99.
 *
 * If z and w are both finite and w is nonzero, _D_cplx_div(z, w)
 * delivers the complex quotient q according to the usual formula:
 * let a = Re(z), b = Im(z), c = Re(w), and d = Im(w); then q = x +
 * I * y where x = (a * c + b * d) / r and y = (b * c - a * d) / r
 * with r = c * c + d * d.  This implementation scales to avoid
 * premature underflow or overflow.
 *
 * If z is neither NaN nor zero and w is zero, or if z is infinite
 * and w is finite and nonzero, _D_cplx_div delivers an infinite
 * result.  If z is finite and w is infinite, _D_cplx_div delivers
 * a zero result.
 *
 * If z and w are both zero or both infinite, or if either z or w is
 * a complex NaN, _D_cplx_div delivers NaN + I * NaN.  C99 doesn't
 * specify these cases.
 *
 * This implementation can raise spurious underflow, overflow,
 * invalid operation, inexact, and division-by-zero exceptions.
 * C99 allows this.
 *
 * Warning: Do not attempt to "optimize" this code by removing
 * multiplications by zero.
 */
*0Sstevel@tonic-gate
*0Sstevel@tonic-gate#if !defined(sparc) && !defined(__sparc)
*0Sstevel@tonic-gate#error This code is for SPARC only
*0Sstevel@tonic-gate#endif
*0Sstevel@tonic-gate
/*
 * Bit pattern of +Infinity, spelled as two 32-bit words: i[0] is
 * 0x7ff00000 (sign 0, exponent all ones, high fraction bits zero) and
 * i[1] is zero.  Reading it back through .d yields +Inf only where
 * i[0] overlays the high-order word of the double, i.e. the big-endian
 * SPARC layout this file is written for.
 *
 * The object is only ever read, so it is declared const.
 */
static const union {
	int	i[2];
	double	d;
} inf = {
	{ 0x7ff00000, 0 }
};
*0Sstevel@tonic-gate
/*
 * Classify x as an infinity.
 *
 * Returns +1 if x is +Inf, -1 if x is -Inf, and 0 for every other
 * value (zeros, subnormals, finite numbers and NaNs).
 *
 * The check is made on the full 64-bit IEEE 754 bit pattern using
 * unsigned arithmetic only, so it involves no signed shifts and does
 * not depend on which 32-bit half of the double comes first in memory.
 * It assumes double and unsigned long long are both 64 bits wide.
 */
static int
testinf(double x)
{
	union {
		unsigned long long	u;
		double			d;
	} xx;

	xx.d = x;

	/*
	 * With the sign bit masked off, an infinity is exactly
	 * "exponent all ones, fraction all zeros".
	 */
	if ((xx.u & 0x7fffffffffffffffULL) != 0x7ff0000000000000ULL)
		return (0);

	/* bit 63 is the sign bit */
	return (((xx.u >> 63) != 0)? -1 : 1);
}
*0Sstevel@tonic-gate
*0Sstevel@tonic-gatedouble _Complex
*0Sstevel@tonic-gate_D_cplx_div(double _Complex z, double _Complex w)
*0Sstevel@tonic-gate{
*0Sstevel@tonic-gate	double _Complex	v;
*0Sstevel@tonic-gate	union {
*0Sstevel@tonic-gate		int	i[2];
*0Sstevel@tonic-gate		double	d;
*0Sstevel@tonic-gate	} aa, bb, cc, dd, ss;
*0Sstevel@tonic-gate	double		a, b, c, d, r;
*0Sstevel@tonic-gate	int		ha, hb, hc, hd, hz, hw, hs, i, j;
*0Sstevel@tonic-gate
*0Sstevel@tonic-gate	/*
*0Sstevel@tonic-gate	 * The following is equivalent to
*0Sstevel@tonic-gate	 *
*0Sstevel@tonic-gate	 *  a = creal(z); b = cimag(z);
*0Sstevel@tonic-gate	 *  c = creal(w); d = cimag(w);
*0Sstevel@tonic-gate	 */
*0Sstevel@tonic-gate	a = ((double *)&z)[0];
*0Sstevel@tonic-gate	b = ((double *)&z)[1];
*0Sstevel@tonic-gate	c = ((double *)&w)[0];
*0Sstevel@tonic-gate	d = ((double *)&w)[1];
*0Sstevel@tonic-gate
*0Sstevel@tonic-gate	/* extract high-order words to estimate |z| and |w| */
*0Sstevel@tonic-gate	aa.d = a;
*0Sstevel@tonic-gate	bb.d = b;
*0Sstevel@tonic-gate	ha = aa.i[0] & ~0x80000000;
*0Sstevel@tonic-gate	hb = bb.i[0] & ~0x80000000;
*0Sstevel@tonic-gate	hz = (ha > hb)? ha : hb;
*0Sstevel@tonic-gate
*0Sstevel@tonic-gate	cc.d = c;
*0Sstevel@tonic-gate	dd.d = d;
*0Sstevel@tonic-gate	hc = cc.i[0] & ~0x80000000;
*0Sstevel@tonic-gate	hd = dd.i[0] & ~0x80000000;
*0Sstevel@tonic-gate	hw = (hc > hd)? hc : hd;
*0Sstevel@tonic-gate
*0Sstevel@tonic-gate	/* check for special cases */
*0Sstevel@tonic-gate	if (hw >= 0x7ff00000) { /* w is inf or nan */
*0Sstevel@tonic-gate		r = 0.0;
*0Sstevel@tonic-gate		i = testinf(c);
*0Sstevel@tonic-gate		j = testinf(d);
*0Sstevel@tonic-gate		if (i | j) { /* w is infinite */
*0Sstevel@tonic-gate			/*
*0Sstevel@tonic-gate			 * "factor out" infinity, being careful to preserve
*0Sstevel@tonic-gate			 * signs of finite values
*0Sstevel@tonic-gate			 */
*0Sstevel@tonic-gate			c = i? i : ((cc.i[0] < 0)? -0.0 : 0.0);
*0Sstevel@tonic-gate			d = j? j : ((dd.i[0] < 0)? -0.0 : 0.0);
*0Sstevel@tonic-gate			if (hz >= 0x7fe00000) {
*0Sstevel@tonic-gate				/* scale to avoid overflow below */
*0Sstevel@tonic-gate				c *= 0.5;
*0Sstevel@tonic-gate				d *= 0.5;
*0Sstevel@tonic-gate			}
*0Sstevel@tonic-gate		}
*0Sstevel@tonic-gate		((double *)&v)[0] = (a * c + b * d) * r;
*0Sstevel@tonic-gate		((double *)&v)[1] = (b * c - a * d) * r;
*0Sstevel@tonic-gate		return (v);
*0Sstevel@tonic-gate	}
*0Sstevel@tonic-gate
*0Sstevel@tonic-gate	if (hw < 0x00100000) {
*0Sstevel@tonic-gate		/*
*0Sstevel@tonic-gate		 * This nonsense is needed to work around some SPARC
*0Sstevel@tonic-gate		 * implementations of nonstandard mode; if both parts
*0Sstevel@tonic-gate		 * of w are subnormal, multiply them by one to force
*0Sstevel@tonic-gate		 * them to be flushed to zero when nonstandard mode
*0Sstevel@tonic-gate		 * is enabled.  Sheesh.
*0Sstevel@tonic-gate		 */
*0Sstevel@tonic-gate		cc.d = c = c * 1.0;
*0Sstevel@tonic-gate		dd.d = d = d * 1.0;
*0Sstevel@tonic-gate		hc = cc.i[0] & ~0x80000000;
*0Sstevel@tonic-gate		hd = dd.i[0] & ~0x80000000;
*0Sstevel@tonic-gate		hw = (hc > hd)? hc : hd;
*0Sstevel@tonic-gate	}
*0Sstevel@tonic-gate
*0Sstevel@tonic-gate	if (hw == 0 && (cc.i[1] | dd.i[1]) == 0) {
*0Sstevel@tonic-gate		/* w is zero; multiply z by 1/Re(w) - I * Im(w) */
*0Sstevel@tonic-gate		c = 1.0 / c;
*0Sstevel@tonic-gate		i = testinf(a);
*0Sstevel@tonic-gate		j = testinf(b);
*0Sstevel@tonic-gate		if (i | j) { /* z is infinite */
*0Sstevel@tonic-gate			a = i;
*0Sstevel@tonic-gate			b = j;
*0Sstevel@tonic-gate		}
*0Sstevel@tonic-gate		((double *)&v)[0] = a * c + b * d;
*0Sstevel@tonic-gate		((double *)&v)[1] = b * c - a * d;
*0Sstevel@tonic-gate		return (v);
*0Sstevel@tonic-gate	}
*0Sstevel@tonic-gate
*0Sstevel@tonic-gate	if (hz >= 0x7ff00000) { /* z is inf or nan */
*0Sstevel@tonic-gate		r = 1.0;
*0Sstevel@tonic-gate		i = testinf(a);
*0Sstevel@tonic-gate		j = testinf(b);
*0Sstevel@tonic-gate		if (i | j) { /* z is infinite */
*0Sstevel@tonic-gate			a = i;
*0Sstevel@tonic-gate			b = j;
*0Sstevel@tonic-gate			r = inf.d;
*0Sstevel@tonic-gate		}
*0Sstevel@tonic-gate		((double *)&v)[0] = (a * c + b * d) * r;
*0Sstevel@tonic-gate		((double *)&v)[1] = (b * c - a * d) * r;
*0Sstevel@tonic-gate		return (v);
*0Sstevel@tonic-gate	}
*0Sstevel@tonic-gate
*0Sstevel@tonic-gate	/*
*0Sstevel@tonic-gate	 * Scale c and d to compute 1/|w|^2 and the real and imaginary
*0Sstevel@tonic-gate	 * parts of the quotient.
*0Sstevel@tonic-gate	 *
*0Sstevel@tonic-gate	 * Note that for any s, if we let c' = sc, d' = sd, c'' = sc',
*0Sstevel@tonic-gate	 * and d'' = sd', then
*0Sstevel@tonic-gate	 *
*0Sstevel@tonic-gate	 *  (ac'' + bd'') / (c'^2 + d'^2) = (ac + bd) / (c^2 + d^2)
*0Sstevel@tonic-gate	 *
*0Sstevel@tonic-gate	 * and similarly for the imaginary part of the quotient.  We want
*0Sstevel@tonic-gate	 * to choose s such that (i) r := 1/(c'^2 + d'^2) can be computed
*0Sstevel@tonic-gate	 * without overflow or harmful underflow, and (ii) (ac'' + bd'')
*0Sstevel@tonic-gate	 * and (bc'' - ad'') can be computed without spurious overflow or
*0Sstevel@tonic-gate	 * harmful underflow.  To avoid unnecessary rounding, we restrict
*0Sstevel@tonic-gate	 * s to a power of two.
*0Sstevel@tonic-gate	 *
*0Sstevel@tonic-gate	 * To satisfy (i), we need to choose s such that max(|c'|,|d'|)
*0Sstevel@tonic-gate	 * is not too far from one.  To satisfy (ii), we need to choose
*0Sstevel@tonic-gate	 * s such that max(|c''|,|d''|) is also not too far from one.
*0Sstevel@tonic-gate	 * There is some leeway in our choice, but to keep the logic
*0Sstevel@tonic-gate	 * from getting overly complicated, we simply attempt to roughly
*0Sstevel@tonic-gate	 * balance these constraints by choosing s so as to make r about
*0Sstevel@tonic-gate	 * the same size as max(|c''|,|d''|).  This corresponds to choos-
*0Sstevel@tonic-gate	 * ing s to be a power of two near |w|^(-3/4).
*0Sstevel@tonic-gate	 *
*0Sstevel@tonic-gate	 * Regarding overflow, observe that if max(|c''|,|d''|) <= 1/2,
*0Sstevel@tonic-gate	 * then the computation of (ac'' + bd'') and (bc'' - ad'') can-
*0Sstevel@tonic-gate	 * not overflow; otherwise, the computation of either of these
*0Sstevel@tonic-gate	 * values can only incur overflow if the true result would be
*0Sstevel@tonic-gate	 * within a factor of two of the overflow threshold.  In other
*0Sstevel@tonic-gate	 * words, if we bias the choice of s such that at least one of
*0Sstevel@tonic-gate	 *
*0Sstevel@tonic-gate	 *  max(|c''|,|d''|) <= 1/2   or   r >= 2
*0Sstevel@tonic-gate	 *
*0Sstevel@tonic-gate	 * always holds, then no undeserved overflow can occur.
*0Sstevel@tonic-gate	 *
*0Sstevel@tonic-gate	 * To cope with underflow, note that if r < 2^-53, then any
*0Sstevel@tonic-gate	 * intermediate results that underflow are insignificant; either
*0Sstevel@tonic-gate	 * they will be added to normal results, rendering the under-
*0Sstevel@tonic-gate	 * flow no worse than ordinary roundoff, or they will contribute
*0Sstevel@tonic-gate	 * to a final result that is smaller than the smallest subnormal
*0Sstevel@tonic-gate	 * number.  Therefore, we need only modify the preceding logic
*0Sstevel@tonic-gate	 * when z is very small and w is not too far from one.  In that
*0Sstevel@tonic-gate	 * case, we can reduce the effect of any intermediate underflow
*0Sstevel@tonic-gate	 * to no worse than ordinary roundoff error by choosing s so as
*0Sstevel@tonic-gate	 * to make max(|c''|,|d''|) large enough that at least one of
*0Sstevel@tonic-gate	 * (ac'' + bd'') or (bc'' - ad'') is normal.
*0Sstevel@tonic-gate	 */
*0Sstevel@tonic-gate	hs = (((hw >> 2) - hw) + 0x6fd7ffff) & 0xfff00000;
*0Sstevel@tonic-gate	if (hz < 0x07200000) { /* |z| < 2^-909 */
*0Sstevel@tonic-gate		if (((hw - 0x32800000) | (0x47100000 - hw)) >= 0)
*0Sstevel@tonic-gate			hs = (((0x47100000 - hw) >> 1) & 0xfff00000)
*0Sstevel@tonic-gate				+ 0x3ff00000;
*0Sstevel@tonic-gate	}
*0Sstevel@tonic-gate	ss.i[0] = hs;
*0Sstevel@tonic-gate	ss.i[1] = 0;
*0Sstevel@tonic-gate
*0Sstevel@tonic-gate	c *= ss.d;
*0Sstevel@tonic-gate	d *= ss.d;
*0Sstevel@tonic-gate	r = 1.0 / (c * c + d * d);
*0Sstevel@tonic-gate
*0Sstevel@tonic-gate	c *= ss.d;
*0Sstevel@tonic-gate	d *= ss.d;
*0Sstevel@tonic-gate	((double *)&v)[0] = (a * c + b * d) * r;
*0Sstevel@tonic-gate	((double *)&v)[1] = (b * c - a * d) * r;
*0Sstevel@tonic-gate	return (v);
*0Sstevel@tonic-gate}