!
! Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
! Use is subject to license terms.
!
! CDDL HEADER START
!
! The contents of this file are subject to the terms of the
! Common Development and Distribution License, Version 1.0 only
! (the "License").  You may not use this file except in compliance
! with the License.
!
! You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
! or http://www.opensolaris.org/os/licensing.
! See the License for the specific language governing permissions
! and limitations under the License.
!
! When distributing Covered Code, include this CDDL HEADER in each
! file and include the License file at usr/src/OPENSOLARIS.LICENSE.
! If applicable, add the following below this CDDL HEADER, with the
! fields enclosed by brackets "[]" replaced with your own identifying
! information: Portions Copyright [yyyy] [name of copyright owner]
!
! CDDL HEADER END
!

.ident	"%Z%%M%	%I%	%E% SMI"

! /*
!  * This file contains __quad_mag_add and __quad_mag_sub, the core
!  * of the quad precision add and subtract operations.
!  */
!
! SPARC V9 version hand-coded in assembly to use 64-bit integer registers

	.file "__quad_mag64.s"

#include <sys/asm_linkage.h>

! union longdouble {
!	struct {
!		unsigned int	msw;
!		unsigned int	frac2;
!		unsigned int	frac3;
!		unsigned int	frac4;
!	} l;
!	struct {
!		unsigned long	msll;
!		unsigned long	frac;
!	} ll;
!	long double	d;
! };
!
! /*
!  * __quad_mag_add(x, y, z, fsr)
!  *
!  * Sets *z = *x + *y, rounded according to the rounding mode in *fsr,
!  * and updates the current exceptions in *fsr.  This routine assumes
!  * *x and *y are finite, with the same sign (i.e., an addition of
!  * magnitudes), |*x| >= |*y|, and *z already has its sign bit set.
!  */
!
! The assembly below implements the following reference C code:
!
! void
! __quad_mag_add(const union longdouble *x, const union longdouble *y,
!	union longdouble *z, unsigned int *fsr)
! {
!	unsigned long	lx, ly, frac, sticky;
!	unsigned int	ex, ey, round, rm;
!	int		e, uflo;
!
!	/* get the leading significand double-words and exponents */
!	ex = (x->ll.msll >> 48) & 0x7fff;
!	lx = x->ll.msll & ~0xffff000000000000ul;
!	if (ex == 0)
!		ex = 1;
!	else
!		lx |= 0x0001000000000000ul;
!
!	ey = (y->ll.msll >> 48) & 0x7fff;
!	ly = y->ll.msll & ~0xffff000000000000ul;
!	if (ey == 0)
!		ey = 1;
!	else
!		ly |= 0x0001000000000000ul;
!
!	/* prenormalize y */
!	e = (int) ex - (int) ey;
!	round = sticky = 0;
!	if (e >= 114) {
!		frac = x->ll.frac;
!		sticky = ly | y->ll.frac;
!	} else {
!		frac = y->ll.frac;
!		if (e >= 64) {
!			sticky = frac & 0x7ffffffffffffffful;
!			round = frac >> 63;
!			frac = ly;
!			ly = 0;
!			e -= 64;
!		}
!		if (e) {
!			sticky |= round | (frac & ((1ul << (e - 1)) - 1));
!			round = (frac >> (e - 1)) & 1;
!			frac = (frac >> e) | (ly << (64 - e));
!			ly >>= e;
!		}
!
!		/* add, propagating carries */
!		frac += x->ll.frac;
!		lx += ly;
!		if (frac < x->ll.frac)
!			lx++;
!
!		/* postnormalize */
!		if (lx >= 0x0002000000000000ul) {
!			sticky |= round;
!			round = frac & 1;
!			frac = (frac >> 1) | (lx << 63);
!			lx >>= 1;
!			ex++;
!		}
!	}
!
!	/* keep track of whether the result before rounding is tiny */
!	uflo = (lx < 0x0001000000000000ul);
!
!	/* get the rounding mode, fudging directed rounding modes
!	   as though the result were positive */
!	rm = *fsr >> 30;
!	if (z->l.msw)
!		rm ^= (rm >> 1);
!
!	/* see if we need to round */
!	if (round | sticky) {
!		*fsr |= FSR_NXC;
!
!		/* round up if necessary */
!		if (rm == FSR_RP || (rm == FSR_RN && round &&
!		    (sticky || (frac & 1)))) {
!			if (++frac == 0)
!				if (++lx >= 0x0002000000000000ul) {
!					lx >>= 1;
!					ex++;
!				}
!		}
!	}
!
!	/* check for overflow */
!	if (ex >= 0x7fff) {
!		/* store the default overflowed result */
!		*fsr |= FSR_OFC | FSR_NXC;
!		if (rm == FSR_RN || rm == FSR_RP) {
!			z->l.msw |= 0x7fff0000;
!			z->l.frac2 = 0;
!			z->ll.frac = 0;
!		} else {
!			z->l.msw |= 0x7ffeffff;
!			z->l.frac2 = 0xffffffff;
!			z->ll.frac = 0xfffffffffffffffful;
!		}
!	} else {
!		/* store the result */
!		if (lx >= 0x0001000000000000ul)
!			z->l.msw |= (ex << 16);
!		z->l.msw |= (lx >> 32) & 0xffff;
!		z->l.frac2 = (lx & 0xffffffff);
!		z->ll.frac = frac;
!
!		/* if the pre-rounded result was tiny and underflow trapping
!		   is enabled, simulate underflow */
!		if (uflo && (*fsr & FSR_UFM))
!			*fsr |= FSR_UFC;
!	}
! }

! Register usage (see inline annotations below):
!   %i0 = x, %i1 = y, %i2 = z, %i3 = fsr
!   %o0 = lx, %o1 = ly, %o2 = frac, %o7 = sticky
!   %l0 = ex, %l1 = e (= ex - ey), %l3 = rm, %l6 = round
!   %g1 = 0xffff000000000000 mask, %l7 = 0x7fff exponent mask
! NOTE(review): many instructions sit in branch delay slots and execute
! regardless of the branch outcome -- do not reorder.

	ENTRY(__quad_mag_add)
	save	%sp,-SA(MINFRAME),%sp

	sethi	%hi(0xffff0000),%g1
	sllx	%g1,32,%g1		! g1 = 0xffff000000000000

	sethi	%hi(0x7fff),%l7
	or	%l7,%lo(0x7fff),%l7	! l7 = 0x7fff

	ldx	[%i0],%o0
	srlx	%o0,48,%l0
	andcc	%l0,%l7,%l0		! l0 = ex
	beq,pn	%icc,1f
	andn	%o0,%g1,%o0		! o0 = lx
	ba,pt	%icc,2f
	sub	%o0,%g1,%o0		! set implicit bit (subtract = or here)
1:
	mov	1,%l0			! subnormal: ex = 1, no implicit bit
2:

	ldx	[%i1],%o1
	srlx	%o1,48,%l1
	andcc	%l1,%l7,%l1		! l1 = ey
	beq,pn	%icc,1f
	andn	%o1,%g1,%o1		! o1 = ly
	ba,pt	%icc,2f
	sub	%o1,%g1,%o1		! set implicit bit
1:
	mov	1,%l1			! subnormal: ey = 1, no implicit bit
2:

	sub	%l0,%l1,%l1		! l1 = e = ex - ey
	cmp	%l1,114			! see if we need to prenormalize
	bge,pn	%icc,1f
	mov	0,%l6			! l6 = round
	mov	0,%o7			! o7 = sticky
	cmp	%l1,64
	bl,pt	%icc,3f
	ldx	[%i1+8],%o2		! o2 = frac
	sllx	%o2,1,%o7		! lop off high order bit
	srlx	%o2,63,%l6
	mov	%o1,%o2
	mov	0,%o1
	sub	%l1,64,%l1
3:
	tst	%l1
	beq,pn	%icc,4f
	sub	%l1,1,%l2
	mov	1,%o3			! o3 = (1 << (e-1)) - 1, sticky-bit mask
	sllx	%o3,%l2,%o3
	sub	%o3,1,%o3
	and	%o3,%o2,%o3
	or	%o3,%l6,%o3
	or	%o7,%o3,%o7
	srlx	%o2,%l2,%o4
	and	%o4,1,%l6		! round = (frac >> (e-1)) & 1
	srlx	%o2,%l1,%o2
	mov	64,%l3
	sub	%l3,%l1,%l3
	sllx	%o1,%l3,%o5		! frac = (frac >> e) | (ly << (64-e))
	or	%o2,%o5,%o2
	srlx	%o1,%l1,%o1
4:
	ldx	[%i0+8],%o3
	add	%o2,%o3,%o2		! add, propagating carry
	cmp	%o2,%o3
	bgeu,pt	%xcc,5f
	add	%o0,%o1,%o0
	add	%o0,1,%o0
5:
	srlx	%o0,49,%o5		! if sum carried out, postnormalize
	tst	%o5
	beq,pt	%icc,2f
	nop
	or	%o7,%l6,%o7
	and	%o2,1,%l6
	srlx	%o2,1,%o2
	sllx	%o0,63,%o3
	or	%o2,%o3,%o2
	srlx	%o0,1,%o0
	ba,pt	%icc,2f
	add	%l0,1,%l0
1:
	ldx	[%i0+8],%o2		! (full prenormalization shift case)
	ldx	[%i1+8],%o3
	or	%o1,%o3,%o7		! all of y goes to sticky
2:

	add	%o0,%g1,%o1		! see if sum is tiny
	srlx	%o1,63,%l2		! l2 = uflo

	ld	[%i3],%i4		! get the rounding mode
	srl	%i4,30,%l3		! l3 = rm
	ld	[%i2],%l4		! l4 = z->l.msw
	tst	%l4
	beq,pn	%icc,1f
	srl	%l3,1,%l5
	xor	%l3,%l5,%l3		! fudge rm as though result positive
1:

	orcc	%o7,%l6,%g0		! see if we need to round
	beq,pn	%xcc,1f
	andcc	%l3,1,%g0
	or	%i4,1,%i4		! raise inexact (FSR_NXC)
	bne,pn	%icc,1f
	tst	%l3
	bne,pn	%icc,2f
	tst	%l6
	beq,pn	%icc,1f
	and	%o2,1,%o3
	orcc	%o3,%o7,%g0		! round-to-nearest ties/sticky check
	beq,pn	%xcc,1f
	nop
2:
	addcc	%o2,1,%o2		! round up and check for carry out
	bne,pt	%xcc,1f
	nop
	add	%o0,1,%o0
	srlx	%o0,49,%o1
	tst	%o1
	beq,pt	%icc,1f
	nop
	srlx	%o0,1,%o0		! carry out of significand: renormalize
	add	%l0,1,%l0
1:

	cmp	%l0,%l7			! check for overflow
	bge,pn	%icc,1f
	addcc	%o0,%g1,%g0
	bl,pn	%xcc,2f
	sll	%l0,16,%l1
	or	%l4,%l1,%l4		! store exponent only if normal
2:
	sllx	%o0,16,%o1
	srlx	%o1,48,%o1
	or	%l4,%o1,%l4
	st	%l4,[%i2]
	st	%o0,[%i2+4]
	stx	%o2,[%i2+8]
	tst	%l2			! see if we need to raise underflow
	beq,pt	%icc,3f
	srl	%i4,23,%i5
	andcc	%i5,4,%i5		! FSR_UFC only if UFM trap enabled
	ba,pt	%icc,3f
	or	%i4,%i5,%i4

1:
	andcc	%l3,1,%g0
	bne,pn	%icc,2f
	or	%i4,9,%i4		! overflow (FSR_OFC | FSR_NXC)
	sll	%l7,16,%l7		! 7fff00000... (round to infinity)
	or	%l4,%l7,%l4
	st	%l4,[%i2]
	st	%g0,[%i2+4]
	ba,pt	%icc,3f
	stx	%g0,[%i2+8]
2:
	mov	-1,%o0			! 7ffeffff... (round to max finite)
	sll	%l7,16,%l7
	add	%o0,%l7,%l7
	or	%l4,%l7,%l4
	st	%l4,[%i2]
	st	%o0,[%i2+4]
	stx	%o0,[%i2+8]

3:
	st	%i4,[%i3]		! write back updated fsr
	ret
	restore

	SET_SIZE(__quad_mag_add)

! /*
!  * __quad_mag_sub(x, y, z, fsr)
!  *
!  * Sets *z = *x - *y, rounded according to the rounding mode in *fsr,
!  * and updates the current exceptions in *fsr.
!  * This routine assumes
!  * *x and *y are finite, with opposite signs (i.e., a subtraction of
!  * magnitudes), |*x| >= |*y|, and *z already has its sign bit set.
!  */
!
! The assembly below implements the following reference C code:
!
! void
! __quad_mag_sub(const union longdouble *x, const union longdouble *y,
!	union longdouble *z, unsigned int *fsr)
! {
!	unsigned long	lx, ly, frac, sticky;
!	unsigned int	ex, ey, gr, borrow, rm;
!	int		e;
!
!	/* get the leading significand double-words and exponents */
!	ex = (x->ll.msll >> 48) & 0x7fff;
!	lx = x->ll.msll & ~0xffff000000000000ul;
!	if (ex == 0)
!		ex = 1;
!	else
!		lx |= 0x0001000000000000ul;
!
!	ey = (y->ll.msll >> 48) & 0x7fff;
!	ly = y->ll.msll & ~0xffff000000000000ul;
!	if (ey == 0)
!		ey = 1;
!	else
!		ly |= 0x0001000000000000ul;
!
!	/* prenormalize y */
!	e = (int) ex - (int) ey;
!	gr = sticky = 0;
!	if (e > 114) {
!		sticky = ly | y->ll.frac;
!		ly = frac = 0;
!	} else {
!		frac = y->ll.frac;
!		if (e >= 64) {
!			gr = frac >> 62;
!			sticky = frac << 2;
!			frac = ly;
!			ly = 0;
!			e -= 64;
!		}
!		if (e > 1) {
!			sticky |= gr | (frac & ((1ul << (e - 2)) - 1));
!			gr = (frac >> (e - 2)) & 3;
!			frac = (frac >> e) | (ly << (64 - e));
!			ly >>= e;
!		} else if (e == 1) {
!			sticky |= (gr & 1);
!			gr = (gr >> 1) | ((frac & 1) << 1);
!			frac = (frac >> 1) | (ly << 63);
!			ly >>= 1;
!		}
!	}
!
!	/* complement guard, round, and sticky as need be */
!	gr <<= 1;
!	if (sticky)
!		gr |= 1;
!	gr = (-gr & 7);
!	if (gr)
!		if (++frac == 0)
!			ly++;
!
!	/* subtract, propagating borrows */
!	frac = x->ll.frac - frac;
!	lx -= ly;
!	if (frac > x->ll.frac)
!		lx--;
!
!	/* get the rounding mode */
!	rm = *fsr >> 30;
!
!	/* handle zero result */
!	if (!(lx | frac | gr)) {
!		z->l.msw = ((rm == FSR_RM)? 0x80000000 : 0);
!		z->l.frac2 = z->l.frac3 = z->l.frac4 = 0;
!		return;
!	}
!
!	/* postnormalize */
!	if (lx < 0x0001000000000000ul) {
!		/* if cancellation occurred or the exponent is 1,
!		   the result is exact */
!		if (lx < 0x0000800000000000ul || ex == 1) {
!			if ((lx | (frac & 0xfffe000000000000ul)) == 0 &&
!			    ex > 64) {
!				lx = frac;
!				frac = (unsigned long) gr << 61;
!				gr = 0;
!				ex -= 64;
!			}
!			while (lx < 0x0001000000000000ul && ex > 1) {
!				lx = (lx << 1) | (frac >> 63);
!				frac = (frac << 1) | (gr >> 2);
!				gr = 0;
!				ex--;
!			}
!			if (lx >= 0x0001000000000000ul)
!				z->l.msw |= (ex << 16);
!			z->l.msw |= ((lx >> 32) & 0xffff);
!			z->l.frac2 = (lx & 0xffffffff);
!			z->ll.frac = frac;
!
!			/* if the result is tiny and underflow trapping is
!			   enabled, simulate underflow */
!			if (lx < 0x0001000000000000ul && (*fsr & FSR_UFM))
!				*fsr |= FSR_UFC;
!			return;
!		}
!
!		/* otherwise we only borrowed one place */
!		lx = (lx << 1) | (frac >> 63);
!		frac = (frac << 1) | (gr >> 2);
!		gr &= 3;
!		ex--;
!	}
!	else
!		gr = (gr >> 1) | (gr & 1);
!
!	/* fudge directed rounding modes as though the result were positive */
!	if (z->l.msw)
!		rm ^= (rm >> 1);
!
!	/* see if we need to round */
!	if (gr) {
!		*fsr |= FSR_NXC;
!
!		/* round up if necessary */
!		if (rm == FSR_RP || (rm == FSR_RN && (gr & 2) &&
!		    ((gr & 1) || (frac & 1)))) {
!			if (++frac == 0)
!				if (++lx >= 0x0002000000000000ul) {
!					lx >>= 1;
!					ex++;
!				}
!		}
!	}
!
!	/* store the result */
!	z->l.msw |= (ex << 16) | ((lx >> 32) & 0xffff);
!	z->l.frac2 = (lx & 0xffffffff);
!	z->ll.frac = frac;
! }

! Register usage (see inline annotations below):
!   %i0 = x, %i1 = y, %i2 = z, %i3 = fsr
!   %o0 = lx, %o1 = ly, %o2 = frac, %o7 = sticky
!   %l0 = ex, %l1 = e (= ex - ey), %l3 = rm, %l6 = gr (guard/round bits)
!   %g1 = 0xffff000000000000 mask, %l7 = 0x7fff exponent mask
! NOTE(review): many instructions sit in branch delay slots and execute
! regardless of the branch outcome -- do not reorder.

	ENTRY(__quad_mag_sub)
	save	%sp,-SA(MINFRAME),%sp

	sethi	%hi(0xffff0000),%g1
	sllx	%g1,32,%g1		! g1 = 0xffff000000000000

	sethi	%hi(0x7fff),%l7
	or	%l7,%lo(0x7fff),%l7	! l7 = 0x7fff

	ldx	[%i0],%o0
	srlx	%o0,48,%l0
	andcc	%l0,%l7,%l0		! l0 = ex
	beq,pn	%icc,1f
	andn	%o0,%g1,%o0		! o0 = lx
	ba,pt	%icc,2f
	sub	%o0,%g1,%o0		! set implicit bit
1:
	mov	1,%l0			! subnormal: ex = 1, no implicit bit
2:

	ldx	[%i1],%o1
	srlx	%o1,48,%l1
	andcc	%l1,%l7,%l1		! l1 = ey
	beq,pn	%icc,1f
	andn	%o1,%g1,%o1		! o1 = ly
	ba,pt	%icc,2f
	sub	%o1,%g1,%o1		! set implicit bit
1:
	mov	1,%l1			! subnormal: ey = 1, no implicit bit
2:

	sub	%l0,%l1,%l1		! l1 = e = ex - ey
	cmp	%l1,114			! see if we need to prenormalize y
	bg,pn	%icc,1f
	mov	0,%l6			! l6 = gr
	mov	0,%o7			! o7 = sticky
	cmp	%l1,64
	bl,pt	%icc,3f
	ldx	[%i1+8],%o2		! o2 = frac
	srlx	%o2,62,%l6
	sllx	%o2,2,%o7		! lop off top two bits
	mov	%o1,%o2
	mov	0,%o1
	sub	%l1,64,%l1
3:
	cmp	%l1,1
	ble,pn	%icc,4f
	sub	%l1,2,%l2		! shift more than one bit
	mov	1,%o3			! o3 = (1 << (e-2)) - 1, sticky mask
	sllx	%o3,%l2,%o3
	sub	%o3,1,%o3
	and	%o3,%o2,%o3
	or	%o3,%l6,%o3
	or	%o7,%o3,%o7
	srlx	%o2,%l2,%o4
	and	%o4,3,%l6		! gr = (frac >> (e-2)) & 3
	srlx	%o2,%l1,%o2
	mov	64,%l3
	sub	%l3,%l1,%l3
	sllx	%o1,%l3,%o5		! frac = (frac >> e) | (ly << (64-e))
	or	%o2,%o5,%o2
	ba,pt	%icc,2f
	srlx	%o1,%l1,%o1
4:
	bne,pn	%icc,2f
	and	%l6,1,%o3		! shift one bit
	or	%o7,%o3,%o7
	and	%o2,1,%o4
	sllx	%o4,1,%o4
	srl	%l6,1,%l6
	or	%l6,%o4,%l6
	srlx	%o2,1,%o2
	sllx	%o1,63,%o5
	or	%o2,%o5,%o2
	ba,pt	%icc,2f
	srlx	%o1,1,%o1
1:
	ldx	[%i1+8],%o3		! (full prenormalization shift case)
	or	%o1,%o3,%o7		! all of y goes to sticky
	mov	0,%o1
	mov	0,%o2
2:

	tst	%o7			! complement guard, round, and
	beq,pn	%xcc,1f			! sticky as need be
	sll	%l6,1,%l6
	or	%l6,1,%l6
1:
	subcc	%g0,%l6,%l6		! gr = -gr & 7
	beq,pn	%icc,1f
	and	%l6,7,%l6
	addcc	%o2,1,%o2		! gr nonzero: increment frac,
	beq,a,pn %xcc,1f		! propagating carry into ly
	add	%o1,1,%o1
1:

	ldx	[%i0+8],%o3		! subtract, propagating borrows
	sub	%o3,%o2,%o2
	cmp	%o3,%o2
	bgeu,pt	%xcc,5f
	sub	%o0,%o1,%o0
	sub	%o0,1,%o0
5:

	ld	[%i3],%i4		! get the rounding mode
	srl	%i4,30,%l3		! l3 = rm

	or	%o0,%o2,%o1		! look for zero result
	orcc	%o1,%l6,%g0
	bne,pt	%xcc,1f
	srl	%l3,1,%l4
	and	%l3,%l4,%l4		! zero is -0 only in round-to-minus
	sll	%l4,31,%l4
	st	%l4,[%i2]
	st	%g0,[%i2+4]
	stx	%g0,[%i2+8]
	ret
	restore

1:
	addcc	%o0,%g1,%g0		! postnormalize
	bl,pt	%xcc,1f
	ld	[%i2],%l4		! l4 = z->l.msw
	and	%l6,1,%l5		! (no cancellation or borrow case)
	srl	%l6,1,%l6
	ba,pt	%icc,2f
	or	%l6,%l5,%l6		! gr = (gr >> 1) | (gr & 1)
1:
	srax	%g1,1,%o7
	addcc	%o0,%o7,%g0
	bl,pn	%xcc,1f
	cmp	%l0,1
	beq,pt	%icc,1f
	srlx	%o2,63,%o3		! borrowed one place
	sllx	%o0,1,%o0
	or	%o0,%o3,%o0
	srl	%l6,2,%o4
	sllx	%o2,1,%o2
	or	%o2,%o4,%o2
	and	%l6,3,%l6
	ba,pt	%icc,2f
	sub	%l0,1,%l0
1:
	srlx	%o2,49,%o3		! cancellation or tiny result
	orcc	%o0,%o3,%g0
	bne,pt	%xcc,1f
	cmp	%l0,64
	ble,pn	%icc,1f
	nop
	mov	%o2,%o0			! shift a full word at once
	sllx	%l6,61,%o2
	mov	0,%l6
	sub	%l0,64,%l0
1:
	addcc	%o0,%g1,%g0		! normalization loop
	bge,pn	%xcc,1f
	cmp	%l0,1
	ble,pn	%icc,1f
	srl	%l6,2,%l6
	srlx	%o2,63,%o3
	sllx	%o0,1,%o0
	or	%o0,%o3,%o0
	sllx	%o2,1,%o2
	or	%o2,%l6,%o2
	ba,pt	%icc,1b
	sub	%l0,1,%l0
1:
	sllx	%o0,16,%o1
	srlx	%o1,48,%l5
	or	%l4,%l5,%l4
	addcc	%o0,%g1,%g0		! see if result is tiny
	bl,pn	%xcc,1f
	sll	%l0,16,%l5
	or	%l4,%l5,%l4		! store exponent only if normal
1:
	st	%l4,[%i2]
	st	%o0,[%i2+4]
	bge,pt	%xcc,1f
	stx	%o2,[%i2+8]
	srl	%i4,23,%i5
	andcc	%i5,4,%g0		! see if we need to raise underflow
	beq,pt	%icc,1f
	or	%i4,4,%i4		! FSR_UFC
	st	%i4,[%i3]
1:
	ret
	restore

2:
	tst	%l4			! fudge directed rounding modes
	beq,pn	%icc,1f
	srl	%l3,1,%l5
	xor	%l3,%l5,%l3
1:

	tst	%l6			! see if we need to round
	beq,pn	%icc,1f
	or	%i4,1,%i4		! raise inexact (FSR_NXC)
	st	%i4,[%i3]
	andcc	%l3,1,%g0
	bne,pn	%icc,1f
	tst	%l3
	bne,pn	%icc,2f
	andcc	%l6,2,%g0
	beq,pn	%icc,1f
	or	%l6,%o2,%o3
	andcc	%o3,1,%o3		! round-to-nearest ties/sticky check
	beq,pn	%xcc,1f
	nop
2:
	addcc	%o2,1,%o2		! round up and check for carry
	bne,pt	%xcc,1f
	nop
	add	%o0,1,%o0
	srlx	%o0,49,%o1
	tst	%o1
	beq,pt	%icc,1f
	nop
	srlx	%o0,1,%o0		! carry out of significand: renormalize
	add	%l0,1,%l0
1:

	sllx	%o0,16,%o1
	srlx	%o1,48,%o1
	or	%l4,%o1,%l4
	sll	%l0,16,%l5
	or	%l4,%l5,%l4
	st	%l4,[%i2]
	st	%o0,[%i2+4]
	stx	%o2,[%i2+8]
	ret
	restore

	SET_SIZE(__quad_mag_sub)