1*24566Szliu# 2*24566Szliu# Copyright (c) 1985 Regents of the University of California. 3*24566Szliu# 4*24566Szliu# Use and reproduction of this software are granted in accordance with 5*24566Szliu# the terms and conditions specified in the Berkeley Software License 6*24566Szliu# Agreement (in particular, this entails acknowledgement of the programs' 7*24566Szliu# source, and inclusion of this notice) with the additional understanding 8*24566Szliu# that all recipients should regard themselves as participants in an 9*24566Szliu# ongoing research project and hence should feel obligated to report 10*24566Szliu# their experiences (good or bad) with these elementary function codes, 11*24566Szliu# using "sendbug 4bsd-bugs@BERKELEY", to the authors. 12*24566Szliu# 13*24566Szliu 14*24566Szliu# @(#)atan2.s 1.1 (ELEFUNT) 09/06/85 15*24566Szliu 16*24566Szliu# ATAN2(Y,X) 17*24566Szliu# RETURN ARG (X+iY) 18*24566Szliu# VAX D FORMAT (56 BITS PRECISION) 19*24566Szliu# CODED IN VAX ASSEMBLY LANGUAGE BY K.C. NG, 4/16/85; 20*24566Szliu# 21*24566Szliu# 22*24566Szliu# Method : 23*24566Szliu# 1. Reduce y to positive by atan2(y,x)=-atan2(-y,x). 24*24566Szliu# 2. Reduce x to positive by (if x and y are unexceptional): 25*24566Szliu# ARG (x+iy) = arctan(y/x) ... if x > 0, 26*24566Szliu# ARG (x+iy) = pi - arctan[y/(-x)] ... if x < 0, 27*24566Szliu# 3. According to the integer k=4t+0.25 truncated , t=y/x, the argument 28*24566Szliu# is further reduced to one of the following intervals and the 29*24566Szliu# arctangent of y/x is evaluated by the corresponding formula: 30*24566Szliu# 31*24566Szliu# [0,7/16] atan(y/x) = t - t^3*(a1+t^2*(a2+...(a10+t^2*a11)...) 32*24566Szliu# [7/16,11/16] atan(y/x) = atan(1/2) + atan( (y-x/2)/(x+y/2) ) 33*24566Szliu# [11/16.19/16] atan(y/x) = atan( 1 ) + atan( (y-x)/(x+y) ) 34*24566Szliu# [19/16,39/16] atan(y/x) = atan(3/2) + atan( (y-1.5x)/(x+1.5y) ) 35*24566Szliu# [39/16,INF] atan(y/x) = atan(INF) + atan( -x/y ) 36*24566Szliu# 37*24566Szliu# Special cases: 38*24566Szliu# Notations: atan2(y,x) == ARG (x+iy) == ARG(x,y). 39*24566Szliu# 40*24566Szliu# ARG( NAN , (anything) ) is NaN; 41*24566Szliu# ARG( (anything), NaN ) is NaN; 42*24566Szliu# ARG(+(anything but NaN), +-0) is +-0 ; 43*24566Szliu# ARG(-(anything but NaN), +-0) is +-PI ; 44*24566Szliu# ARG( 0, +-(anything but 0 and NaN) ) is +-PI/2; 45*24566Szliu# ARG( +INF,+-(anything but INF and NaN) ) is +-0 ; 46*24566Szliu# ARG( -INF,+-(anything but INF and NaN) ) is +-PI; 47*24566Szliu# ARG( +INF,+-INF ) is +-PI/4 ; 48*24566Szliu# ARG( -INF,+-INF ) is +-3PI/4; 49*24566Szliu# ARG( (anything but,0,NaN, and INF),+-INF ) is +-PI/2; 50*24566Szliu# 51*24566Szliu# Accuracy: 52*24566Szliu# atan2(y,x) returns the exact ARG(x+iy) nearly rounded. 53*24566Szliu# 54*24566Szliu .text 55*24566Szliu .align 1 56*24566Szliu .globl _atan2 57*24566Szliu_atan2 : 58*24566Szliu .word 0x0ff4 59*24566Szliu movq 4(ap),r2 # r2 = y 60*24566Szliu movq 12(ap),r4 # r4 = x 61*24566Szliu bicw3 $0x7f,r2,r0 62*24566Szliu bicw3 $0x7f,r4,r1 63*24566Szliu cmpw r0,$0x8000 # y is the reserved operand 64*24566Szliu jeql resop 65*24566Szliu cmpw r1,$0x8000 # x is the reserved operand 66*24566Szliu jeql resop 67*24566Szliu subl2 $8,sp 68*24566Szliu bicw3 $0x7fff,r2,-4(fp) # copy y sign bit to -4(fp) 69*24566Szliu bicw3 $0x7fff,r4,-8(fp) # copy x sign bit to -8(fp) 70*24566Szliu cmpd r4,$0x4080 # x = 1.0 ? 71*24566Szliu bneq xnot1 72*24566Szliu movq r2,r0 73*24566Szliu bicw2 $0x8000,r0 # t = |y| 74*24566Szliu movq r0,r2 # y = |y| 75*24566Szliu brb begin 76*24566Szliuxnot1: 77*24566Szliu bicw3 $0x807f,r2,r11 # yexp 78*24566Szliu jeql yeq0 # if y=0 goto yeq0 79*24566Szliu bicw3 $0x807f,r4,r10 # xexp 80*24566Szliu jeql pio2 # if x=0 goto pio2 81*24566Szliu subw2 r10,r11 # k = yexp - xexp 82*24566Szliu cmpw r11,$0x2000 # k >= 64 (exp) ? 83*24566Szliu jgeq pio2 # atan2 = +-pi/2 84*24566Szliu divd3 r4,r2,r0 # t = y/x never overflow 85*24566Szliu bicw2 $0x8000,r0 # t > 0 86*24566Szliu bicw2 $0xff80,r2 # clear the exponent of y 87*24566Szliu bicw2 $0xff80,r4 # clear the exponent of x 88*24566Szliu bisw2 $0x4080,r2 # normalize y to [1,2) 89*24566Szliu bisw2 $0x4080,r4 # normalize x to [1,2) 90*24566Szliu subw2 r11,r4 # scale x so that yexp-xexp=k 91*24566Szliubegin: 92*24566Szliu cmpw r0,$0x411c # t : 39/16 93*24566Szliu jgeq L50 94*24566Szliu addl3 $0x180,r0,r10 # 8*t 95*24566Szliu cvtrfl r10,r10 # [8*t] rounded to int 96*24566Szliu ashl $-1,r10,r10 # [8*t]/2 97*24566Szliu casel r10,$0,$4 98*24566SzliuL1: 99*24566Szliu .word L20-L1 100*24566Szliu .word L20-L1 101*24566Szliu .word L30-L1 102*24566Szliu .word L40-L1 103*24566Szliu .word L40-L1 104*24566SzliuL10: 105*24566Szliu movq $0xb4d9940f985e407b,r6 # Hi=.98279372324732906796d0 106*24566Szliu movq $0x21b1879a3bc2a2fc,r8 # Lo=-.17092002525602665777d-17 107*24566Szliu subd3 r4,r2,r0 # y-x 108*24566Szliu addw2 $0x80,r0 # 2(y-x) 109*24566Szliu subd2 r4,r0 # 2(y-x)-x 110*24566Szliu addw2 $0x80,r4 # 2x 111*24566Szliu movq r2,r10 112*24566Szliu addw2 $0x80,r10 # 2y 113*24566Szliu addd2 r10,r2 # 3y 114*24566Szliu addd2 r4,r2 # 3y+2x 115*24566Szliu divd2 r2,r0 # (2y-3x)/(2x+3y) 116*24566Szliu brw L60 117*24566SzliuL20: 118*24566Szliu cmpw r0,$0x3280 # t : 2**(-28) 119*24566Szliu jlss L80 120*24566Szliu clrq r6 # Hi=r6=0, Lo=r8=0 121*24566Szliu clrq r8 122*24566Szliu brw L60 123*24566SzliuL30: 124*24566Szliu movq $0xda7b2b0d63383fed,r6 # Hi=.46364760900080611433d0 125*24566Szliu movq $0xf0ea17b2bf912295,r8 # Lo=.10147340032515978826d-17 126*24566Szliu movq r2,r0 127*24566Szliu addw2 $0x80,r0 # 2y 128*24566Szliu subd2 r4,r0 # 2y-x 129*24566Szliu addw2 $0x80,r4 # 2x 130*24566Szliu addd2 r2,r4 # 2x+y 131*24566Szliu divd2 r4,r0 # (2y-x)/(2x+y) 132*24566Szliu brb L60 133*24566SzliuL50: 134*24566Szliu movq $0x68c2a2210fda40c9,r6 # Hi=1.5707963267948966135d1 135*24566Szliu movq $0x06e0145c26332326,r8 # Lo=.22517417741562176079d-17 136*24566Szliu cmpw r0,$0x5100 # y : 2**57 137*24566Szliu bgeq L90 138*24566Szliu divd3 r2,r4,r0 139*24566Szliu bisw2 $0x8000,r0 # -x/y 140*24566Szliu brb L60 141*24566SzliuL40: 142*24566Szliu movq $0x68c2a2210fda4049,r6 # Hi=.78539816339744830676d0 143*24566Szliu movq $0x06e0145c263322a6,r8 # Lo=.11258708870781088040d-17 144*24566Szliu subd3 r4,r2,r0 # y-x 145*24566Szliu addd2 r4,r2 # y+x 146*24566Szliu divd2 r2,r0 # (y-x)/(y+x) 147*24566SzliuL60: 148*24566Szliu movq r0,r10 149*24566Szliu muld2 r0,r0 150*24566Szliu polyd r0,$12,ptable 151*24566Szliu muld2 r10,r0 152*24566Szliu subd2 r0,r8 153*24566Szliu addd3 r8,r10,r0 154*24566Szliu addd2 r6,r0 155*24566SzliuL80: 156*24566Szliu movw -8(fp),r2 157*24566Szliu bneq pim 158*24566Szliu bisw2 -4(fp),r0 # return sign(y)*r0 159*24566Szliu ret 160*24566SzliuL90: # x >= 2**25 161*24566Szliu movq r6,r0 162*24566Szliu brb L80 163*24566Szliupim: 164*24566Szliu subd3 r0,$0x68c2a2210fda4149,r0 # pi-t 165*24566Szliu bisw2 -4(fp),r0 166*24566Szliu ret 167*24566Szliuyeq0: 168*24566Szliu movw -8(fp),r2 169*24566Szliu beql zero # if sign(x)=1 return pi 170*24566Szliu movq $0x68c2a2210fda4149,r0 # pi=3.1415926535897932270d1 171*24566Szliu ret 172*24566Szliuzero: 173*24566Szliu clrq r0 # return 0 174*24566Szliu ret 175*24566Szliupio2: 176*24566Szliu movq $0x68c2a2210fda40c9,r0 # pi/2=1.5707963267948966135d1 177*24566Szliu bisw2 -4(fp),r0 # return sign(y)*pi/2 178*24566Szliu ret 179*24566Szliuresop: 180*24566Szliu movq $0x8000,r0 # propagate the reserved operand 181*24566Szliu ret 182*24566Szliu .align 2 183*24566Szliuptable: 184*24566Szliu .quad 0xb50f5ce96e7abd60 185*24566Szliu .quad 0x51e44a42c1073e02 186*24566Szliu .quad 0x3487e3289643be35 187*24566Szliu .quad 0xdb62066dffba3e54 188*24566Szliu .quad 0xcf8e2d5199abbe70 189*24566Szliu .quad 0x26f39cb884883e88 190*24566Szliu .quad 0x135117d18998be9d 191*24566Szliu .quad 0x602ce9742e883eba 192*24566Szliu .quad 0xa35ad0be8e38bee3 193*24566Szliu .quad 0xffac922249243f12 194*24566Szliu .quad 0x7f14ccccccccbf4c 195*24566Szliu .quad 0xaa8faaaaaaaa3faa 196*24566Szliu .quad 0x0000000000000000 197