124566Szliu# 224566Szliu# Copyright (c) 1985 Regents of the University of California. 324566Szliu# 424566Szliu# Use and reproduction of this software are granted in accordance with 524566Szliu# the terms and conditions specified in the Berkeley Software License 624566Szliu# Agreement (in particular, this entails acknowledgement of the programs' 724566Szliu# source, and inclusion of this notice) with the additional understanding 824566Szliu# that all recipients should regard themselves as participants in an 924566Szliu# ongoing research project and hence should feel obligated to report 1024566Szliu# their experiences (good or bad) with these elementary function codes, 1124566Szliu# using "sendbug 4bsd-bugs@BERKELEY", to the authors. 1224566Szliu# 13*24729Selefunt .data 14*24729Selefunt .align 2 15*24729Selefunt_sccsid: 16*24729Selefunt.asciz "@(#)atan2.s 1.2 (Berkeley) 8/21/85; 1.3 (ucb.elefunt) 09/12/85" 1724566Szliu 1824566Szliu# ATAN2(Y,X) 1924566Szliu# RETURN ARG (X+iY) 2024566Szliu# VAX D FORMAT (56 BITS PRECISION) 2124566Szliu# CODED IN VAX ASSEMBLY LANGUAGE BY K.C. NG, 4/16/85; 2224566Szliu# 2324566Szliu# 2424566Szliu# Method : 2524566Szliu# 1. Reduce y to positive by atan2(y,x)=-atan2(-y,x). 2624566Szliu# 2. Reduce x to positive by (if x and y are unexceptional): 2724566Szliu# ARG (x+iy) = arctan(y/x) ... if x > 0, 2824566Szliu# ARG (x+iy) = pi - arctan[y/(-x)] ... if x < 0, 2924566Szliu# 3. According to the integer k=4t+0.25 truncated , t=y/x, the argument 3024566Szliu# is further reduced to one of the following intervals and the 3124566Szliu# arctangent of y/x is evaluated by the corresponding formula: 3224566Szliu# 3324566Szliu# [0,7/16] atan(y/x) = t - t^3*(a1+t^2*(a2+...(a10+t^2*a11)...) 3424566Szliu# [7/16,11/16] atan(y/x) = atan(1/2) + atan( (y-x/2)/(x+y/2) ) 3524566Szliu# [11/16.19/16] atan(y/x) = atan( 1 ) + atan( (y-x)/(x+y) ) 3624566Szliu# [19/16,39/16] atan(y/x) = atan(3/2) + atan( (y-1.5x)/(x+1.5y) ) 3724566Szliu# [39/16,INF] atan(y/x) = atan(INF) + atan( -x/y ) 3824566Szliu# 3924566Szliu# Special cases: 4024566Szliu# Notations: atan2(y,x) == ARG (x+iy) == ARG(x,y). 4124566Szliu# 4224566Szliu# ARG( NAN , (anything) ) is NaN; 4324566Szliu# ARG( (anything), NaN ) is NaN; 4424566Szliu# ARG(+(anything but NaN), +-0) is +-0 ; 4524566Szliu# ARG(-(anything but NaN), +-0) is +-PI ; 4624566Szliu# ARG( 0, +-(anything but 0 and NaN) ) is +-PI/2; 4724566Szliu# ARG( +INF,+-(anything but INF and NaN) ) is +-0 ; 4824566Szliu# ARG( -INF,+-(anything but INF and NaN) ) is +-PI; 4924566Szliu# ARG( +INF,+-INF ) is +-PI/4 ; 5024566Szliu# ARG( -INF,+-INF ) is +-3PI/4; 5124566Szliu# ARG( (anything but,0,NaN, and INF),+-INF ) is +-PI/2; 5224566Szliu# 5324566Szliu# Accuracy: 5424566Szliu# atan2(y,x) returns the exact ARG(x+iy) nearly rounded. 5524566Szliu# 5624566Szliu .text 5724566Szliu .align 1 5824566Szliu .globl _atan2 5924566Szliu_atan2 : 6024566Szliu .word 0x0ff4 6124566Szliu movq 4(ap),r2 # r2 = y 6224566Szliu movq 12(ap),r4 # r4 = x 6324566Szliu bicw3 $0x7f,r2,r0 6424566Szliu bicw3 $0x7f,r4,r1 6524566Szliu cmpw r0,$0x8000 # y is the reserved operand 6624566Szliu jeql resop 6724566Szliu cmpw r1,$0x8000 # x is the reserved operand 6824566Szliu jeql resop 6924566Szliu subl2 $8,sp 7024566Szliu bicw3 $0x7fff,r2,-4(fp) # copy y sign bit to -4(fp) 7124566Szliu bicw3 $0x7fff,r4,-8(fp) # copy x sign bit to -8(fp) 7224566Szliu cmpd r4,$0x4080 # x = 1.0 ? 7324566Szliu bneq xnot1 7424566Szliu movq r2,r0 7524566Szliu bicw2 $0x8000,r0 # t = |y| 7624566Szliu movq r0,r2 # y = |y| 7724566Szliu brb begin 7824566Szliuxnot1: 7924566Szliu bicw3 $0x807f,r2,r11 # yexp 8024566Szliu jeql yeq0 # if y=0 goto yeq0 8124566Szliu bicw3 $0x807f,r4,r10 # xexp 8224566Szliu jeql pio2 # if x=0 goto pio2 8324566Szliu subw2 r10,r11 # k = yexp - xexp 8424566Szliu cmpw r11,$0x2000 # k >= 64 (exp) ? 8524566Szliu jgeq pio2 # atan2 = +-pi/2 8624566Szliu divd3 r4,r2,r0 # t = y/x never overflow 8724566Szliu bicw2 $0x8000,r0 # t > 0 8824566Szliu bicw2 $0xff80,r2 # clear the exponent of y 8924566Szliu bicw2 $0xff80,r4 # clear the exponent of x 9024566Szliu bisw2 $0x4080,r2 # normalize y to [1,2) 9124566Szliu bisw2 $0x4080,r4 # normalize x to [1,2) 9224566Szliu subw2 r11,r4 # scale x so that yexp-xexp=k 9324566Szliubegin: 9424566Szliu cmpw r0,$0x411c # t : 39/16 9524566Szliu jgeq L50 9624566Szliu addl3 $0x180,r0,r10 # 8*t 9724566Szliu cvtrfl r10,r10 # [8*t] rounded to int 9824566Szliu ashl $-1,r10,r10 # [8*t]/2 9924566Szliu casel r10,$0,$4 10024566SzliuL1: 10124566Szliu .word L20-L1 10224566Szliu .word L20-L1 10324566Szliu .word L30-L1 10424566Szliu .word L40-L1 10524566Szliu .word L40-L1 10624566SzliuL10: 10724566Szliu movq $0xb4d9940f985e407b,r6 # Hi=.98279372324732906796d0 10824566Szliu movq $0x21b1879a3bc2a2fc,r8 # Lo=-.17092002525602665777d-17 10924566Szliu subd3 r4,r2,r0 # y-x 11024566Szliu addw2 $0x80,r0 # 2(y-x) 11124566Szliu subd2 r4,r0 # 2(y-x)-x 11224566Szliu addw2 $0x80,r4 # 2x 11324566Szliu movq r2,r10 11424566Szliu addw2 $0x80,r10 # 2y 11524566Szliu addd2 r10,r2 # 3y 11624566Szliu addd2 r4,r2 # 3y+2x 11724566Szliu divd2 r2,r0 # (2y-3x)/(2x+3y) 11824566Szliu brw L60 11924566SzliuL20: 12024566Szliu cmpw r0,$0x3280 # t : 2**(-28) 12124566Szliu jlss L80 12224566Szliu clrq r6 # Hi=r6=0, Lo=r8=0 12324566Szliu clrq r8 12424566Szliu brw L60 12524566SzliuL30: 12624566Szliu movq $0xda7b2b0d63383fed,r6 # Hi=.46364760900080611433d0 12724566Szliu movq $0xf0ea17b2bf912295,r8 # Lo=.10147340032515978826d-17 12824566Szliu movq r2,r0 12924566Szliu addw2 $0x80,r0 # 2y 13024566Szliu subd2 r4,r0 # 2y-x 13124566Szliu addw2 $0x80,r4 # 2x 13224566Szliu addd2 r2,r4 # 2x+y 13324566Szliu divd2 r4,r0 # (2y-x)/(2x+y) 13424566Szliu brb L60 13524566SzliuL50: 13624566Szliu movq $0x68c2a2210fda40c9,r6 # Hi=1.5707963267948966135d1 13724566Szliu movq $0x06e0145c26332326,r8 # Lo=.22517417741562176079d-17 13824566Szliu cmpw r0,$0x5100 # y : 2**57 13924566Szliu bgeq L90 14024566Szliu divd3 r2,r4,r0 14124566Szliu bisw2 $0x8000,r0 # -x/y 14224566Szliu brb L60 14324566SzliuL40: 14424566Szliu movq $0x68c2a2210fda4049,r6 # Hi=.78539816339744830676d0 14524566Szliu movq $0x06e0145c263322a6,r8 # Lo=.11258708870781088040d-17 14624566Szliu subd3 r4,r2,r0 # y-x 14724566Szliu addd2 r4,r2 # y+x 14824566Szliu divd2 r2,r0 # (y-x)/(y+x) 14924566SzliuL60: 15024566Szliu movq r0,r10 15124566Szliu muld2 r0,r0 15224566Szliu polyd r0,$12,ptable 15324566Szliu muld2 r10,r0 15424566Szliu subd2 r0,r8 15524566Szliu addd3 r8,r10,r0 15624566Szliu addd2 r6,r0 15724566SzliuL80: 15824566Szliu movw -8(fp),r2 15924566Szliu bneq pim 16024566Szliu bisw2 -4(fp),r0 # return sign(y)*r0 16124566Szliu ret 16224566SzliuL90: # x >= 2**25 16324566Szliu movq r6,r0 16424566Szliu brb L80 16524566Szliupim: 16624566Szliu subd3 r0,$0x68c2a2210fda4149,r0 # pi-t 16724566Szliu bisw2 -4(fp),r0 16824566Szliu ret 16924566Szliuyeq0: 17024566Szliu movw -8(fp),r2 17124566Szliu beql zero # if sign(x)=1 return pi 17224566Szliu movq $0x68c2a2210fda4149,r0 # pi=3.1415926535897932270d1 17324566Szliu ret 17424566Szliuzero: 17524566Szliu clrq r0 # return 0 17624566Szliu ret 17724566Szliupio2: 17824566Szliu movq $0x68c2a2210fda40c9,r0 # pi/2=1.5707963267948966135d1 17924566Szliu bisw2 -4(fp),r0 # return sign(y)*pi/2 18024566Szliu ret 18124566Szliuresop: 18224566Szliu movq $0x8000,r0 # propagate the reserved operand 18324566Szliu ret 18424566Szliu .align 2 18524566Szliuptable: 18624566Szliu .quad 0xb50f5ce96e7abd60 18724566Szliu .quad 0x51e44a42c1073e02 18824566Szliu .quad 0x3487e3289643be35 18924566Szliu .quad 0xdb62066dffba3e54 19024566Szliu .quad 0xcf8e2d5199abbe70 19124566Szliu .quad 0x26f39cb884883e88 19224566Szliu .quad 0x135117d18998be9d 19324566Szliu .quad 0x602ce9742e883eba 19424566Szliu .quad 0xa35ad0be8e38bee3 19524566Szliu .quad 0xffac922249243f12 19624566Szliu .quad 0x7f14ccccccccbf4c 19724566Szliu .quad 0xaa8faaaaaaaa3faa 19824566Szliu .quad 0x0000000000000000 199