124566Szliu# Copyright (c) 1985 Regents of the University of California. 2*34125Sbostic# All rights reserved. 324566Szliu# 4*34125Sbostic# Redistribution and use in source and binary forms are permitted 5*34125Sbostic# provided that this notice is preserved and that due credit is given 6*34125Sbostic# to the University of California at Berkeley. The name of the University 7*34125Sbostic# may not be used to endorse or promote products derived from this 8*34125Sbostic# software without specific prior written permission. This software 9*34125Sbostic# is provided ``as is'' without express or implied warranty. 10*34125Sbostic# 11*34125Sbostic# All recipients should regard themselves as participants in an ongoing 12*34125Sbostic# research project and hence should feel obligated to report their 13*34125Sbostic# experiences (good or bad) with these elementary function codes, using 14*34125Sbostic# the sendbug(8) program, to the authors. 15*34125Sbostic# 16*34125Sbostic# @(#)atan2.s 5.2 (Berkeley) 04/29/88 17*34125Sbostic# 1824729Selefunt .data 1924729Selefunt .align 2 2024729Selefunt_sccsid: 21*34125Sbostic.asciz "@(#)atan2.s 1.2 (Berkeley) 8/21/85; 5.2 (ucb.elefunt) 04/29/88" 2224566Szliu 2324566Szliu# ATAN2(Y,X) 2424566Szliu# RETURN ARG (X+iY) 2524566Szliu# VAX D FORMAT (56 BITS PRECISION) 2624566Szliu# CODED IN VAX ASSEMBLY LANGUAGE BY K.C. NG, 4/16/85; 2724566Szliu# 2824566Szliu# 2924566Szliu# Method : 3024566Szliu# 1. Reduce y to positive by atan2(y,x)=-atan2(-y,x). 3124566Szliu# 2. Reduce x to positive by (if x and y are unexceptional): 3224566Szliu# ARG (x+iy) = arctan(y/x) ... if x > 0, 3324566Szliu# ARG (x+iy) = pi - arctan[y/(-x)] ... if x < 0, 3424566Szliu# 3. According to the integer k=4t+0.25 truncated , t=y/x, the argument 3524566Szliu# is further reduced to one of the following intervals and the 3624566Szliu# arctangent of y/x is evaluated by the corresponding formula: 3724566Szliu# 3824566Szliu# [0,7/16] atan(y/x) = t - t^3*(a1+t^2*(a2+...(a10+t^2*a11)...) 3924566Szliu# [7/16,11/16] atan(y/x) = atan(1/2) + atan( (y-x/2)/(x+y/2) ) 4024566Szliu# [11/16.19/16] atan(y/x) = atan( 1 ) + atan( (y-x)/(x+y) ) 4124566Szliu# [19/16,39/16] atan(y/x) = atan(3/2) + atan( (y-1.5x)/(x+1.5y) ) 4224566Szliu# [39/16,INF] atan(y/x) = atan(INF) + atan( -x/y ) 4324566Szliu# 4424566Szliu# Special cases: 4524566Szliu# Notations: atan2(y,x) == ARG (x+iy) == ARG(x,y). 4624566Szliu# 4724566Szliu# ARG( NAN , (anything) ) is NaN; 4824566Szliu# ARG( (anything), NaN ) is NaN; 4924566Szliu# ARG(+(anything but NaN), +-0) is +-0 ; 5024566Szliu# ARG(-(anything but NaN), +-0) is +-PI ; 5124566Szliu# ARG( 0, +-(anything but 0 and NaN) ) is +-PI/2; 5224566Szliu# ARG( +INF,+-(anything but INF and NaN) ) is +-0 ; 5324566Szliu# ARG( -INF,+-(anything but INF and NaN) ) is +-PI; 5424566Szliu# ARG( +INF,+-INF ) is +-PI/4 ; 5524566Szliu# ARG( -INF,+-INF ) is +-3PI/4; 5624566Szliu# ARG( (anything but,0,NaN, and INF),+-INF ) is +-PI/2; 5724566Szliu# 5824566Szliu# Accuracy: 5924566Szliu# atan2(y,x) returns the exact ARG(x+iy) nearly rounded. 6024566Szliu# 6124566Szliu .text 6224566Szliu .align 1 6324566Szliu .globl _atan2 6424566Szliu_atan2 : 6524566Szliu .word 0x0ff4 6624566Szliu movq 4(ap),r2 # r2 = y 6724566Szliu movq 12(ap),r4 # r4 = x 6824566Szliu bicw3 $0x7f,r2,r0 6924566Szliu bicw3 $0x7f,r4,r1 7024566Szliu cmpw r0,$0x8000 # y is the reserved operand 7124566Szliu jeql resop 7224566Szliu cmpw r1,$0x8000 # x is the reserved operand 7324566Szliu jeql resop 7424566Szliu subl2 $8,sp 7524566Szliu bicw3 $0x7fff,r2,-4(fp) # copy y sign bit to -4(fp) 7624566Szliu bicw3 $0x7fff,r4,-8(fp) # copy x sign bit to -8(fp) 7724566Szliu cmpd r4,$0x4080 # x = 1.0 ? 7824566Szliu bneq xnot1 7924566Szliu movq r2,r0 8024566Szliu bicw2 $0x8000,r0 # t = |y| 8124566Szliu movq r0,r2 # y = |y| 8224566Szliu brb begin 8324566Szliuxnot1: 8424566Szliu bicw3 $0x807f,r2,r11 # yexp 8524566Szliu jeql yeq0 # if y=0 goto yeq0 8624566Szliu bicw3 $0x807f,r4,r10 # xexp 8724566Szliu jeql pio2 # if x=0 goto pio2 8824566Szliu subw2 r10,r11 # k = yexp - xexp 8924566Szliu cmpw r11,$0x2000 # k >= 64 (exp) ? 9024566Szliu jgeq pio2 # atan2 = +-pi/2 9124566Szliu divd3 r4,r2,r0 # t = y/x never overflow 9224566Szliu bicw2 $0x8000,r0 # t > 0 9324566Szliu bicw2 $0xff80,r2 # clear the exponent of y 9424566Szliu bicw2 $0xff80,r4 # clear the exponent of x 9524566Szliu bisw2 $0x4080,r2 # normalize y to [1,2) 9624566Szliu bisw2 $0x4080,r4 # normalize x to [1,2) 9724566Szliu subw2 r11,r4 # scale x so that yexp-xexp=k 9824566Szliubegin: 9924566Szliu cmpw r0,$0x411c # t : 39/16 10024566Szliu jgeq L50 10124566Szliu addl3 $0x180,r0,r10 # 8*t 10224566Szliu cvtrfl r10,r10 # [8*t] rounded to int 10324566Szliu ashl $-1,r10,r10 # [8*t]/2 10424566Szliu casel r10,$0,$4 10524566SzliuL1: 10624566Szliu .word L20-L1 10724566Szliu .word L20-L1 10824566Szliu .word L30-L1 10924566Szliu .word L40-L1 11024566Szliu .word L40-L1 11124566SzliuL10: 11224566Szliu movq $0xb4d9940f985e407b,r6 # Hi=.98279372324732906796d0 11324566Szliu movq $0x21b1879a3bc2a2fc,r8 # Lo=-.17092002525602665777d-17 11424566Szliu subd3 r4,r2,r0 # y-x 11524566Szliu addw2 $0x80,r0 # 2(y-x) 11624566Szliu subd2 r4,r0 # 2(y-x)-x 11724566Szliu addw2 $0x80,r4 # 2x 11824566Szliu movq r2,r10 11924566Szliu addw2 $0x80,r10 # 2y 12024566Szliu addd2 r10,r2 # 3y 12124566Szliu addd2 r4,r2 # 3y+2x 12224566Szliu divd2 r2,r0 # (2y-3x)/(2x+3y) 12324566Szliu brw L60 12424566SzliuL20: 12524566Szliu cmpw r0,$0x3280 # t : 2**(-28) 12624566Szliu jlss L80 12724566Szliu clrq r6 # Hi=r6=0, Lo=r8=0 12824566Szliu clrq r8 12924566Szliu brw L60 13024566SzliuL30: 13124566Szliu movq $0xda7b2b0d63383fed,r6 # Hi=.46364760900080611433d0 13224566Szliu movq $0xf0ea17b2bf912295,r8 # Lo=.10147340032515978826d-17 13324566Szliu movq r2,r0 13424566Szliu addw2 $0x80,r0 # 2y 13524566Szliu subd2 r4,r0 # 2y-x 13624566Szliu addw2 $0x80,r4 # 2x 13724566Szliu addd2 r2,r4 # 2x+y 13824566Szliu divd2 r4,r0 # (2y-x)/(2x+y) 13924566Szliu brb L60 14024566SzliuL50: 14124566Szliu movq $0x68c2a2210fda40c9,r6 # Hi=1.5707963267948966135d1 14224566Szliu movq $0x06e0145c26332326,r8 # Lo=.22517417741562176079d-17 14324566Szliu cmpw r0,$0x5100 # y : 2**57 14424566Szliu bgeq L90 14524566Szliu divd3 r2,r4,r0 14624566Szliu bisw2 $0x8000,r0 # -x/y 14724566Szliu brb L60 14824566SzliuL40: 14924566Szliu movq $0x68c2a2210fda4049,r6 # Hi=.78539816339744830676d0 15024566Szliu movq $0x06e0145c263322a6,r8 # Lo=.11258708870781088040d-17 15124566Szliu subd3 r4,r2,r0 # y-x 15224566Szliu addd2 r4,r2 # y+x 15324566Szliu divd2 r2,r0 # (y-x)/(y+x) 15424566SzliuL60: 15524566Szliu movq r0,r10 15624566Szliu muld2 r0,r0 15724566Szliu polyd r0,$12,ptable 15824566Szliu muld2 r10,r0 15924566Szliu subd2 r0,r8 16024566Szliu addd3 r8,r10,r0 16124566Szliu addd2 r6,r0 16224566SzliuL80: 16324566Szliu movw -8(fp),r2 16424566Szliu bneq pim 16524566Szliu bisw2 -4(fp),r0 # return sign(y)*r0 16624566Szliu ret 16724566SzliuL90: # x >= 2**25 16824566Szliu movq r6,r0 16924566Szliu brb L80 17024566Szliupim: 17124566Szliu subd3 r0,$0x68c2a2210fda4149,r0 # pi-t 17224566Szliu bisw2 -4(fp),r0 17324566Szliu ret 17424566Szliuyeq0: 17524566Szliu movw -8(fp),r2 17624566Szliu beql zero # if sign(x)=1 return pi 17724566Szliu movq $0x68c2a2210fda4149,r0 # pi=3.1415926535897932270d1 17824566Szliu ret 17924566Szliuzero: 18024566Szliu clrq r0 # return 0 18124566Szliu ret 18224566Szliupio2: 18324566Szliu movq $0x68c2a2210fda40c9,r0 # pi/2=1.5707963267948966135d1 18424566Szliu bisw2 -4(fp),r0 # return sign(y)*pi/2 18524566Szliu ret 18624566Szliuresop: 18724566Szliu movq $0x8000,r0 # propagate the reserved operand 18824566Szliu ret 18924566Szliu .align 2 19024566Szliuptable: 19124566Szliu .quad 0xb50f5ce96e7abd60 19224566Szliu .quad 0x51e44a42c1073e02 19324566Szliu .quad 0x3487e3289643be35 19424566Szliu .quad 0xdb62066dffba3e54 19524566Szliu .quad 0xcf8e2d5199abbe70 19624566Szliu .quad 0x26f39cb884883e88 19724566Szliu .quad 0x135117d18998be9d 19824566Szliu .quad 0x602ce9742e883eba 19924566Szliu .quad 0xa35ad0be8e38bee3 20024566Szliu .quad 0xffac922249243f12 20124566Szliu .quad 0x7f14ccccccccbf4c 20224566Szliu .quad 0xaa8faaaaaaaa3faa 20324566Szliu .quad 0x0000000000000000 204