xref: /csrg-svn/lib/libm/vax/atan2.s (revision 24566)
1*24566Szliu#
2*24566Szliu# Copyright (c) 1985 Regents of the University of California.
3*24566Szliu#
4*24566Szliu# Use and reproduction of this software are granted  in  accordance  with
5*24566Szliu# the terms and conditions specified in  the  Berkeley  Software  License
6*24566Szliu# Agreement (in particular, this entails acknowledgement of the programs'
7*24566Szliu# source, and inclusion of this notice) with the additional understanding
8*24566Szliu# that  all  recipients  should regard themselves as participants  in  an
9*24566Szliu# ongoing  research  project and hence should  feel  obligated  to report
10*24566Szliu# their  experiences (good or bad) with these elementary function  codes,
11*24566Szliu# using "sendbug 4bsd-bugs@BERKELEY", to the authors.
12*24566Szliu#
13*24566Szliu
14*24566Szliu# @(#)atan2.s	1.1 (ELEFUNT) 09/06/85
15*24566Szliu
16*24566Szliu# ATAN2(Y,X)
17*24566Szliu# RETURN ARG (X+iY)
18*24566Szliu# VAX D FORMAT (56 BITS PRECISION)
19*24566Szliu# CODED IN VAX ASSEMBLY LANGUAGE BY K.C. NG, 4/16/85;
20*24566Szliu#
21*24566Szliu#
22*24566Szliu# Method :
23*24566Szliu#	1. Reduce y to positive by atan2(y,x)=-atan2(-y,x).
24*24566Szliu#	2. Reduce x to positive by (if x and y are unexceptional):
25*24566Szliu#		ARG (x+iy) = arctan(y/x)   	   ... if x > 0,
26*24566Szliu#		ARG (x+iy) = pi - arctan[y/(-x)]   ... if x < 0,
27*24566Szliu#	3. According to the integer k=4t+0.25 truncated , t=y/x, the argument
28*24566Szliu#	   is further reduced to one of the following intervals and the
29*24566Szliu#	   arctangent of y/x is evaluated by the corresponding formula:
30*24566Szliu#
31*24566Szliu#          [0,7/16]	   atan(y/x) = t - t^3*(a1+t^2*(a2+...(a10+t^2*a11)...)
32*24566Szliu#	   [7/16,11/16]    atan(y/x) = atan(1/2) + atan( (y-x/2)/(x+y/2) )
33*24566Szliu#	   [11/16.19/16]   atan(y/x) = atan( 1 ) + atan( (y-x)/(x+y) )
34*24566Szliu#	   [19/16,39/16]   atan(y/x) = atan(3/2) + atan( (y-1.5x)/(x+1.5y) )
35*24566Szliu#	   [39/16,INF]     atan(y/x) = atan(INF) + atan( -x/y )
36*24566Szliu#
37*24566Szliu# Special cases:
38*24566Szliu# Notations: atan2(y,x) == ARG (x+iy) == ARG(x,y).
39*24566Szliu#
40*24566Szliu#	ARG( NAN , (anything) ) is NaN;
41*24566Szliu#	ARG( (anything), NaN ) is NaN;
42*24566Szliu#	ARG(+(anything but NaN), +-0) is +-0  ;
43*24566Szliu#	ARG(-(anything but NaN), +-0) is +-PI ;
44*24566Szliu#	ARG( 0, +-(anything but 0 and NaN) ) is +-PI/2;
45*24566Szliu#	ARG( +INF,+-(anything but INF and NaN) ) is +-0 ;
46*24566Szliu#	ARG( -INF,+-(anything but INF and NaN) ) is +-PI;
47*24566Szliu#	ARG( +INF,+-INF ) is +-PI/4 ;
48*24566Szliu#	ARG( -INF,+-INF ) is +-3PI/4;
49*24566Szliu#	ARG( (anything but,0,NaN, and INF),+-INF ) is +-PI/2;
50*24566Szliu#
51*24566Szliu# Accuracy:
52*24566Szliu#	atan2(y,x) returns the exact ARG(x+iy) nearly rounded.
53*24566Szliu#
54*24566Szliu	.text
55*24566Szliu	.align 1
56*24566Szliu	.globl	_atan2
57*24566Szliu_atan2 :
58*24566Szliu	.word	0x0ff4
59*24566Szliu	movq	4(ap),r2		# r2 = y
60*24566Szliu	movq	12(ap),r4		# r4 = x
61*24566Szliu	bicw3	$0x7f,r2,r0
62*24566Szliu	bicw3	$0x7f,r4,r1
63*24566Szliu	cmpw	r0,$0x8000		# y is the reserved operand
64*24566Szliu	jeql	resop
65*24566Szliu	cmpw	r1,$0x8000		# x is the reserved operand
66*24566Szliu	jeql	resop
67*24566Szliu	subl2	$8,sp
68*24566Szliu	bicw3	$0x7fff,r2,-4(fp)	# copy y sign bit to -4(fp)
69*24566Szliu	bicw3	$0x7fff,r4,-8(fp)	# copy x sign bit to -8(fp)
70*24566Szliu	cmpd	r4,$0x4080		# x = 1.0 ?
71*24566Szliu	bneq	xnot1
72*24566Szliu	movq	r2,r0
73*24566Szliu	bicw2	$0x8000,r0		# t = |y|
74*24566Szliu	movq	r0,r2			# y = |y|
75*24566Szliu	brb	begin
76*24566Szliuxnot1:
77*24566Szliu	bicw3	$0x807f,r2,r11		# yexp
78*24566Szliu	jeql	yeq0			# if y=0 goto yeq0
79*24566Szliu	bicw3	$0x807f,r4,r10		# xexp
80*24566Szliu	jeql	pio2			# if x=0 goto pio2
81*24566Szliu	subw2	r10,r11			# k = yexp - xexp
82*24566Szliu	cmpw	r11,$0x2000		# k >= 64 (exp) ?
83*24566Szliu	jgeq	pio2			# atan2 = +-pi/2
84*24566Szliu	divd3	r4,r2,r0		# t = y/x  never overflow
85*24566Szliu	bicw2	$0x8000,r0		# t > 0
86*24566Szliu	bicw2	$0xff80,r2		# clear the exponent of y
87*24566Szliu	bicw2	$0xff80,r4		# clear the exponent of x
88*24566Szliu	bisw2	$0x4080,r2		# normalize y to [1,2)
89*24566Szliu	bisw2	$0x4080,r4		# normalize x to [1,2)
90*24566Szliu	subw2	r11,r4			# scale x so that yexp-xexp=k
91*24566Szliubegin:
92*24566Szliu	cmpw	r0,$0x411c		# t : 39/16
93*24566Szliu	jgeq	L50
94*24566Szliu	addl3	$0x180,r0,r10		# 8*t
95*24566Szliu	cvtrfl	r10,r10			# [8*t] rounded to int
96*24566Szliu	ashl	$-1,r10,r10		# [8*t]/2
97*24566Szliu	casel	r10,$0,$4
98*24566SzliuL1:
99*24566Szliu	.word	L20-L1
100*24566Szliu	.word	L20-L1
101*24566Szliu	.word	L30-L1
102*24566Szliu	.word	L40-L1
103*24566Szliu	.word	L40-L1
104*24566SzliuL10:
105*24566Szliu	movq	$0xb4d9940f985e407b,r6	# Hi=.98279372324732906796d0
106*24566Szliu	movq	$0x21b1879a3bc2a2fc,r8	# Lo=-.17092002525602665777d-17
107*24566Szliu	subd3	r4,r2,r0		# y-x
108*24566Szliu	addw2	$0x80,r0		# 2(y-x)
109*24566Szliu	subd2	r4,r0			# 2(y-x)-x
110*24566Szliu	addw2	$0x80,r4		# 2x
111*24566Szliu	movq	r2,r10
112*24566Szliu	addw2	$0x80,r10		# 2y
113*24566Szliu	addd2	r10,r2			# 3y
114*24566Szliu	addd2	r4,r2			# 3y+2x
115*24566Szliu	divd2	r2,r0			# (2y-3x)/(2x+3y)
116*24566Szliu	brw	L60
117*24566SzliuL20:
118*24566Szliu	cmpw	r0,$0x3280		# t : 2**(-28)
119*24566Szliu	jlss	L80
120*24566Szliu	clrq	r6			# Hi=r6=0, Lo=r8=0
121*24566Szliu	clrq	r8
122*24566Szliu	brw	L60
123*24566SzliuL30:
124*24566Szliu	movq	$0xda7b2b0d63383fed,r6	# Hi=.46364760900080611433d0
125*24566Szliu	movq	$0xf0ea17b2bf912295,r8	# Lo=.10147340032515978826d-17
126*24566Szliu	movq	r2,r0
127*24566Szliu	addw2	$0x80,r0		# 2y
128*24566Szliu	subd2	r4,r0			# 2y-x
129*24566Szliu	addw2	$0x80,r4		# 2x
130*24566Szliu	addd2	r2,r4			# 2x+y
131*24566Szliu	divd2	r4,r0 			# (2y-x)/(2x+y)
132*24566Szliu	brb	L60
133*24566SzliuL50:
134*24566Szliu	movq	$0x68c2a2210fda40c9,r6	# Hi=1.5707963267948966135d1
135*24566Szliu	movq	$0x06e0145c26332326,r8	# Lo=.22517417741562176079d-17
136*24566Szliu	cmpw	r0,$0x5100		# y : 2**57
137*24566Szliu	bgeq	L90
138*24566Szliu	divd3	r2,r4,r0
139*24566Szliu	bisw2	$0x8000,r0 		# -x/y
140*24566Szliu	brb	L60
141*24566SzliuL40:
142*24566Szliu	movq	$0x68c2a2210fda4049,r6	# Hi=.78539816339744830676d0
143*24566Szliu	movq	$0x06e0145c263322a6,r8	# Lo=.11258708870781088040d-17
144*24566Szliu	subd3	r4,r2,r0		# y-x
145*24566Szliu	addd2	r4,r2			# y+x
146*24566Szliu	divd2	r2,r0			# (y-x)/(y+x)
147*24566SzliuL60:
148*24566Szliu	movq	r0,r10
149*24566Szliu	muld2	r0,r0
150*24566Szliu	polyd	r0,$12,ptable
151*24566Szliu	muld2	r10,r0
152*24566Szliu	subd2	r0,r8
153*24566Szliu	addd3	r8,r10,r0
154*24566Szliu	addd2	r6,r0
155*24566SzliuL80:
156*24566Szliu	movw	-8(fp),r2
157*24566Szliu	bneq	pim
158*24566Szliu	bisw2	-4(fp),r0		# return sign(y)*r0
159*24566Szliu	ret
160*24566SzliuL90:					# x >= 2**25
161*24566Szliu	movq	r6,r0
162*24566Szliu	brb	L80
163*24566Szliupim:
164*24566Szliu	subd3	r0,$0x68c2a2210fda4149,r0	# pi-t
165*24566Szliu	bisw2	-4(fp),r0
166*24566Szliu	ret
167*24566Szliuyeq0:
168*24566Szliu	movw	-8(fp),r2
169*24566Szliu	beql	zero			# if sign(x)=1 return pi
170*24566Szliu	movq	$0x68c2a2210fda4149,r0	# pi=3.1415926535897932270d1
171*24566Szliu	ret
172*24566Szliuzero:
173*24566Szliu	clrq	r0			# return 0
174*24566Szliu	ret
175*24566Szliupio2:
176*24566Szliu	movq	$0x68c2a2210fda40c9,r0	# pi/2=1.5707963267948966135d1
177*24566Szliu	bisw2	-4(fp),r0		# return sign(y)*pi/2
178*24566Szliu	ret
179*24566Szliuresop:
180*24566Szliu	movq	$0x8000,r0		# propagate the reserved operand
181*24566Szliu	ret
182*24566Szliu	.align 2
183*24566Szliuptable:
184*24566Szliu	.quad	0xb50f5ce96e7abd60
185*24566Szliu	.quad	0x51e44a42c1073e02
186*24566Szliu	.quad	0x3487e3289643be35
187*24566Szliu	.quad	0xdb62066dffba3e54
188*24566Szliu	.quad	0xcf8e2d5199abbe70
189*24566Szliu	.quad	0x26f39cb884883e88
190*24566Szliu	.quad	0x135117d18998be9d
191*24566Szliu	.quad	0x602ce9742e883eba
192*24566Szliu	.quad	0xa35ad0be8e38bee3
193*24566Szliu	.quad	0xffac922249243f12
194*24566Szliu	.quad	0x7f14ccccccccbf4c
195*24566Szliu	.quad	0xaa8faaaaaaaa3faa
196*24566Szliu	.quad	0x0000000000000000
197