xref: /csrg-svn/lib/libm/vax/atan2.s (revision 34125)
# Copyright (c) 1985 Regents of the University of California.
# All rights reserved.
#
# Redistribution and use in source and binary forms are permitted
# provided that this notice is preserved and that due credit is given
# to the University of California at Berkeley. The name of the University
# may not be used to endorse or promote products derived from this
# software without specific prior written permission. This software
# is provided ``as is'' without express or implied warranty.
#
# All recipients should regard themselves as participants in an ongoing
# research project and hence should feel obligated to report their
# experiences (good or bad) with these elementary function codes, using
# the sendbug(8) program, to the authors.
#
#	@(#)atan2.s	5.2 (Berkeley) 04/29/88
#
	.data
	.align	2
_sccsid:
.asciz	"@(#)atan2.s	1.2 (Berkeley) 8/21/85; 5.2 (ucb.elefunt) 04/29/88"

# ATAN2(Y,X)
# RETURN ARG (X+iY)
# VAX D FORMAT (56 BITS PRECISION)
# CODED IN VAX ASSEMBLY LANGUAGE BY K.C. NG, 4/16/85;
#
#
# Method :
#	1. Reduce y to positive by atan2(y,x)=-atan2(-y,x).
#	2. Reduce x to positive by (if x and y are unexceptional):
#		ARG (x+iy) = arctan(y/x)   	   ... if x > 0,
#		ARG (x+iy) = pi - arctan[y/(-x)]   ... if x < 0,
#	3. According to the integer k = [4t+0.25] (truncated), where t = y/x,
#	   the argument is further reduced to one of the following intervals,
#	   and the arctangent of y/x is evaluated by the corresponding formula
#	   (a hedged C sketch of this reduction follows the table):
#
#          [0,7/16]	   atan(y/x) = t - t^3*(a1+t^2*(a2+...+t^2*(a11+t^2*a12)...))
#	   [7/16,11/16]    atan(y/x) = atan(1/2) + atan( (y-x/2)/(x+y/2) )
#	   [11/16,19/16]   atan(y/x) = atan( 1 ) + atan( (y-x)/(x+y) )
#	   [19/16,39/16]   atan(y/x) = atan(3/2) + atan( (y-1.5x)/(x+1.5y) )
#	   [39/16,INF]     atan(y/x) = atan(INF) + atan( -x/y )
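#
#	As a hedged illustration only (this is not part of the original
#	source: it uses IEEE double instead of VAX D format, the library
#	atan() in place of the Hi constants, and a short placeholder Taylor
#	tail instead of the degree-12 minimax table at ptable below), the
#	reduction of step 3 can be pictured in C as:
#
#	    #include <math.h>
#
#	    static double poly(double u)	/* placeholder for a1+u*(a2+...) */
#	    {
#	        return 1.0/3 + u*(-1.0/5 + u*(1.0/7));
#	    }
#
#	    static double atan_reduced(double y, double x)	/* assumes y, x > 0 */
#	    {
#	        double t = y/x, hi, s;
#	        int k = (int)(4.0*t + 0.25);		/* interval selector */
#	        if (t >= 39.0/16.0) { hi = atan(INFINITY); s = -x/y; }
#	        else if (k >= 5)    { hi = atan(1.5); s = (y - 1.5*x)/(x + 1.5*y); }
#	        else if (k >= 3)    { hi = atan(1.0); s = (y - x)/(x + y); }
#	        else if (k == 2)    { hi = atan(0.5); s = (y - 0.5*x)/(x + 0.5*y); }
#	        else                { hi = 0.0;       s = t; }
#	        return hi + (s - s*s*s*poly(s*s));	/* ~ atan(y/x) */
#	    }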
#
# Special cases:
# Notations: atan2(y,x) == ARG (x+iy) == ARG(x,y).
#
#	ARG( NAN , (anything) ) is NaN;
#	ARG( (anything), NaN ) is NaN;
#	ARG(+(anything but NaN), +-0) is +-0  ;
#	ARG(-(anything but NaN), +-0) is +-PI ;
#	ARG( 0, +-(anything but 0 and NaN) ) is +-PI/2;
#	ARG( +INF,+-(anything but INF and NaN) ) is +-0 ;
#	ARG( -INF,+-(anything but INF and NaN) ) is +-PI;
#	ARG( +INF,+-INF ) is +-PI/4 ;
#	ARG( -INF,+-INF ) is +-3PI/4;
#	ARG( (anything but 0, NaN, and INF), +-INF ) is +-PI/2;
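#
#	As a quick illustrative check of the table above (an assumption,
#	not part of the original source: it uses IEEE-754 doubles and an
#	Annex F-conforming C library atan2(), whereas this VAX code
#	returns the reserved operand where a NaN appears above):
#
#	    #include <math.h>
#	    #include <stdio.h>
#
#	    int main(void)
#	    {
#	        printf("%g\n", atan2(0.0,  5.0));		/* +0    */
#	        printf("%g\n", atan2(0.0, -5.0));		/* +pi   */
#	        printf("%g\n", atan2(5.0,  0.0));		/* +pi/2 */
#	        printf("%g\n", atan2(1.0,  INFINITY));		/* +0    */
#	        printf("%g\n", atan2(INFINITY, INFINITY));	/* +pi/4 */
#	        printf("%g\n", atan2(NAN, 1.0));		/* nan   */
#	        return 0;
#	    }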
#
# Accuracy:
#	atan2(y,x) returns the exact ARG(x+iy) nearly rounded.
#
	.text
	.align 1
	.globl	_atan2
_atan2 :
	.word	0x0ff4
	movq	4(ap),r2		# r2 = y
	movq	12(ap),r4		# r4 = x
	bicw3	$0x7f,r2,r0
	bicw3	$0x7f,r4,r1
	cmpw	r0,$0x8000		# y is the reserved operand
	jeql	resop
	cmpw	r1,$0x8000		# x is the reserved operand
	jeql	resop
	subl2	$8,sp
	bicw3	$0x7fff,r2,-4(fp)	# copy y sign bit to -4(fp)
	bicw3	$0x7fff,r4,-8(fp)	# copy x sign bit to -8(fp)
	cmpd	r4,$0x4080		# x = 1.0 ?
	bneq	xnot1
	movq	r2,r0
	bicw2	$0x8000,r0		# t = |y|
	movq	r0,r2			# y = |y|
	brb	begin
xnot1:
	bicw3	$0x807f,r2,r11		# yexp
	jeql	yeq0			# if y=0 goto yeq0
	bicw3	$0x807f,r4,r10		# xexp
	jeql	pio2			# if x=0 goto pio2
	subw2	r10,r11			# k = yexp - xexp
	cmpw	r11,$0x2000		# k >= 64 (exp) ?
	jgeq	pio2			# atan2 = +-pi/2
	divd3	r4,r2,r0		# t = y/x, never overflows
	bicw2	$0x8000,r0		# t > 0
	bicw2	$0xff80,r2		# clear the exponent of y
	bicw2	$0xff80,r4		# clear the exponent of x
	bisw2	$0x4080,r2		# normalize y to [1,2)
	bisw2	$0x4080,r4		# normalize x to [1,2)
	subw2	r11,r4			# scale x so that yexp-xexp=k
begin:
	cmpw	r0,$0x411c		# t : 39/16
	jgeq	L50
	addl3	$0x180,r0,r10		# 8*t
	cvtrfl	r10,r10			# [8*t] rounded to int
	ashl	$-1,r10,r10		# [8*t]/2
	casel	r10,$0,$4
L1:
	.word	L20-L1
	.word	L20-L1
	.word	L30-L1
	.word	L40-L1
	.word	L40-L1
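#	a selector of 5 or more (t in [19/16,39/16)) is outside the casel
#	range and falls through to L10 below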
L10:
	movq	$0xb4d9940f985e407b,r6	# Hi=.98279372324732906796d0
	movq	$0x21b1879a3bc2a2fc,r8	# Lo=-.17092002525602665777d-17
	subd3	r4,r2,r0		# y-x
	addw2	$0x80,r0		# 2(y-x)
	subd2	r4,r0			# 2(y-x)-x
	addw2	$0x80,r4		# 2x
	movq	r2,r10
	addw2	$0x80,r10		# 2y
	addd2	r10,r2			# 3y
	addd2	r4,r2			# 3y+2x
	divd2	r2,r0			# (2y-3x)/(2x+3y)
	brw	L60
L20:
	cmpw	r0,$0x3280		# t : 2**(-28)
	jlss	L80			# if t < 2**-28, atan(t) = t
	clrq	r6			# Hi=r6=0, Lo=r8=0
	clrq	r8
	brw	L60
L30:
	movq	$0xda7b2b0d63383fed,r6	# Hi=.46364760900080611433d0
	movq	$0xf0ea17b2bf912295,r8	# Lo=.10147340032515978826d-17
	movq	r2,r0
	addw2	$0x80,r0		# 2y
	subd2	r4,r0			# 2y-x
	addw2	$0x80,r4		# 2x
	addd2	r2,r4			# 2x+y
	divd2	r4,r0 			# (2y-x)/(2x+y)
	brb	L60
L50:
	movq	$0x68c2a2210fda40c9,r6	# Hi=1.5707963267948966135d0
	movq	$0x06e0145c26332326,r8	# Lo=.22517417741562176079d-17
	cmpw	r0,$0x5100		# y : 2**57
	bgeq	L90
	divd3	r2,r4,r0		# x/y
	bisw2	$0x8000,r0 		# -x/y
	brb	L60
L40:
	movq	$0x68c2a2210fda4049,r6	# Hi=.78539816339744830676d0
	movq	$0x06e0145c263322a6,r8	# Lo=.11258708870781088040d-17
	subd3	r4,r2,r0		# y-x
	addd2	r4,r2			# y+x
	divd2	r2,r0			# (y-x)/(y+x)
L60:
	movq	r0,r10			# r10 = s, the reduced argument
	muld2	r0,r0			# r0 = s*s
	polyd	r0,$12,ptable		# r0 = s^2*(a1+s^2*(a2+...)), see ptable
	muld2	r10,r0			# r0 = s^3*(a1+s^2*(a2+...))
	subd2	r0,r8			# r8 = Lo - s^3*(...)
	addd3	r8,r10,r0		# r0 = s + (Lo - s^3*(...))
	addd2	r6,r0			# r0 = Hi + atan(s) = atan(|y/x|)
L80:
	movw	-8(fp),r2		# sign bit of x
	bneq	pim			# if x < 0, result is pi - r0
	bisw2	-4(fp),r0		# return sign(y)*r0
	ret
L90:					# x >= 2**25
	movq	r6,r0			# r0 = Hi = pi/2
	brb	L80
pim:
	subd3	r0,$0x68c2a2210fda4149,r0	# pi-t
	bisw2	-4(fp),r0		# attach sign of y
	ret
yeq0:
	movw	-8(fp),r2		# sign bit of x
	beql	zero			# if sign(x)=0 return 0
	movq	$0x68c2a2210fda4149,r0	# pi=3.1415926535897932270d0
	ret
zero:
	clrq	r0			# return 0
	ret
pio2:
	movq	$0x68c2a2210fda40c9,r0	# pi/2=1.5707963267948966135d0
	bisw2	-4(fp),r0		# return sign(y)*pi/2
	ret
resop:
	movq	$0x8000,r0		# propagate the reserved operand
	ret
	.align 2
ptable:
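# coefficients, highest order first, of the polynomial in t^2 evaluated by
# polyd at L60; the last (constant) entry is zero, so the result is
# t^2*(a1+t^2*(a2+...+t^2*a12))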
	.quad	0xb50f5ce96e7abd60
	.quad	0x51e44a42c1073e02
	.quad	0x3487e3289643be35
	.quad	0xdb62066dffba3e54
	.quad	0xcf8e2d5199abbe70
	.quad	0x26f39cb884883e88
	.quad	0x135117d18998be9d
	.quad	0x602ce9742e883eba
	.quad	0xa35ad0be8e38bee3
	.quad	0xffac922249243f12
	.quad	0x7f14ccccccccbf4c
	.quad	0xaa8faaaaaaaa3faa
	.quad	0x0000000000000000