xref: /csrg-svn/lib/libm/vax/atan2.s (revision 24729)
124566Szliu#
224566Szliu# Copyright (c) 1985 Regents of the University of California.
324566Szliu#
424566Szliu# Use and reproduction of this software are granted  in  accordance  with
524566Szliu# the terms and conditions specified in  the  Berkeley  Software  License
624566Szliu# Agreement (in particular, this entails acknowledgement of the programs'
724566Szliu# source, and inclusion of this notice) with the additional understanding
824566Szliu# that  all  recipients  should regard themselves as participants  in  an
924566Szliu# ongoing  research  project and hence should  feel  obligated  to report
1024566Szliu# their  experiences (good or bad) with these elementary function  codes,
1124566Szliu# using "sendbug 4bsd-bugs@BERKELEY", to the authors.
1224566Szliu#
13*24729Selefunt	.data
14*24729Selefunt	.align	2
15*24729Selefunt_sccsid:
16*24729Selefunt.asciz	"@(#)atan2.s	1.2 (Berkeley) 8/21/85; 1.3 (ucb.elefunt) 09/12/85"
1724566Szliu
1824566Szliu# ATAN2(Y,X)
1924566Szliu# RETURN ARG (X+iY)
2024566Szliu# VAX D FORMAT (56 BITS PRECISION)
2124566Szliu# CODED IN VAX ASSEMBLY LANGUAGE BY K.C. NG, 4/16/85;
2224566Szliu#
2324566Szliu#
2424566Szliu# Method :
2524566Szliu#	1. Reduce y to positive by atan2(y,x)=-atan2(-y,x).
2624566Szliu#	2. Reduce x to positive by (if x and y are unexceptional):
2724566Szliu#		ARG (x+iy) = arctan(y/x)   	   ... if x > 0,
2824566Szliu#		ARG (x+iy) = pi - arctan[y/(-x)]   ... if x < 0,
2924566Szliu#	3. According to the integer k=4t+0.25 truncated , t=y/x, the argument
3024566Szliu#	   is further reduced to one of the following intervals and the
3124566Szliu#	   arctangent of y/x is evaluated by the corresponding formula:
3224566Szliu#
3324566Szliu#          [0,7/16]	   atan(y/x) = t - t^3*(a1+t^2*(a2+...(a10+t^2*a11)...)
3424566Szliu#	   [7/16,11/16]    atan(y/x) = atan(1/2) + atan( (y-x/2)/(x+y/2) )
3524566Szliu#	   [11/16.19/16]   atan(y/x) = atan( 1 ) + atan( (y-x)/(x+y) )
3624566Szliu#	   [19/16,39/16]   atan(y/x) = atan(3/2) + atan( (y-1.5x)/(x+1.5y) )
3724566Szliu#	   [39/16,INF]     atan(y/x) = atan(INF) + atan( -x/y )
3824566Szliu#
3924566Szliu# Special cases:
4024566Szliu# Notations: atan2(y,x) == ARG (x+iy) == ARG(x,y).
4124566Szliu#
4224566Szliu#	ARG( NAN , (anything) ) is NaN;
4324566Szliu#	ARG( (anything), NaN ) is NaN;
4424566Szliu#	ARG(+(anything but NaN), +-0) is +-0  ;
4524566Szliu#	ARG(-(anything but NaN), +-0) is +-PI ;
4624566Szliu#	ARG( 0, +-(anything but 0 and NaN) ) is +-PI/2;
4724566Szliu#	ARG( +INF,+-(anything but INF and NaN) ) is +-0 ;
4824566Szliu#	ARG( -INF,+-(anything but INF and NaN) ) is +-PI;
4924566Szliu#	ARG( +INF,+-INF ) is +-PI/4 ;
5024566Szliu#	ARG( -INF,+-INF ) is +-3PI/4;
5124566Szliu#	ARG( (anything but,0,NaN, and INF),+-INF ) is +-PI/2;
5224566Szliu#
5324566Szliu# Accuracy:
5424566Szliu#	atan2(y,x) returns the exact ARG(x+iy) nearly rounded.
5524566Szliu#
5624566Szliu	.text
5724566Szliu	.align 1
5824566Szliu	.globl	_atan2
5924566Szliu_atan2 :
6024566Szliu	.word	0x0ff4
6124566Szliu	movq	4(ap),r2		# r2 = y
6224566Szliu	movq	12(ap),r4		# r4 = x
6324566Szliu	bicw3	$0x7f,r2,r0
6424566Szliu	bicw3	$0x7f,r4,r1
6524566Szliu	cmpw	r0,$0x8000		# y is the reserved operand
6624566Szliu	jeql	resop
6724566Szliu	cmpw	r1,$0x8000		# x is the reserved operand
6824566Szliu	jeql	resop
6924566Szliu	subl2	$8,sp
7024566Szliu	bicw3	$0x7fff,r2,-4(fp)	# copy y sign bit to -4(fp)
7124566Szliu	bicw3	$0x7fff,r4,-8(fp)	# copy x sign bit to -8(fp)
7224566Szliu	cmpd	r4,$0x4080		# x = 1.0 ?
7324566Szliu	bneq	xnot1
7424566Szliu	movq	r2,r0
7524566Szliu	bicw2	$0x8000,r0		# t = |y|
7624566Szliu	movq	r0,r2			# y = |y|
7724566Szliu	brb	begin
7824566Szliuxnot1:
7924566Szliu	bicw3	$0x807f,r2,r11		# yexp
8024566Szliu	jeql	yeq0			# if y=0 goto yeq0
8124566Szliu	bicw3	$0x807f,r4,r10		# xexp
8224566Szliu	jeql	pio2			# if x=0 goto pio2
8324566Szliu	subw2	r10,r11			# k = yexp - xexp
8424566Szliu	cmpw	r11,$0x2000		# k >= 64 (exp) ?
8524566Szliu	jgeq	pio2			# atan2 = +-pi/2
8624566Szliu	divd3	r4,r2,r0		# t = y/x  never overflow
8724566Szliu	bicw2	$0x8000,r0		# t > 0
8824566Szliu	bicw2	$0xff80,r2		# clear the exponent of y
8924566Szliu	bicw2	$0xff80,r4		# clear the exponent of x
9024566Szliu	bisw2	$0x4080,r2		# normalize y to [1,2)
9124566Szliu	bisw2	$0x4080,r4		# normalize x to [1,2)
9224566Szliu	subw2	r11,r4			# scale x so that yexp-xexp=k
9324566Szliubegin:
9424566Szliu	cmpw	r0,$0x411c		# t : 39/16
9524566Szliu	jgeq	L50
9624566Szliu	addl3	$0x180,r0,r10		# 8*t
9724566Szliu	cvtrfl	r10,r10			# [8*t] rounded to int
9824566Szliu	ashl	$-1,r10,r10		# [8*t]/2
9924566Szliu	casel	r10,$0,$4
10024566SzliuL1:
10124566Szliu	.word	L20-L1
10224566Szliu	.word	L20-L1
10324566Szliu	.word	L30-L1
10424566Szliu	.word	L40-L1
10524566Szliu	.word	L40-L1
10624566SzliuL10:
10724566Szliu	movq	$0xb4d9940f985e407b,r6	# Hi=.98279372324732906796d0
10824566Szliu	movq	$0x21b1879a3bc2a2fc,r8	# Lo=-.17092002525602665777d-17
10924566Szliu	subd3	r4,r2,r0		# y-x
11024566Szliu	addw2	$0x80,r0		# 2(y-x)
11124566Szliu	subd2	r4,r0			# 2(y-x)-x
11224566Szliu	addw2	$0x80,r4		# 2x
11324566Szliu	movq	r2,r10
11424566Szliu	addw2	$0x80,r10		# 2y
11524566Szliu	addd2	r10,r2			# 3y
11624566Szliu	addd2	r4,r2			# 3y+2x
11724566Szliu	divd2	r2,r0			# (2y-3x)/(2x+3y)
11824566Szliu	brw	L60
11924566SzliuL20:
12024566Szliu	cmpw	r0,$0x3280		# t : 2**(-28)
12124566Szliu	jlss	L80
12224566Szliu	clrq	r6			# Hi=r6=0, Lo=r8=0
12324566Szliu	clrq	r8
12424566Szliu	brw	L60
12524566SzliuL30:
12624566Szliu	movq	$0xda7b2b0d63383fed,r6	# Hi=.46364760900080611433d0
12724566Szliu	movq	$0xf0ea17b2bf912295,r8	# Lo=.10147340032515978826d-17
12824566Szliu	movq	r2,r0
12924566Szliu	addw2	$0x80,r0		# 2y
13024566Szliu	subd2	r4,r0			# 2y-x
13124566Szliu	addw2	$0x80,r4		# 2x
13224566Szliu	addd2	r2,r4			# 2x+y
13324566Szliu	divd2	r4,r0 			# (2y-x)/(2x+y)
13424566Szliu	brb	L60
13524566SzliuL50:
13624566Szliu	movq	$0x68c2a2210fda40c9,r6	# Hi=1.5707963267948966135d1
13724566Szliu	movq	$0x06e0145c26332326,r8	# Lo=.22517417741562176079d-17
13824566Szliu	cmpw	r0,$0x5100		# y : 2**57
13924566Szliu	bgeq	L90
14024566Szliu	divd3	r2,r4,r0
14124566Szliu	bisw2	$0x8000,r0 		# -x/y
14224566Szliu	brb	L60
14324566SzliuL40:
14424566Szliu	movq	$0x68c2a2210fda4049,r6	# Hi=.78539816339744830676d0
14524566Szliu	movq	$0x06e0145c263322a6,r8	# Lo=.11258708870781088040d-17
14624566Szliu	subd3	r4,r2,r0		# y-x
14724566Szliu	addd2	r4,r2			# y+x
14824566Szliu	divd2	r2,r0			# (y-x)/(y+x)
14924566SzliuL60:
15024566Szliu	movq	r0,r10
15124566Szliu	muld2	r0,r0
15224566Szliu	polyd	r0,$12,ptable
15324566Szliu	muld2	r10,r0
15424566Szliu	subd2	r0,r8
15524566Szliu	addd3	r8,r10,r0
15624566Szliu	addd2	r6,r0
15724566SzliuL80:
15824566Szliu	movw	-8(fp),r2
15924566Szliu	bneq	pim
16024566Szliu	bisw2	-4(fp),r0		# return sign(y)*r0
16124566Szliu	ret
16224566SzliuL90:					# x >= 2**25
16324566Szliu	movq	r6,r0
16424566Szliu	brb	L80
16524566Szliupim:
16624566Szliu	subd3	r0,$0x68c2a2210fda4149,r0	# pi-t
16724566Szliu	bisw2	-4(fp),r0
16824566Szliu	ret
16924566Szliuyeq0:
17024566Szliu	movw	-8(fp),r2
17124566Szliu	beql	zero			# if sign(x)=1 return pi
17224566Szliu	movq	$0x68c2a2210fda4149,r0	# pi=3.1415926535897932270d1
17324566Szliu	ret
17424566Szliuzero:
17524566Szliu	clrq	r0			# return 0
17624566Szliu	ret
17724566Szliupio2:
17824566Szliu	movq	$0x68c2a2210fda40c9,r0	# pi/2=1.5707963267948966135d1
17924566Szliu	bisw2	-4(fp),r0		# return sign(y)*pi/2
18024566Szliu	ret
18124566Szliuresop:
18224566Szliu	movq	$0x8000,r0		# propagate the reserved operand
18324566Szliu	ret
18424566Szliu	.align 2
18524566Szliuptable:
18624566Szliu	.quad	0xb50f5ce96e7abd60
18724566Szliu	.quad	0x51e44a42c1073e02
18824566Szliu	.quad	0x3487e3289643be35
18924566Szliu	.quad	0xdb62066dffba3e54
19024566Szliu	.quad	0xcf8e2d5199abbe70
19124566Szliu	.quad	0x26f39cb884883e88
19224566Szliu	.quad	0x135117d18998be9d
19324566Szliu	.quad	0x602ce9742e883eba
19424566Szliu	.quad	0xa35ad0be8e38bee3
19524566Szliu	.quad	0xffac922249243f12
19624566Szliu	.quad	0x7f14ccccccccbf4c
19724566Szliu	.quad	0xaa8faaaaaaaa3faa
19824566Szliu	.quad	0x0000000000000000
199