libm/vax/atan2.s

24566Szliu# Copyright (c) 1985 Regents of the University of California.
*34125Sbostic# All rights reserved.
24566Szliu#
*34125Sbostic# Redistribution and use in source and binary forms are permitted
*34125Sbostic# provided that this notice is preserved and that due credit is given
*34125Sbostic# to the University of California at Berkeley. The name of the University
*34125Sbostic# may not be used to endorse or promote products derived from this
*34125Sbostic# software without specific prior written permission. This software
*34125Sbostic# is provided ``as is'' without express or implied warranty.
*34125Sbostic#
*34125Sbostic# All recipients should regard themselves as participants in an ongoing
*34125Sbostic# research project and hence should feel obligated to report their
*34125Sbostic# experiences (good or bad) with these elementary function codes, using
*34125Sbostic# the sendbug(8) program, to the authors.
*34125Sbostic#
*34125Sbostic#	@(#)atan2.s	5.2 (Berkeley) 04/29/88
*34125Sbostic#
# SCCS identification string for this source file, kept in the data segment.
	.data
	.align	2
_sccsid:	.asciz	"@(#)atan2.s	1.2 (Berkeley) 8/21/85; 5.2 (ucb.elefunt) 04/29/88"
24566Szliu
24566Szliu# ATAN2(Y,X)
24566Szliu# RETURN ARG (X+iY)
24566Szliu# VAX D FORMAT (56 BITS PRECISION)
24566Szliu# CODED IN VAX ASSEMBLY LANGUAGE BY K.C. NG, 4/16/85;
24566Szliu#
24566Szliu#
24566Szliu# Method :
24566Szliu#	1. Reduce y to positive by atan2(y,x)=-atan2(-y,x).
24566Szliu#	2. Reduce x to positive by (if x and y are unexceptional):
24566Szliu#		ARG (x+iy) = arctan(y/x)   	   ... if x > 0,
24566Szliu#		ARG (x+iy) = pi - arctan[y/(-x)]   ... if x < 0,
24566Szliu#	3. According to the integer k = 4t+0.25 (truncated), where t=y/x, the argument
24566Szliu#	   is further reduced to one of the following intervals and the
24566Szliu#	   arctangent of y/x is evaluated by the corresponding formula:
24566Szliu#
24566Szliu#          [0,7/16]	   atan(y/x) = t - t^3*(a1+t^2*(a2+...(a11+t^2*a12)...))
24566Szliu#	   [7/16,11/16]    atan(y/x) = atan(1/2) + atan( (y-x/2)/(x+y/2) )
24566Szliu#	   [11/16,19/16]   atan(y/x) = atan( 1 ) + atan( (y-x)/(x+y) )
24566Szliu#	   [19/16,39/16]   atan(y/x) = atan(3/2) + atan( (y-1.5x)/(x+1.5y) )
24566Szliu#	   [39/16,INF]     atan(y/x) = atan(INF) + atan( -x/y )
24566Szliu#
24566Szliu# Special cases:
24566Szliu# Notations: atan2(y,x) == ARG (x+iy) == ARG(x,y).
24566Szliu#
24566Szliu#	ARG( NaN , (anything) ) is NaN;
24566Szliu#	ARG( (anything), NaN ) is NaN;
24566Szliu#	ARG(+(anything but NaN), +-0) is +-0  ;
24566Szliu#	ARG(-(anything but NaN), +-0) is +-PI ;
24566Szliu#	ARG( 0, +-(anything but 0 and NaN) ) is +-PI/2;
24566Szliu#	ARG( +INF,+-(anything but INF and NaN) ) is +-0 ;
24566Szliu#	ARG( -INF,+-(anything but INF and NaN) ) is +-PI;
24566Szliu#	ARG( +INF,+-INF ) is +-PI/4 ;
24566Szliu#	ARG( -INF,+-INF ) is +-3PI/4;
24566Szliu#	ARG( (anything but 0, NaN, and INF),+-INF ) is +-PI/2;
24566Szliu#
24566Szliu# Accuracy:
24566Szliu#	atan2(y,x) returns the exact ARG(x+iy) nearly rounded.
24566Szliu#
#
# double atan2(y, x)  --  ARG(x+iy) for VAX D_floating operands.
#
# Entry:   called with CALLS/CALLG; y is at 4(ap), x is at 12(ap).
# Return:  r0/r1 = result; the reserved operand is returned if either
#	   argument is the reserved operand.
# Locals:  -4(fp) (word) = sign bit of y,  -8(fp) (word) = sign bit of x.
#
# Register use:
#	r0/r1	t = |y/x|, then the reduced argument z, then the result
#	r2/r3	|y|, rescaled to [1,2) on the general path
#	r4/r5	|x|, rescaled so that exp(y)-exp(x) is unchanged
#	r6/r7	Hi part of the table value atan(c) for the chosen interval
#	r8/r9	Lo part of the same table value
#	r10,r11	scratch: exponents, case index, saved copy of z
#
# The reduced argument z is evaluated as
#	atan(c) + atan(z) = Hi + (z + (Lo - z*P(z*z)))
# where P is the polynomial stored at ptable (constant term zero).
#
	.text
	.align 1
	.globl	_atan2
_atan2:
	.word	0x0ffc			# entry mask: save r2-r11 (r3 is
					# overwritten together with r2)
	movq	4(ap),r2		# r2 = y
	movq	12(ap),r4		# r4 = x
	bicw3	$0x7f,r2,r0		# r0 = sign and exponent bits of y
	bicw3	$0x7f,r4,r1		# r1 = sign and exponent bits of x
	cmpw	r0,$0x8000		# y is the reserved operand
	jeql	resop
	cmpw	r1,$0x8000		# x is the reserved operand
	jeql	resop
	subl2	$8,sp			# room for the two saved sign words
	bicw3	$0x7fff,r2,-4(fp)	# copy y sign bit to -4(fp)
	bicw3	$0x7fff,r4,-8(fp)	# copy x sign bit to -8(fp)
	cmpd	r4,$0x4080		# x = 1.0 ?
	bneq	xnot1
	movq	r2,r0			# x = 1: no division or scaling needed
	bicw2	$0x8000,r0		# t = |y|
	movq	r0,r2			# y = |y|
	brb	begin
xnot1:
	bicw3	$0x807f,r2,r11		# yexp
	jeql	yeq0			# if y=0 goto yeq0
	bicw3	$0x807f,r4,r10		# xexp
	jeql	pio2			# if x=0 goto pio2
	subw2	r10,r11			# k = yexp - xexp
	cmpw	r11,$0x2000		# k >= 64 (exp) ?
	jgeq	pio2			# atan2 = +-pi/2
	divd3	r4,r2,r0		# t = y/x  never overflow
	bicw2	$0x8000,r0		# t > 0
	bicw2	$0xff80,r2		# clear the sign and exponent of y
	bicw2	$0xff80,r4		# clear the sign and exponent of x
	bisw2	$0x4080,r2		# normalize y to [1,2)
	bisw2	$0x4080,r4		# normalize x to [1,2)
	subw2	r11,r4			# scale x so that yexp-xexp=k
begin:
	cmpw	r0,$0x411c		# t : 39/16
	jgeq	L50
	addl3	$0x180,r0,r10		# 8*t (3 added to the exponent field)
	cvtrfl	r10,r10			# [8*t] rounded to int
	ashl	$-1,r10,r10		# [8*t]/2
	casel	r10,$0,$4		# index 0..4 dispatches through the
					# table; anything larger falls
					# through to L10
L1:
	.word	L20-L1			# 0,1: t in [0,7/16]
	.word	L20-L1
	.word	L30-L1			# 2:   t in [7/16,11/16]
	.word	L40-L1			# 3,4: t in [11/16,19/16]
	.word	L40-L1
L10:					# t in [19/16,39/16]: c = 3/2
	movq	$0xb4d9940f985e407b,r6	# Hi=.98279372324732906796d0
	movq	$0x21b1879a3bc2a2fc,r8	# Lo=-.17092002525602665777d-17
	subd3	r4,r2,r0		# y-x
	addw2	$0x80,r0		# 2(y-x)
	subd2	r4,r0			# 2(y-x)-x = 2y-3x
	addw2	$0x80,r4		# 2x
	movq	r2,r10
	addw2	$0x80,r10		# 2y
	addd2	r10,r2			# 3y
	addd2	r4,r2			# 3y+2x
	divd2	r2,r0			# (2y-3x)/(2x+3y)
	brw	L60
L20:					# t in [0,7/16]: c = 0
	cmpw	r0,$0x3280		# t : 2**(-28)
	jlss	L80			# tiny t: atan(t) = t to working precision
	clrq	r6			# Hi=r6=0, Lo=r8=0
	clrq	r8
	brw	L60
L30:					# t in [7/16,11/16]: c = 1/2
	movq	$0xda7b2b0d63383fed,r6	# Hi=.46364760900080611433d0
	movq	$0xf0ea17b2bf912295,r8	# Lo=.10147340032515978826d-17
	movq	r2,r0
	addw2	$0x80,r0		# 2y
	subd2	r4,r0			# 2y-x
	addw2	$0x80,r4		# 2x
	addd2	r2,r4			# 2x+y
	divd2	r4,r0 			# (2y-x)/(2x+y)
	brb	L60
L50:					# t >= 39/16: c = INF, z = -x/y
	movq	$0x68c2a2210fda40c9,r6	# Hi=1.5707963267948966135d0
	movq	$0x06e0145c26332326,r8	# Lo=.22517417741562176079d-17
	cmpw	r0,$0x5d00		# t : 2**57
	bgeq	L90			# 1/t <= 2**-57 cannot change the
					# rounded value of pi/2
	divd3	r2,r4,r0		# x/y
	bisw2	$0x8000,r0 		# -x/y
	brb	L60
L40:					# t in [11/16,19/16]: c = 1
	movq	$0x68c2a2210fda4049,r6	# Hi=.78539816339744830676d0
	movq	$0x06e0145c263322a6,r8	# Lo=.11258708870781088040d-17
	subd3	r4,r2,r0		# y-x
	addd2	r4,r2			# y+x
	divd2	r2,r0			# (y-x)/(y+x)
L60:					# r0 = z, r6 = Hi, r8 = Lo
	movq	r0,r10			# keep z; polyd overwrites r0-r5
	muld2	r0,r0			# z*z
	polyd	r0,$12,ptable		# P(z*z)
	muld2	r10,r0			# z*P(z*z); for very large t this may
					# underflow to zero, which is harmless
					# with the floating-underflow trap
					# disabled (the state CALLS/CALLG set)
	subd2	r0,r8			# Lo - z*P(z*z)
	addd3	r8,r10,r0		# z + (Lo - z*P(z*z))
	addd2	r6,r0			# Hi + (z + (Lo - z*P(z*z)))
L80:					# r0 = atan(|y/x|)
	movw	-8(fp),r2		# sign of x
	bneq	pim			# x < 0: result is pi - r0
	bisw2	-4(fp),r0		# return sign(y)*r0
	ret
L90:					# t >= 2**57: atan(t) rounds to pi/2
	movq	r6,r0
	brb	L80
pim:
	subd3	r0,$0x68c2a2210fda4149,r0	# pi-t
	bisw2	-4(fp),r0		# return sign(y)*(pi-t)
	ret
yeq0:					# y = 0
	movw	-8(fp),r2		# sign of x
	beql	zero			# if sign(x)=1 return pi
	movq	$0x68c2a2210fda4149,r0	# pi=3.1415926535897932270d0
	ret
zero:
	clrq	r0			# return 0
	ret
pio2:
	movq	$0x68c2a2210fda40c9,r0	# pi/2=1.5707963267948966135d0
	bisw2	-4(fp),r0		# return sign(y)*pi/2
	ret
resop:
	movq	$0x8000,r0		# propagate the reserved operand
	ret
#
# Coefficients for polyd, highest order first.  The final (constant)
# term is zero, so P(w) = w*(a1 + w*(a2 + ... + w*a12)).
#
	.align 2
ptable:
	.quad	0xb50f5ce96e7abd60
	.quad	0x51e44a42c1073e02
	.quad	0x3487e3289643be35
	.quad	0xdb62066dffba3e54
	.quad	0xcf8e2d5199abbe70
	.quad	0x26f39cb884883e88
	.quad	0x135117d18998be9d
	.quad	0x602ce9742e883eba
	.quad	0xa35ad0be8e38bee3
	.quad	0xffac922249243f12
	.quad	0x7f14ccccccccbf4c
	.quad	0xaa8faaaaaaaa3faa
	.quad	0x0000000000000000