# Copyright (c) 1985 Regents of the University of California.
# All rights reserved.
#
# Redistribution and use in source and binary forms are permitted
# provided that this notice is preserved and that due credit is given
# to the University of California at Berkeley. The name of the University
# may not be used to endorse or promote products derived from this
# software without specific prior written permission. This software
# is provided ``as is'' without express or implied warranty.
#
# All recipients should regard themselves as participants in an ongoing
# research project and hence should feel obligated to report their
# experiences (good or bad) with these elementary function codes, using
# the sendbug(8) program, to the authors.
#
#       @(#)cbrt.s 5.2 (Berkeley) 04/29/88
#
        .data
        .align  2
_sccsid:
        .asciz  "@(#)cbrt.s 1.1 (Berkeley) 5/23/85; 5.2 (ucb.elefunt) 04/29/88"

# double cbrt(double arg)
# W. Kahan, 10/13/80. revised 1/13/84 for keeping sign symmetry
# error check by E LeBlanc, 8/18/82
# Revised and tested by K.C. Ng, 5/2/85
# Max error less than 0.667 ulps (unit in the last places)
#
# Entry points
#   _cbrt, _d_cbrt  argument passed by value:      x = 4(ap)
#   _dcbrt_         argument passed by reference:  x = *4(ap)
#                   (presumably the Fortran-callable entry -- confirm
#                   against callers)
# All three share the body at dcbrt2.
#
# Result
#   r0:r1 = cbrt(x), carrying the sign of x.  When the biased exponent
#   field of x is zero (x = 0, or a reserved operand) the argument is
#   returned unchanged.
#
# Registers
#   r2-r7 are used as scratch and are listed in the entry mask (0x00fc).
#   ap is overwritten with sign(x) once the argument has been fetched;
#   it is not used as the argument pointer after that.
#
# Method
#   1. Strip the sign; treat |x| as an integer, divide by 3 and add the
#      constant B to get a first guess q good to about 5 bits.
#   2. Refine q in single precision with the rational correction
#          s = C + q*q*q/x
#          q = q * (G + F/(s + E + D/s))          (about 23 bits)
#   3. Do one step in double precision
#          q = q + q*(x/(q*q) - q)/(2*q + x/(q*q))
#   4. Put the sign of x back.
        .globl  _cbrt
        .globl  _d_cbrt
        .globl  _dcbrt_
        .text
        .align  1

_cbrt:
_d_cbrt:
        .word   0x00fc                  # save r2 to r7
        movq    4(ap),r0                # r0 = argument x
        jmp     dcbrt2
_dcbrt_:
        .word   0x00fc                  # save r2 to r7
        movq    *4(ap),r0               # r0 = argument x

dcbrt2:
        bicw3   $0x807f,r0,r2           # biased exponent of x
        jeql    return                  # dcbrt(0)=0  dcbrt(res)=res. operand
        bicw3   $0x7fff,r0,ap           # ap has sign(x)
        xorw2   ap,r0                   # r0 is abs(x)
        movl    r0,r2                   # r2 has abs(x)
        rotl    $16,r2,r2               # r2 = |x| with bits unscrambled
        divl2   $3,r2                   # rough dcbrt with bias/3
        addl2   B,r2                    # restore bias, diminish fraction
        rotl    $16,r2,r2               # r2=|q|=|dcbrt| to 5 bits

        # single precision refinement of q
        mulf3   r2,r2,r3                # r3 = qq
        divf2   r0,r3                   # r3 = qq/x
        mulf2   r2,r3                   # r3 = qqq/x
        addf2   C,r3                    # r3 = s = C + qqq/x
        divf3   r3,D,r4                 # r4 = D/s
        addf2   E,r4                    # r4 = E + D/s
        addf2   r4,r3                   # r3 = s + E + D/s
        divf3   r3,F,r3                 # r3 = F / (s + E + D/s)
        addf2   G,r3                    # r3 = G + F / (s + E + D/s)
        mulf2   r3,r2                   # r2 = qr3 = new q to 23 bits

        # final step in double precision
        clrl    r3                      # r2:r3 = q as double float
        muld3   r2,r2,r4                # r4:r5 = qq exactly
        divd2   r4,r0                   # r0:r1 = x/(q*q) rounded
        subd3   r2,r0,r6                # r6:r7 = x/(q*q) - q exactly
        movq    r2,r4                   # r4:r5 = q
        addw2   $0x80,r4                # r4:r5 = 2 * q
        addd2   r0,r4                   # r4:r5 = 2*q + x/(q*q)
        divd2   r4,r6                   # r6:r7 = (x/(q*q)-q)/(2*q+x/(q*q))
        muld2   r2,r6                   # r6:r7 = q*(x/(q*q)-q)/(2*q+x/(q*q))
        addd3   r6,r2,r0                # r0:r1 = q + r6:r7
        bisw2   ap,r0                   # restore the sign bit
return:
        ret                             # error less than 0.667 ulps

# Constants used by the first guess (B) and by the single precision
# rational correction (C through G).
        .data
        .align  2
B:      .long   721142941               # (86-0.03306235651)*(2^23)
C:      .float  0f0.5428571429          # 19/35
D:      .float  0f-0.7053061224         # -864/1225
E:      .float  0f1.414285714           # 99/70
F:      .float  0f1.607142857           # 45/28
G:      .float  0f0.3571428571          # 5/14