#
# Copyright (c) 1985 Regents of the University of California.
#
# Use and reproduction of this software are granted in accordance with
# the terms and conditions specified in the Berkeley Software License
# Agreement (in particular, this entails acknowledgement of the programs'
# source, and inclusion of this notice) with the additional understanding
# that all recipients should regard themselves as participants in an
# ongoing research project and hence should feel obligated to report
# their experiences (good or bad) with these elementary function codes,
# using "sendbug 4bsd-bugs@BERKELEY", to the authors.
#

# @(#)cbrt.s	1.1 (ELEFUNT) 09/06/85

# double cbrt(double arg)
# W. Kahan, 10/13/80. revised 1/13/84 for keeping sign symmetry
# error check by E LeBlanc, 8/18/82
# Revised and tested by K.C. Ng, 5/2/85
# Max error less than 0.667 ulps (unit in the last places)
#
# Entry points
#	_cbrt, _d_cbrt	argument passed by value, loaded from 4(ap)
#	_dcbrt_		argument passed by reference, loaded through *4(ap)
#			(presumably the Fortran-callable entry -- confirm
#			against callers)
#
# Result
#	r0:r1 = cbrt(x).  An argument whose biased exponent field is zero
#	(true zero or reserved operand) is returned unchanged.
#
# Registers
#	r2-r7	scratch; saved/restored through the entry mask 0x00fc
#	ap	reused as scratch to hold sign(x) once the argument has been
#		loaded; the argument list is not referenced afterwards
#
# Method (q denotes the current approximation of cbrt(|x|))
#	1. Integer step: the word-swapped leading longword of |x| is divided
#	   by 3 and offset by B, giving q good to about 5 bits.
#	2. Single precision correction, giving q to 23 bits:
#		s = C + q*q*q/x
#		q = q * (G + F/(s + E + D/s))
#	3. One double precision Halley step:
#		t = x/(q*q)
#		result = q + q*(t - q)/(2*q + t)
#	The sign is stripped before step 1 and put back at the end, which
#	keeps cbrt(-x) = -cbrt(x).
#
	.globl	_cbrt
	.globl	_d_cbrt
	.globl	_dcbrt_
	.text
	.align	1

_cbrt:
_d_cbrt:
	.word	0x00fc			# save r2 to r7
	movq	4(ap),r0		# r0:r1 = argument x (by value)
	jmp	dcbrt2
_dcbrt_:
	.word	0x00fc			# save r2 to r7
	movq	*4(ap),r0		# r0:r1 = argument x (by reference)

dcbrt2:	bicw3	$0x807f,r0,r2		# biased exponent of x (sign and
					# fraction bits masked off)
	jeql	return			# dcbrt(0)=0, dcbrt(reserved operand)=
					# reserved operand: x returned as is
	bicw3	$0x7fff,r0,ap		# ap has sign(x)
	xorw2	ap,r0			# r0 is abs(x)

# Step 1: rough estimate by integer arithmetic on the leading longword.
	movl	r0,r2			# r2 has abs(x)
	rotl	$16,r2,r2		# r2 = |x| with bits unscrambled
					# (exponent word moved to the top)
	divl2	$3,r2			# rough dcbrt with bias/3
	addl2	B,r2			# restore bias, diminish fraction
	rotl	$16,r2,r2		# r2=|q|=|dcbrt| to 5 bits

# Step 2: single precision refinement of q.
	mulf3	r2,r2,r3		# r3 = qq
	divf2	r0,r3			# r3 = qq/x
	mulf2	r2,r3			# r3 = qqq/x
	addf2	C,r3			# r3 = s = C + qqq/x
	divf3	r3,D,r4			# r4 = D/s
	addf2	E,r4			# r4 = E + D/s
	addf2	r4,r3			# r3 = s + E + D/s
	divf3	r3,F,r3			# r3 = F / (s + E + D/s)
	addf2	G,r3			# r3 = G + F / (s + E + D/s)
	mulf2	r3,r2			# r2 = qr3 = new q to 23 bits

# Step 3: one double precision step, q + q*(x/qq - q)/(2q + x/qq).
	clrl	r3			# r2:r3 = q as double float
					# (low order longword zeroed)
	muld3	r2,r2,r4		# r4:r5 = qq exactly
	divd2	r4,r0			# r0:r1 = x/(q*q) rounded
	subd3	r2,r0,r6		# r6:r7 = x/(q*q) - q exactly
	movq	r2,r4			# r4:r5 = q
	addw2	$0x80,r4		# r4:r5 = 2 * q (exponent field, which
					# starts at bit 7, bumped by one)
	addd2	r0,r4			# r4:r5 = 2*q + x/(q*q)
	divd2	r4,r6			# r6:r7 = (x/(q*q)-q)/(2*q+x/(q*q))
	muld2	r2,r6			# r6:r7 = q*(x/(q*q)-q)/(2*q+x/(q*q))
	addd3	r6,r2,r0		# r0:r1 = q + r6:r7
	bisw2	ap,r0			# restore the sign bit
return:
	ret				# error less than 0.667 ulps

# Constants: B is the integer offset used by step 1; C..G are the single
# precision coefficients of the rational correction in step 2.
	.data
	.align	2
B:	.long	721142941		# (86-0.03306235651)*(2^23)
C:	.float	0f0.5428571429		# 19/35
D:	.float	0f-0.7053061224		# -864/1225
E:	.float	0f1.414285714		# 99/70
F:	.float	0f1.607142857		# 45/28
G:	.float	0f0.3571428571		# 5/14