/*
 * Copyright (c) 1985 Regents of the University of California.
 *
 * Use and reproduction of this software are granted  in  accordance  with
 * the terms and conditions specified in  the  Berkeley  Software  License
 * Agreement (in particular, this entails acknowledgement of the programs'
 * source, and inclusion of this notice) with the additional understanding
 * that  all  recipients  should regard themselves as participants  in  an
 * ongoing  research  project and hence should  feel  obligated  to report
 * their  experiences (good or bad) with these elementary function  codes,
 * using "sendbug 4bsd-bugs@BERKELEY", to the authors.
 */
	.data
	.align	2
_sccsid:
.asciz	"@(#)sqrt.s 1.1 (Berkeley) 8/21/85; 1.3 (ucb.elefunt) 09/12/85"

/*
 * double sqrt(arg)   revised August 15,1982
 * double arg;
 * if(arg<0.0) { _errno = EDOM; return(<a reserved operand>); }
 * if arg is a reserved operand it is returned as it is
 * W. Kahan's magic square root
 * coded by Heidi Stettner and revised by Emile LeBlanc 8/18/82
 *
 * entry points:_d_sqrt		address of double arg is on the stack
 *		_sqrt		double arg is on the stack
 *
 * Both procedure entry points load the argument into r0:r1 and fall
 * into the common path at dsqrt2.  The result is left in r0:r1.
 * The entry masks save r2-r5, which the internal procedure uses as
 * scratch.
 */
	.text
	.align	1
	.globl	_sqrt
	.globl	_d_sqrt
	.globl	libm$dsqrt_r5
	.set	EDOM,33

_d_sqrt:
	.word	0x003c		# save r5,r4,r3,r2
	movq	*4(ap),r0	# r0:r1 = *(first argument)
	jmp	dsqrt2
_sqrt:
	.word	0x003c		# save r5,r4,r3,r2
	movq	4(ap),r0	# r0:r1 = first argument
dsqrt2:	bicw3	$0x807f,r0,r2	# check exponent of input: clearing the
				# bits in mask 0x807f leaves only bits
				# 14:7 of the first word in r2
	jeql	noexp		# biased exponent is zero -> 0.0 or reserved
				# (argument is returned unchanged in r0:r1)
	bsbb	libm$dsqrt_r5
noexp:	ret

/* **************************** internal procedure */

/*
 * libm$dsqrt_r5
 *	in:	r0:r1 = argument
 *	out:	r0:r1 = square root of the argument
 *	uses:	r0-r5 (r2-r5 are overwritten and not restored here)
 *	entered by bsbb from dsqrt2 above; left by rsb on the normal path.
 *
 * NOTE(review): the historical comment on this label said the result is
 * "scaled by 2^r6", but no instruction in this file reads or writes r6.
 * Presumably that describes a convention of the callers (cdabs, cdsqrt);
 * confirm against those sources.
 *
 * NOTE(review): the nonpos/negarg paths leave with ret, not rsb.  ret
 * unwinds the current procedure call frame, so when entered from
 * _sqrt/_d_sqrt control returns straight to their caller.  A caller
 * entering with a subroutine call from its own procedure would have its
 * own frame returned instead -- verify that this is what such callers
 * expect.
 */
libm$dsqrt_r5:			# ENTRY POINT FOR cdabs and cdsqrt

	movd	r0,r4		# r4:r5 = argument n; sets condition codes
	jleq	nonpos		# argument is not positive
	movzwl	r4,r2		# r2 = first word of n (exponent and
				# leading fraction bits), zero extended
	ashl	$-1,r2,r0	# halve that word
	addw2	$0x203c,r0	# r0 has magic initial approximation
/*
 * Do two steps of Heron's rule
 * ((arg/guess) + guess) / 2 = better guess
 *
 * These steps use the single precision instructions on r0 and r4 only.
 */
	divf3	r0,r4,r2	# r2 = arg/guess
	addf2	r2,r0		# r0 = arg/guess + guess
	subw2	$0x80,r0	# divide by two (exponent field less one)

	divf3	r0,r4,r2	# r2 = arg/guess
	addf2	r2,r0		# r0 = arg/guess + guess
	subw2	$0x80,r0	# divide by two

/* Scale argument and approximation to prevent over/underflow */

	bicw3	$0x807f,r4,r1	# r1 = exponent field of n
	subw2	$0x4080,r1	# r1 contains scaling factor
	subw2	r1,r4		# r4:r5 = n/s
	movl	r0,r2
	subw2	r1,r2		# r2 = a/s

/* Cubic step
 *
 * b = a + 2*a*(n-a*a)/(n+3*a*a) where b is better approximation,
 * a is approximation, and n is the original argument.
 * (let s be scale factor in the following comments)
 */
	clrl	r1		# r0:r1 = a     (low longword zeroed)
	clrl	r3		# r2:r3 = a/s   (low longword zeroed)
	muld2	r0,r2		# r2:r3 = a*a/s
	subd2	r2,r4		# r4:r5 = n/s - a*a/s
	addw2	$0x100,r2	# r2:r3 = 4*a*a/s
	addd2	r4,r2		# r2:r3 = n/s + 3*a*a/s
	muld2	r0,r4		# r4:r5 = a*n/s - a*a*a/s
	divd2	r2,r4		# r4:r5 = a*(n-a*a)/(n+3*a*a)
	addw2	$0x80,r4	# r4:r5 = 2*a*(n-a*a)/(n+3*a*a)
	addd2	r4,r0		# r0:r1 = a + 2*a*(n-a*a)/(n+3*a*a)
	rsb			# DONE!
nonpos:
	jneq	negarg		# condition codes still those of the movd
	ret			# argument and root are zero
negarg:
	pushl	$EDOM
	calls	$1,_infnan	# generate the reserved op fault
	ret			# r0:r1 as left by _infnan