105a0b428SJohn Marino/* $OpenBSD: s_log1p.S,v 1.3 2009/04/08 23:31:34 martynas Exp $ */ 205a0b428SJohn Marino/* 305a0b428SJohn Marino * Written by J.T. Conklin <jtc@NetBSD.org>. 405a0b428SJohn Marino * Public domain. 505a0b428SJohn Marino */ 605a0b428SJohn Marino 705a0b428SJohn Marino/* 805a0b428SJohn Marino * Modified by Lex Wennmacher <wennmach@NetBSD.org> 905a0b428SJohn Marino * Still public domain. 1005a0b428SJohn Marino */ 1105a0b428SJohn Marino 1205a0b428SJohn Marino#include <machine/asm.h> 1305a0b428SJohn Marino 1405a0b428SJohn Marino#include "abi.h" 1505a0b428SJohn Marino 1605a0b428SJohn Marino/* 1705a0b428SJohn Marino * The log1p() function is provided to compute an accurate value of 1805a0b428SJohn Marino * log(1 + x), even for tiny values of x. The i387 FPU provides the 1905a0b428SJohn Marino * fyl2xp1 instruction for this purpose. However, the range of this 2005a0b428SJohn Marino * instruction is limited to: 2105a0b428SJohn Marino * -(1 - (sqrt(2) / 2)) <= x <= sqrt(2) - 1 2205a0b428SJohn Marino * -0.292893 <= x <= 0.414214 2305a0b428SJohn Marino * at least on older processor versions. 2405a0b428SJohn Marino * 2505a0b428SJohn Marino * log1p() is implemented by testing the range of the argument. 2605a0b428SJohn Marino * If it is appropriate for fyl2xp1, this instruction is used. 2705a0b428SJohn Marino * Else, we compute log1p(x) = ln(2)*ld(1 + x) the traditional way 2805a0b428SJohn Marino * (using fyl2x). 2905a0b428SJohn Marino * 3005a0b428SJohn Marino * The range testing costs speed, but as the rationale for the very 3105a0b428SJohn Marino * existence of this function is accuracy, we accept that. 3205a0b428SJohn Marino * 3305a0b428SJohn Marino * In order to reduce the cost for testing the range, we check if 3405a0b428SJohn Marino * the argument is in the range 3505a0b428SJohn Marino * -0.25 <= x <= 0.25 3605a0b428SJohn Marino * which can be done with just one conditional branch. If x is 3705a0b428SJohn Marino * inside this range, we use fyl2xp1. Outside of this range, 3805a0b428SJohn Marino * the use of fyl2x is accurate enough. 3905a0b428SJohn Marino * 4005a0b428SJohn Marino */ 4105a0b428SJohn Marino 4205a0b428SJohn Marino.text 4305a0b428SJohn Marino .align 4 4405a0b428SJohn MarinoENTRY(log1p) 4505a0b428SJohn Marino XMM_ONE_ARG_DOUBLE_PROLOGUE 4605a0b428SJohn Marino fldl ARG_DOUBLE_ONE 4705a0b428SJohn Marino fabs 4805a0b428SJohn Marino fld1 /* ... x 1 */ 4905a0b428SJohn Marino fadd %st(0) /* ... x 2 */ 5005a0b428SJohn Marino fadd %st(0) /* ... x 4 */ 5105a0b428SJohn Marino fld1 /* ... 4 1 */ 5205a0b428SJohn Marino fdivp /* ... x 0.25 */ 5305a0b428SJohn Marino fcompp 5405a0b428SJohn Marino fnstsw %ax 5505a0b428SJohn Marino andb $69,%ah 5605a0b428SJohn Marino jne use_fyl2x 5705a0b428SJohn Marino jmp use_fyl2xp1 5805a0b428SJohn Marino 5905a0b428SJohn Marino .align 4 6005a0b428SJohn Marinouse_fyl2x: 6105a0b428SJohn Marino fldln2 6205a0b428SJohn Marino fldl ARG_DOUBLE_ONE 6305a0b428SJohn Marino fld1 6405a0b428SJohn Marino faddp 6505a0b428SJohn Marino fyl2x 6605a0b428SJohn Marino XMM_DOUBLE_EPILOGUE 6705a0b428SJohn Marino ret 6805a0b428SJohn Marino 6905a0b428SJohn Marino .align 4 7005a0b428SJohn Marinouse_fyl2xp1: 7105a0b428SJohn Marino fldln2 7205a0b428SJohn Marino fldl ARG_DOUBLE_ONE 7305a0b428SJohn Marino fyl2xp1 7405a0b428SJohn Marino XMM_DOUBLE_EPILOGUE 7505a0b428SJohn Marino ret 76*a27bb01fSJohn MarinoEND(log1p) 77*a27bb01fSJohn Marino 78*a27bb01fSJohn Marino .section .note.GNU-stack,"",%progbits 79