xref: /dflybsd-src/contrib/openbsd_libm/arch/amd64/s_log1p.S (revision a27bb01fb90c87959b0c0dfacf7dd85d1308a35f)
105a0b428SJohn Marino/*	$OpenBSD: s_log1p.S,v 1.3 2009/04/08 23:31:34 martynas Exp $ */
205a0b428SJohn Marino/*
305a0b428SJohn Marino * Written by J.T. Conklin <jtc@NetBSD.org>.
405a0b428SJohn Marino * Public domain.
505a0b428SJohn Marino */
605a0b428SJohn Marino
705a0b428SJohn Marino/*
805a0b428SJohn Marino * Modified by Lex Wennmacher <wennmach@NetBSD.org>
905a0b428SJohn Marino * Still public domain.
1005a0b428SJohn Marino */
1105a0b428SJohn Marino
1205a0b428SJohn Marino#include <machine/asm.h>
1305a0b428SJohn Marino
1405a0b428SJohn Marino#include "abi.h"
1505a0b428SJohn Marino
1605a0b428SJohn Marino/*
1705a0b428SJohn Marino * The log1p() function is provided to compute an accurate value of
1805a0b428SJohn Marino * log(1 + x), even for tiny values of x. The i387 FPU provides the
1905a0b428SJohn Marino * fyl2xp1 instruction for this purpose. However, the range of this
2005a0b428SJohn Marino * instruction is limited to:
2105a0b428SJohn Marino * 		-(1 - (sqrt(2) / 2)) <= x <= sqrt(2) - 1
2205a0b428SJohn Marino *                         -0.292893 <= x <= 0.414214
2305a0b428SJohn Marino * at least on older processor versions.
2405a0b428SJohn Marino *
2505a0b428SJohn Marino * log1p() is implemented by testing the range of the argument.
2605a0b428SJohn Marino * If it is appropriate for fyl2xp1, this instruction is used.
2705a0b428SJohn Marino * Else, we compute log1p(x) = ln(2)*ld(1 + x) the traditional way
2805a0b428SJohn Marino * (using fyl2x).
2905a0b428SJohn Marino *
3005a0b428SJohn Marino * The range testing costs speed, but as the rationale for the very
3105a0b428SJohn Marino * existence of this function is accuracy, we accept that.
3205a0b428SJohn Marino *
3305a0b428SJohn Marino * In order to reduce the cost for testing the range, we check if
3405a0b428SJohn Marino * the argument is in the range
3505a0b428SJohn Marino *                             -0.25 <= x <= 0.25
3605a0b428SJohn Marino * which can be done with just one conditional branch. If x is
3705a0b428SJohn Marino * inside this range, we use fyl2xp1. Outside of this range,
3805a0b428SJohn Marino * the use of fyl2x is accurate enough.
3905a0b428SJohn Marino *
4005a0b428SJohn Marino */
4105a0b428SJohn Marino
4205a0b428SJohn Marino.text
4305a0b428SJohn Marino	.align	4
4405a0b428SJohn MarinoENTRY(log1p)
4505a0b428SJohn Marino	XMM_ONE_ARG_DOUBLE_PROLOGUE
4605a0b428SJohn Marino	fldl	ARG_DOUBLE_ONE
4705a0b428SJohn Marino	fabs
4805a0b428SJohn Marino	fld1				/* ... x 1 */
4905a0b428SJohn Marino	fadd	%st(0)			/* ... x 2 */
5005a0b428SJohn Marino	fadd	%st(0)			/* ... x 4 */
5105a0b428SJohn Marino	fld1				/* ... 4 1 */
5205a0b428SJohn Marino	fdivp				/* ... x 0.25 */
5305a0b428SJohn Marino	fcompp
5405a0b428SJohn Marino	fnstsw	%ax
5505a0b428SJohn Marino	andb	$69,%ah
5605a0b428SJohn Marino	jne	use_fyl2x
5705a0b428SJohn Marino	jmp	use_fyl2xp1
5805a0b428SJohn Marino
5905a0b428SJohn Marino	.align	4
6005a0b428SJohn Marinouse_fyl2x:
6105a0b428SJohn Marino	fldln2
6205a0b428SJohn Marino	fldl	ARG_DOUBLE_ONE
6305a0b428SJohn Marino	fld1
6405a0b428SJohn Marino	faddp
6505a0b428SJohn Marino	fyl2x
6605a0b428SJohn Marino	XMM_DOUBLE_EPILOGUE
6705a0b428SJohn Marino	ret
6805a0b428SJohn Marino
6905a0b428SJohn Marino	.align	4
7005a0b428SJohn Marinouse_fyl2xp1:
7105a0b428SJohn Marino	fldln2
7205a0b428SJohn Marino	fldl	ARG_DOUBLE_ONE
7305a0b428SJohn Marino	fyl2xp1
7405a0b428SJohn Marino	XMM_DOUBLE_EPILOGUE
7505a0b428SJohn Marino	ret
76*a27bb01fSJohn MarinoEND(log1p)
77*a27bb01fSJohn Marino
78*a27bb01fSJohn Marino	.section .note.GNU-stack,"",%progbits
79