xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/x86/p6/gcd_11.asm (revision b2c35e17b976cf7ccd7250c86c6f5e95090ed636)
1dnl  x86 mpn_gcd_11 optimised for processors with fast BSF.
2
3dnl  Based on the K7 gcd_1.asm, by Kevin Ryde.  Rehacked by Torbjorn Granlund.
4
5dnl  Copyright 2000-2002, 2005, 2009, 2011, 2012, 2015 Free Software
6dnl  Foundation, Inc.
7
8dnl  This file is part of the GNU MP Library.
9dnl
10dnl  The GNU MP Library is free software; you can redistribute it and/or modify
11dnl  it under the terms of either:
12dnl
13dnl    * the GNU Lesser General Public License as published by the Free
14dnl      Software Foundation; either version 3 of the License, or (at your
15dnl      option) any later version.
16dnl
17dnl  or
18dnl
19dnl    * the GNU General Public License as published by the Free Software
20dnl      Foundation; either version 2 of the License, or (at your option) any
21dnl      later version.
22dnl
23dnl  or both in parallel, as here.
24dnl
25dnl  The GNU MP Library is distributed in the hope that it will be useful, but
26dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
27dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
28dnl  for more details.
29dnl
30dnl  You should have received copies of the GNU General Public License and the
31dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
32dnl  see https://www.gnu.org/licenses/.
33
34include(`../config.m4')
35
36
37C	     cycles/bit (approx)
38C AMD K7	 7.80
39C AMD K8,K9	 7.79
40C AMD K10	 4.08
41C AMD bd1	 ?
42C AMD bobcat	 7.82
43C Intel P4-2	14.9
44C Intel P4-3/4	14.0
45C Intel P6/13	 5.09
46C Intel core2	 4.22
47C Intel NHM	 5.00
48C Intel SBR	 5.00
49C Intel atom	17.1
50C VIA nano	?
51C Numbers measured with: speed -CD -s16-32 -t16 mpn_gcd_1
52
53
C Symbolic names for the registers that hold the two operands.  The
C instructions in this file spell out %eax and %edx directly instead of
C going through these names.
54define(`u0',    `%eax')
55define(`v0',    `%edx')
56
57ASM_START()
58	TEXT
59	ALIGN(16)
C mpn_gcd_11 -- GCD of two 32-bit values by repeated subtract and shift.
C
C In:    first operand,  read from 12(%esp) after the two pushes below
C                        (that is 4(%esp) on entry), kept in %eax as u
C        second operand, read from 16(%esp) after the two pushes below
C                        (that is 8(%esp) on entry), kept in %edx as v
C Out:   %eax = the common value reached when u == v
C Uses:  %ecx, %edx, %esi, %edi and the flags as scratch; %esi and %edi
C        are pushed on entry and popped before returning.
C
C NOTE(review): the loop only ever shifts the difference of the operands,
C never the operands themselves, so it looks like both inputs are expected
C to be odd (and therefore nonzero).  Confirm against the callers.
60PROLOGUE(mpn_gcd_11)
61	push	%edi		C save %edi, used below to hold the old u
62	push	%esi		C save %esi, used below to hold v - u
63
64	mov	12(%esp), %eax	C u = first stack argument
65	mov	16(%esp), %edx	C v = second stack argument
66	jmp	L(odd)		C enter at the subtract step; skip update and shift
67
C Update step, reached only when the last u - v was nonzero.  The carry
C flag tested by the two conditional moves is still the borrow from the
C final sub of the subtract step: jnz and the conditional moves do not
C write flags.
C   borrow set   (u < v):  u = v - u (from %esi), v = old u (from %edi)
C   borrow clear (u > v):  u already holds u - v, v is left alone
C Afterwards the new u is shifted right by %cl, the trailing zero count
C that bsf found in the difference.
68	ALIGN(16)		C               K10   BD    C2    NHM   SBR
69L(top):	cmovc(	%esi, %eax)	C u = |v - u|   0,3   0,3   0,6   0,5   0,5
70	cmovc(	%edi, %edx)	C v = min(u,v)  0,3   0,3   2,8   1,7   1,7
71	shr	%cl, %eax	C               1,7   1,6   2,8   2,8   2,8
C Subtract step.  Produces:
C   %esi = v - u
C   %ecx = index of the lowest set bit of v - u; this is the same count
C          as for u - v, because negating a value keeps its trailing zeros
C   %edi = copy of u taken before it is overwritten
C   %eax = u - v, with ZF set when u == v and CF set when u < v (unsigned)
C bsf also writes flags, but the sub into %eax comes after it, so jnz and
C the conditional moves see the flags of that sub.  When u == v the bsf
C source is zero; %ecx is not read on that path since the loop exits.
72L(odd):	mov	%edx, %esi	C               1     1     4     3     3
73	sub	%eax, %esi	C               2     2     5     4     4
74	bsf	%esi, %ecx	C               3     3     6     5     5
75	mov	%eax, %edi	C               2     2     3     3     4
76	sub	%edx, %eax	C               2     2     4     3     4
77	jnz	L(top)		C
78
C Here u - v was zero, so %eax is 0 and %edx holds the common value.
79L(end):	mov	%edx, %eax	C result = v
80	pop	%esi		C restore in reverse order of the pushes
81	pop	%edi
82	ret
83EPILOGUE()
84