xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/x86_64/gcd_11.asm (revision 924795e69c8bb3f17afd8fcbb799710cc1719dc4)
1dnl  AMD64 mpn_gcd_11 -- 1 x 1 gcd.
2
3dnl  Based on the K7 gcd_1.asm, by Kevin Ryde.  Rehacked for AMD64 by Torbjorn
4dnl  Granlund.
5
6dnl  Copyright 2000-2002, 2005, 2009, 2011, 2012, 2017 Free Software
7dnl  Foundation, Inc.
8
9dnl  This file is part of the GNU MP Library.
10dnl
11dnl  The GNU MP Library is free software; you can redistribute it and/or modify
12dnl  it under the terms of either:
13dnl
14dnl    * the GNU Lesser General Public License as published by the Free
15dnl      Software Foundation; either version 3 of the License, or (at your
16dnl      option) any later version.
17dnl
18dnl  or
19dnl
20dnl    * the GNU General Public License as published by the Free Software
21dnl      Foundation; either version 2 of the License, or (at your option) any
22dnl      later version.
23dnl
24dnl  or both in parallel, as here.
25dnl
26dnl  The GNU MP Library is distributed in the hope that it will be useful, but
27dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
28dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
29dnl  for more details.
30dnl
31dnl  You should have received copies of the GNU General Public License and the
32dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
33dnl  see https://www.gnu.org/licenses/.
34
35include(`../config.m4')
36
37
38C	     cycles/bit
39C AMD K8,K9	 5.5
40C AMD K10	 ?
41C AMD bd1	 ?
42C AMD bd2	 ?
43C AMD bd3	 ?
44C AMD bd4	 ?
45C AMD bt1	 7.1
46C AMD bt2	 ?
47C AMD zn1	 ?
48C AMD zn2	 ?
49C Intel P4	 ?
50C Intel CNR	 ?
51C Intel PNR	 ?
52C Intel NHM	 ?
53C Intel WSM	 ?
54C Intel SBR	 ?
55C Intel IBR	 ?
56C Intel HWL	 ?
57C Intel BWL	 ?
58C Intel SKL	 ?
59C Intel atom	 9.1
60C Intel SLM	 6.9
61C Intel GLM	 6.0
62C Intel GLM+	 5.8
63C VIA nano	 ?
64
65
66C ctz_table[n] is the number of trailing zeros on n, or MAXSHIFT if n==0.
C
C The table is indexed with the low MAXSHIFT bits of a value (the code below
C masks with MASK before each lookup), so it holds MASK+1 one-byte entries.
C Entry 0 is special: all MAXSHIFT low bits are zero, so the true count is not
C known from those bits alone.  MAXSHIFT is stored there so the user of the
C table can shift by that much and look again.
67
C MAXSHIFT is the number of low bits examined per lookup.  MASK is the
C matching bit mask, (1 << MAXSHIFT) - 1.
68deflit(MAXSHIFT, 7)
69deflit(MASK, eval((m4_lshift(1,MAXSHIFT))-1))
70
C NOTE(review): the second DEF_OBJECT argument is presumably the alignment in
C bytes -- confirm against the macro definition.
71DEF_OBJECT(ctz_table,64)
C Entry for n == 0.
72	.byte	MAXSHIFT
C Entries for n = 1 .. MASK, generated at m4 time.
73forloop(i,1,MASK,
74`	.byte	m4_count_trailing_zeros(i)
75')
76END_OBJECT(ctz_table)
77
C Symbolic names for the two operand registers used by mpn_gcd_11.
C NOTE(review): rdi and rsi are the first two integer argument registers of
C the System V x86-64 convention; for DOS64 the arguments are presumably moved
C into place by FUNC_ENTRY -- confirm against the macro definition.
78define(`u0',    `%rdi')
79define(`v0',    `%rsi')
80
C Calling conventions this file declares support for.
81ABI_SUPPORT(DOS64)
82ABI_SUPPORT(STD64)
83
84ASM_START()
85	TEXT
C Alignment request for the function entry that follows.
86	ALIGN(64)
C mpn_gcd_11 -- gcd of two single limbs.
C
C In:   u0 (rdi), v0 (rsi) = the two operands.
C Out:  rax = result; rdx = 0 (relied on by the internal gcd_22 call).
C Uses: rcx (shift count), rdx (v - u), r8 (ctz_table address), flags.
C
C Each iteration replaces (u, v) by (|u - v| >> ctz(u - v), min(u, v)) and the
C loop ends when u == v.
C
C NOTE(review): only differences have their trailing zeros stripped, never the
C operands themselves, and the loop ends only when u == v.  This looks like it
C requires both operands to be odd and nonzero -- confirm against the callers.
87PROLOGUE(mpn_gcd_11)
C NOTE(review): FUNC_ENTRY/FUNC_EXIT presumably adapt the DOS64 argument
C registers to the rdi/rsi layout used here -- confirm.
88	FUNC_ENTRY(2)
C r8 = address of ctz_table; it is the base register of the lookup at L(mid).
89	LEA(	ctz_table, %r8)
C Enter the loop at the subtraction, skipping the shift of u.
90	jmp	L(ent)
91
92	ALIGN(16)
C On arrival CF is set iff u < v (from the sub v0, u0 at the loop bottom; jnz
C does not change flags), rdx = v - u, and rax = u from before the subtraction.
93L(top):	cmovc	%rdx, u0		C u = |u - v|
94	cmovc	%rax, v0		C v = min(u,v)
C Keep only the low MAXSHIFT bits of rdx.  From L(top) rdx is v - u; negation
C does not change the trailing zero count, so the lookup is valid for |u - v|
C too.  From L(shift_alot) rdx is the already shifted u.  ZF is set iff the
C kept bits are all zero.
95L(mid):	and	$MASK, R32(%rdx)
C rcx = ctz_table[rdx].  movzbl does not modify flags, so the jz below still
C tests the result of the and above.
96	movzbl	(%r8,%rdx), R32(%rcx)
97	jz	L(shift_alot)
C Strip the trailing zeros from u.
98	shr	R8(%rcx), u0
C rax = u (this is the return value when u == v).
C rdx = v - u, used for the u < v case and as the table index.
C u0 = u - v, with CF set iff u < v and ZF set iff u == v.
99L(ent):	mov	u0, %rax
100	mov	v0, %rdx
101	sub	u0, %rdx
102	sub	v0, u0
103	jnz	L(top)
104
105L(end):	C rax = result
106	C rdx = 0 for the benefit of internal gcd_22 call
107	FUNC_EXIT()
108	ret
109
C The low MAXSHIFT bits examined at L(mid) were all zero.  Shift u right by
C MAXSHIFT, then go back and examine the low bits of the shifted value.
110L(shift_alot):
111	shr	$MAXSHIFT, u0
112	mov	u0, %rdx
113	jmp	L(mid)
114EPILOGUE()
115