xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/ia64/gcd_11.asm (revision 32d1c65c71fbdb65a012e8392a62a757dd6853e9)
1dnl  Itanium-2 mpn_gcd_11
2
3dnl  Copyright 2002-2005, 2012, 2013, 2015, 2019 Free Software Foundation, Inc.
4
5dnl  This file is part of the GNU MP Library.
6dnl
7dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8dnl  it under the terms of either:
9dnl
10dnl    * the GNU Lesser General Public License as published by the Free
11dnl      Software Foundation; either version 3 of the License, or (at your
12dnl      option) any later version.
13dnl
14dnl  or
15dnl
16dnl    * the GNU General Public License as published by the Free Software
17dnl      Foundation; either version 2 of the License, or (at your option) any
18dnl      later version.
19dnl
20dnl  or both in parallel, as here.
21dnl
22dnl  The GNU MP Library is distributed in the hope that it will be useful, but
23dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
25dnl  for more details.
26dnl
27dnl  You should have received copies of the GNU General Public License and the
28dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
29dnl  see https://www.gnu.org/licenses/.
30
31include(`../config.m4')
32
33
34C           cycles/bitpair (1x1 gcd)
35C Itanium:       ?
36C Itanium 2:     4.5
37
38
39ASM_START()
40
41C ctz_table[n] is one less than the number of trailing zeros on n (the loop in
C mpn_gcd_11 has already shifted right by 1 before it applies the entry), or
C MAXSHIFT if n==0.
42
C MAXSHIFT is the number of low bits of the difference used as table index.
C MASK = 2^MAXSHIFT - 1 selects those bits and is also the last table index.
43deflit(MAXSHIFT, 7)
44deflit(MASK, eval((m4_lshift(1,MAXSHIFT))-1))
45
C The table has 2^MAXSHIFT one-byte entries and is aligned to 2^MAXSHIFT
C bytes, so an entry address can be formed by depositing the index into the
C low MAXSHIFT bits of the table address (the dep in mpn_gcd_11).
C Entry 0 holds MAXSHIFT.  mpn_gcd_11 branches to L(count_better) when the
C index is 0 and overwrites the loaded value with a popcnt-based count.
C NOTE(review): odd indices produce -1 here; presumably they are never used
C because the difference of two odd operands is even -- confirm.
46	.rodata
47	ALIGN(m4_lshift(1,MAXSHIFT))	C align table to allow using dep
48ctz_table:
49	data1	MAXSHIFT
50forloop(i,1,MASK,
51`	data1	m4_count_trailing_zeros(i)-1
52')
53
C Register roles in mpn_gcd_11:
C   x0 = r32, y0 = r33   the two operands, updated in place by the loop
C   r22                  address of ctz_table
C   r19                  y0 - x0, computed at L(ent) for the next iteration
C   r21                  address of the ctz_table entry selected by r19
C   r20                  low MAXSHIFT bits of r19
C   r23                  bits 1..2 of r19 (reused as scratch in L(count_better))
C   r16                  shift count used for the ...000 case
C   r8                   result
54define(`x0', r32)
55define(`y0', r33)
56
C mpn_gcd_11: 1x1 gcd of x0 and y0.  Each iteration replaces the pair by
C (min, |difference|) and then shifts the trailing zero bits out of the
C difference.  The loop ends when x0 == y0 and that value is returned.
C NOTE(review): presumably both operands must be odd on entry, since the loop
C only strips factors of two from differences -- confirm against callers.
57PROLOGUE(mpn_gcd_11)
58	.prologue
59	.body
C Load the address of ctz_table from its linkage table slot (addressed off
C r1), then jump to the loop entry, which sets up r19 and the predicates.
60		addl	r22 = @ltoff(ctz_table), r1
61	;;
62		ld8	r22 = [r22]
63		br	L(ent)
64	;;
65
66	ALIGN(32)
C Loop top.  On arrival: x0 != y0, r19 = y0 - x0, p6 = (x0 > y0) unsigned,
C p7 = not p6.
C First instruction group (up to the stop): y0 = min(x0,y0) and
C x0 = |x0 - y0|.  The writes to x0 and y0 are in one group, so the
C (p7) mov into y0 still reads the old x0.  In the same group r21 becomes the
C address of ctz_table[r19 & MASK], r20 = r19 & MASK, r23 = r19 & 6.
C Second group: p6 and p7 are redefined to classify the difference by its
C bits 1..2: p6 for ...100, p7 for ...000, neither for ...10.  x0 is shifted
C right once unconditionally, the table byte is loaded into r16, and if all
C MAXSHIFT low bits of the difference are zero (p10) control leaves for
C L(count_better).
67L(top):
68	.pred.rel "mutex", p6,p7
69 {.mmi;	(p7)	mov	y0 = x0
70	(p6)	sub	x0 = x0, y0
71		dep	r21 = r19, r22, 0, MAXSHIFT	C concat(table,lowbits)
72}{.mmi;		and	r20 = MASK, r19
73	(p7)	mov	x0 = r19
74		and	r23 = 6, r19
75	;;
76}{.mmi;		cmp.eq	p6,p0 = 4, r23
77		cmp.eq	p7,p0 = 0, r23
78		shr.u	x0 = x0, 1		C shift-by-1, always OK
79}{.mmb;		ld1	r16 = [r21]
80		cmp.eq	p10,p0 = 0, r20
81	(p10)	br.spnt.few.clr	 L(count_better)
82	;;
83}
C Finish the shift.  L(count_better) rejoins here with r16 recomputed.
84L(bck):
85	.pred.rel "mutex", p6,p7
86 {.mii;		nop	0
87	(p6)	shr.u	x0 = x0, 1		C u was ...100 before shift-by-1 above
88	(p7)	shr.u	x0 = x0, r16		C u was ...000 before shift-by-1 above
89	;;
90}
C Loop entry and bottom test: precompute the next difference in r19, set
C p6/p7 to the unsigned ordering of x0 and y0, and loop while x0 != y0.
91L(ent):
92 {.mmi;		sub	r19 = y0, x0
93		cmp.gtu	p6,p7 = x0, y0
94		cmp.ne	p8,p0 = x0, y0
95}{.mmb;		nop	0
96		nop	0
97	(p8)	br.sptk.few.clr L(top)
98}
99
C x0 == y0: copy the common value to r8 and return.
100L(end):		mov	r8 = y0
101		br.ret.sptk.many b0
102
C Out-of-line path taken when the low MAXSHIFT bits of the difference are all
C zero.  x0 has already been shifted right once.  (x0 - 1) AND NOT x0 has
C ones exactly in the trailing zero positions of x0, so its popcnt is the
C trailing zero count of x0.  That count replaces the value loaded from
C ctz_table[0].  p7 is still set from the ...000 test, so L(bck) shifts x0 by
C the new r16.
103L(count_better):
104		add	r20 = -1, x0
105	;;
106		andcm	r23 = r20, x0
107	;;
108		popcnt	r16 = r23
109		br	L(bck)
110EPILOGUE()
111