xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/powerpc64/mode64/p7/gcd_22.asm (revision 7d62b00eb9ad855ffcd7da46b41e23feb5476fac)
1dnl  PowerPC-64 mpn_gcd_22 optimised for POWER7 and POWER8.
2
3dnl  Copyright 2000-2002, 2005, 2009, 2011-2013, 2019 Free Software Foundation,
4dnl  Inc.
5
6dnl  This file is part of the GNU MP Library.
7dnl
8dnl  The GNU MP Library is free software; you can redistribute it and/or modify
9dnl  it under the terms of either:
10dnl
11dnl    * the GNU Lesser General Public License as published by the Free
12dnl      Software Foundation; either version 3 of the License, or (at your
13dnl      option) any later version.
14dnl
15dnl  or
16dnl
17dnl    * the GNU General Public License as published by the Free Software
18dnl      Foundation; either version 2 of the License, or (at your option) any
19dnl      later version.
20dnl
21dnl  or both in parallel, as here.
22dnl
23dnl  The GNU MP Library is distributed in the hope that it will be useful, but
24dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
25dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
26dnl  for more details.
27dnl
28dnl  You should have received copies of the GNU General Public License and the
29dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
30dnl  see https://www.gnu.org/licenses/.
31
32include(`../config.m4')
33
34C		    cycles/bit (approx)
35C POWER3/PPC630		 -
36C POWER4/PPC970		 -
37C POWER5		 -
38C POWER6		 -
39C POWER7		12.3
40C POWER8		13.4
41C POWER9		10.6
42
43C We define SLOW if this target uses a slow struct return mechanism, with
44C r3 as an implicit parameter for the struct pointer.
C Decision table for SLOW (the macro body is only a human-readable reason;
C the ifdef tests in this file just check whether SLOW is defined):
C   AIX defined              -> SLOW defined
C   else DARWIN defined      -> SLOW left undefined
C   else ELFv2_ABI defined   -> SLOW left undefined
C   else                     -> SLOW defined
C Any earlier definition of SLOW is discarded first.
45undefine(`SLOW')dnl
46ifdef(`AIX',`define(`SLOW',`due to AIX')',`
47  ifdef(`DARWIN',,`
48    ifdef(`ELFv2_ABI',,`define(`SLOW',`due to ELFv1')')dnl
49  ')
50')
51
C Bind the operand names to argument registers.  With SLOW defined the four
C limbs are taken from r4-r7 and IFSLOW(x) expands to x; without SLOW they
C are taken from r3-r6 and IFSLOW(x) expands to nothing.  u1 and v1 name the
C limbs the code treats as the high halves (they receive the borrow from the
C u0/v0 subtraction), u0 and v0 the low halves.
C NOTE(review): IFSLOW is not referenced in the visible part of this file.
52ifdef(`SLOW',`
53define(`IFSLOW', `$1')
54define(`u1',    `r4')
55define(`u0',    `r5')
56define(`v1',    `r6')
57define(`v0',    `r7')
58',`
59define(`IFSLOW', `')
60define(`u1',    `r3')
61define(`u0',    `r4')
62define(`v1',    `r5')
63define(`v0',    `r6')
64')
65
C Scratch registers.  In the two-limb loop t1:t0 receives u - v and s1:s0
C receives v - u; cnt holds shift counts and tmp is a short-lived temporary.
C The one-limb loop names r0, r8, r10 and r11 directly instead of going
C through these macros, so the numbers below must stay in step with it.
66define(`tmp',   `r0')
67define(`t0',    `r8')
68define(`t1',    `r9')
69define(`s0',    `r10')
70define(`s1',    `r11')
71define(`cnt',   `r12')
72
73ASM_START()
C mpn_gcd_22: gcd of the two-limb operands {u1,u0} and {v1,v0}.
C
C Inputs:  u1:u0 and v1:v0 (high limb : low limb) in the registers selected
C          by the u1/u0/v1/v0 macros.
C          NOTE(review): each step only strips the trailing zero bits of
C          u - v, which presumes both inputs are odd -- confirm with callers.
C Output:  two limbs.  With SLOW the low limb is stored at 0(r3) and the high
C          limb at 8(r3); otherwise r3 returns the low limb and r4 the high.
C Uses:    r0, r3-r12 (r7 only with SLOW), cr0, cr7 and the carry bit CA.
C          No stack frame is set up.
C
C Structure:
C   L(top)/L(bck)   two-limb loop, run while either high limb is nonzero
C   L(lowz)         low limbs equal: finish, or fold in the high-limb difference
C   L(odd)/L(top1)  one-limb loop once both high limbs are zero
C   L(end)          u == v on two limbs: return that value
C
C Every step replaces (u, v) by (|u - v| with its trailing zero bits shifted
C out, min(u, v)).
C NOTE(review): the bare numbers in the right-hand comments look like
C scheduling annotations; they are kept unchanged.
74PROLOGUE(mpn_gcd_22)
C t0 = u0 - v0, recording the borrow in CA and the zero test in cr0.
C Equal low limbs are handled separately at L(lowz).
75L(top):	subfc.	t0, v0, u0		C 0 12
76	beq	cr0, L(lowz)
C t1 = u1 - v1 - borrow, so t1:t0 = u - v and CA = 1 exactly when u >= v.
77	subfe	t1, v1, u1		C 2 14
C tmp = CA - 1 (0 or -1), hence cr0[EQ] is set exactly when u >= v.
78	subfe.	tmp, tmp, tmp		C 4	set cr0 from the carry bit
C s1:s0 = v - u.  This overwrites CA but leaves cr0 untouched.
79	subfc	s0, u0, v0		C 0
80	subfe	s1, u1, v1		C 2
81
C s0 is the negation of t0, so s0 & t0 keeps only the lowest set bit of the
C difference.  With z = number of trailing zero bits, cntlzd yields 63 - z;
C then tmp = 64 - z and cnt = z.
82L(bck):	and	tmp, s0, t0		C 2
83	cntlzd	cnt, tmp		C 4
84	addi	tmp, cnt, 1		C 6
85	subfic	cnt, cnt, 63		C 6
86
C CR bit 2 is cr0[EQ].  When u >= v: v is kept and u = t.  Otherwise: v takes
C the old u and u = s.  Net effect: v = min(u, v), u = |u - v|.
C The two v selects must precede the two u selects, which overwrite u.
87	isel	v0, v0, u0, 2		C 6	use condition set by subfe
88	isel	v1, v1, u1, 2		C 6
89	isel	u0, t0, s0, 2		C 6
90	isel	u1, t1, s1, 2		C 6
91
C Shift the two-limb u right by cnt bits; tmp (holding 64 - z) is reused to
C carry the bits that move from u1 down into u0.
92	srd	u0, u0, cnt		C 8
93	sld	tmp, u1, tmp		C 8
94	srd	u1, u1, cnt		C 8
95	or	u0, u0, tmp		C 10
96
C Stay in the two-limb loop while either high limb is nonzero.
97	or.	r0, u1, v1		C 10
98	bne	L(top)
99
100
C Both high limbs are zero: continue on the low limbs only.  r0 is loaded
C with the constant 63 that L(top1) subtracts from.
101	li	r0, 63
102	b	L(odd)
103	ALIGN(16)
C CR bit 29 is cr7[GT], set by the cmpld in L(odd) when v0 > u0 (unsigned).
C cnt arrives as 63 - z from cntlzd and is turned into z before the shift.
104L(top1):isel	v0, u0, v0, 29		C v = min(u,v)
105	isel	u0, r10, r11, 29	C u = |u - v|
106	subf	cnt, cnt, r0		C cnt = 63-cnt
107	srd	u0, u0, cnt
C Compute both differences and the comparison for the next round.  r8 is
C r11 & r10, i.e. a value ANDed with its own negation.
108L(odd):	subf	r10, u0, v0		C r10 = v - u
109	subf	r11, v0, u0		C r11 = u - v
110	cmpld	cr7, v0, u0
111	and	r8, r11, r10		C isolate lsb
112	cntlzd	cnt, r8
113	bne	cr7, L(top1)
114
C Loop exit: v0 == u0, so v0 is the one-limb result and r10 = v0 - u0 = 0
C supplies the zero high limb in the SLOW variant.
115ifdef(`SLOW',`
116	std	v0, 0(r3)
117	std	r10, 8(r3)		C zero
118',`
119	mr	r3, v0
120	li	r4, 0
121')
122	blr
123
124
125L(lowz):C We come here when v0 - u0 = 0
126	C 1. If v1 - u1 = 0, then gcd is u = v.
127	C 2. Else compute gcd_21({v1,v0}, |u1-v1|)
	C t0 = u1 - v1 with CA = 1 exactly when u1 >= v1; because the low limbs
	C are equal this is also the ordering of the full two-limb values.
128	subfc.	t0, v1, u1		C 2 8
129	beq	L(end)
	C The difference is kept in the low-limb slot (t0 and s0) with zero high
	C limbs (t1 and s1), i.e. already shifted right by one whole limb.
	C L(bck) then removes its remaining trailing zero bits and selects
	C v = min(u, v) using cr0[EQ] = (u >= v) set by the subfe. below.
130	li	t1, 0
131	subfe.	tmp, tmp, tmp		C 4	set cr0 from the carry bit
132	subf	s0, u1, v1		C 2
133	li	s1, 0
134	b	L(bck)
135
C Reached when u == v on both limbs: that common value is the result,
C returned as low limb v0 and high limb v1.
136L(end):
137ifdef(`SLOW',`
138	std	v0, 0(r3)
139	std	v1, 8(r3)
140	blr
141',`
142	mr	r3, v0
143	mr	r4, v1
144	blr
145')
146EPILOGUE()
147