xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/powerpc64/mode64/p9/gcd_22.asm (revision 4ac76180e904e771b9d522c7e57296d371f06499)
1dnl  PowerPC-64 mpn_gcd_22 optimised for POWER9.
2
3dnl  Copyright 2000-2002, 2005, 2009, 2011-2013, 2019 Free Software Foundation,
4dnl  Inc.
5
6dnl  This file is part of the GNU MP Library.
7dnl
8dnl  The GNU MP Library is free software; you can redistribute it and/or modify
9dnl  it under the terms of either:
10dnl
11dnl    * the GNU Lesser General Public License as published by the Free
12dnl      Software Foundation; either version 3 of the License, or (at your
13dnl      option) any later version.
14dnl
15dnl  or
16dnl
17dnl    * the GNU General Public License as published by the Free Software
18dnl      Foundation; either version 2 of the License, or (at your option) any
19dnl      later version.
20dnl
21dnl  or both in parallel, as here.
22dnl
23dnl  The GNU MP Library is distributed in the hope that it will be useful, but
24dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
25dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
26dnl  for more details.
27dnl
28dnl  You should have received copies of the GNU General Public License and the
29dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
30dnl  see https://www.gnu.org/licenses/.
31
32include(`../config.m4')
33
34C		    cycles/bit (approx)
35C POWER3/PPC630		 -
36C POWER4/PPC970		 -
37C POWER5		 -
38C POWER6		 -
39C POWER7		 -
40C POWER8		 -
41C POWER9		 9.58
42
43C We define SLOW if this target uses a slow struct return mechanism, with
44C r3 as an implicit parameter for the struct pointer.
45undefine(`SLOW')dnl
C SLOW ends up defined for AIX, and for targets that are neither DARWIN nor
C ELFv2_ABI (labelled ELFv1 below).  It stays undefined for DARWIN and for
C ELFv2_ABI.
46ifdef(`AIX',`define(`SLOW',`due to AIX')',`
47  ifdef(`DARWIN',,`
48    ifdef(`ELFv2_ABI',,`define(`SLOW',`due to ELFv1')')dnl
49  ')
50')
51
C Operand registers.  With SLOW, r3 is taken by the implicit struct pointer,
C so the four operand limbs are named in r4..r7; otherwise they are r3..r6.
C The order is u1, u0, v1, v0 in both cases.
C IFSLOW expands to its argument with SLOW and to nothing otherwise; it is
C not referenced in the part of the file visible here.
52ifdef(`SLOW',`
53define(`IFSLOW', `$1')
54define(`u1',    `r4')
55define(`u0',    `r5')
56define(`v1',    `r6')
57define(`v0',    `r7')
58',`
59define(`IFSLOW', `')
60define(`u1',    `r3')
61define(`u0',    `r4')
62define(`v1',    `r5')
63define(`v0',    `r6')
64')
65
C Scratch registers used by the code below:
C   tmp      general temporary; also receives the 0 or -1 borrow mask
C   t1,t0    the two-limb difference u - v
C   s1,s0    the two-limb difference v - u
C   cnt      count of trailing zero bits in the difference
C The one-limb loop refers to r10 and r11 (s0 and s1) by their raw names.
66define(`tmp',   `r0')
67define(`t0',    `r8')
68define(`t1',    `r9')
69define(`s0',    `r10')
70define(`s1',    `r11')
71define(`cnt',   `r12')
72
73ASM_START()
C mpn_gcd_22: gcd of the two-limb values {u1,u0} and {v1,v0}.
C
C The operands arrive in the registers named u1, u0, v1, v0 by the macros at
C the top of the file.  The result is a two-limb value.  Without SLOW it is
C returned in r3 (low limb) and r4 (high limb); with SLOW it is stored at
C 0(r3) and 8(r3), r3 being the implicit struct pointer.
C
C NOTE(review): stripping the trailing zeros of the difference only gives the
C gcd if u0 and v0 are both odd on entry -- presumably a precondition that
C the callers guarantee; confirm against the callers.
C
C Structure:
C   L(top), L(bck)   two-limb loop, runs while u1 | v1 is nonzero
C   L(odd), L(top1)  one-limb loop, used once both high limbs are zero
C   L(lowz)          special case for u0 = v0
C   L(end)           return path for u = v in both limbs
74PROLOGUE(mpn_gcd_22)
75	cmpld	cr7, v0, u0		C cr7 = unsigned compare of the low limbs
C Two-limb step: form both t = u - v and s = v - u, then select below.
76L(top):	subfc	t0, v0, u0		C 0 12
77	beq	cr7, L(lowz)		C low limbs equal, handled separately
78	subfe	t1, v1, u1		C 2 14
C subfe. of a register with itself gives 0 when the carry is set (no borrow,
C u >= v) and -1 when it is clear (u < v), so cr0[eq] is set iff u >= v.
79	subfe.	tmp, tmp, tmp		C 4	set cr0 from the carry bit
80	subfc	s0, u0, v0		C 0
81	subfe	s1, u1, v1		C 2
82
C L(bck) is also entered from L(lowz), with t, s and cr0 already set up.
C The trailing zero count is taken from t0; s0 = -t0 has the same count.
83L(bck):	cnttzd	cnt, t0			C 2
84	subfic	tmp, cnt, 64		C 4	tmp = 64 - cnt
85
C CR bit 2 is cr0[eq].  When u >= v keep v and take u = t, otherwise take
C v = u and u = s.  Either way v = min(u,v) and u = |u - v|.
86	isel	v0, v0, u0, 2		C 6	use condition set by subfe
87	isel	u0, t0, s0, 2		C 6
88	isel	v1, v1, u1, 2		C 6
89	isel	u1, t1, s1, 2		C 6
90
C Shift the two-limb u right by cnt bits.  With cnt = 0 the sld count is 64,
C for which sld produces zero, so u0 is left unchanged by the or.
91	srd	u0, u0, cnt		C 8
92	sld	tmp, u1, tmp		C 8
93	srd	u1, u1, cnt		C 8
94	or	u0, u0, tmp		C 10
95
96	or.	r0, u1, v1		C 10	cr0[eq] iff both high limbs are zero
97	cmpld	cr7, v0, u0		C low limb compare for the beq at L(top)
98	bne	L(top)			C tests cr0: stay in the two-limb loop
99
100
C Both high limbs are zero: continue with a one-limb loop on u0 and v0.
C r10 and r11 are the registers otherwise named s0 and s1.
101	b	L(odd)
102	ALIGN(16)
C CR bit 29 is cr7[gt], set by the cmpld below when v0 > u0.
103L(top1):isel	v0, u0, v0, 29		C v = min(u,v)
104	isel	u0, r10, r11, 29	C u = |u - v|
105	srd	u0, u0, cnt		C strip the trailing zeros of the difference
106L(odd):	subf	r10, u0, v0		C r10 = v - u
107	subf	r11, v0, u0		C r11 = u - v
108	cmpld	cr7, v0, u0
109	cnttzd	cnt, r10		C r11 = -r10 has the same count
110	bne	cr7, L(top1)		C loop until u0 = v0
111
C Here u0 = v0, so r10 = 0: the result is v0 with a zero high limb.
112ifdef(`SLOW',`
113	std	v0, 0(r3)
114	std	r10, 8(r3)
115',`
116	mr	r3, v0
117	li	r4, 0
118')
119	blr
120
121
122L(lowz):C We come here when v0 - u0 = 0
123	C 1. If v1 - u1 = 0, then gcd is u = v.
124	C 2. Else compute gcd_21({v1,v0}, |u1-v1|)
C For case 2 set t = {0, u1 - v1} and s = {0, v1 - u1}, let cr0 reflect the
C borrow of u1 - v1, then rejoin the main loop at L(bck).
125	subfc.	t0, v1, u1		C 2 8
126	beq	L(end)			C cr0[eq] from subfc. means u1 = v1
127	li	t1, 0
128	subfe.	tmp, tmp, tmp		C 4	set cr0 from the carry bit
129	subf	s0, u1, v1		C 2
130	li	s1, 0
131	b	L(bck)
132
C u = v in both limbs: return {v1,v0}.
133L(end):
134ifdef(`SLOW',`
135	std	v0, 0(r3)
136	std	v1, 8(r3)
137	blr
138',`
139	mr	r3, v0
140	mr	r4, v1
141	blr
142')
143EPILOGUE()
144