xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/powerpc64/mode64/mode1o.asm (revision 19ef5b5b0bcb90f63509df6e78769de1b57c2758)
1dnl  PowerPC-64 mpn_modexact_1_odd -- mpn by limb exact remainder.
2
3dnl  Copyright 2006 Free Software Foundation, Inc.
4
5dnl  This file is part of the GNU MP Library.
6
7dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8dnl  it under the terms of the GNU Lesser General Public License as published
9dnl  by the Free Software Foundation; either version 3 of the License, or (at
10dnl  your option) any later version.
11
12dnl  The GNU MP Library is distributed in the hope that it will be useful, but
13dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
15dnl  License for more details.
16
17dnl  You should have received a copy of the GNU Lesser General Public License
18dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
19
20include(`../config.m4')
21
22C                  cycles/limb
23C POWER3/PPC630        13-19
24C POWER4/PPC970         16
25C POWER5                16
26C POWER6                 ?
27C POWER7                12
28
29C TODO
30C  * Check if n=1 code is really an improvement.  It probably isn't.
31C  * Make more similar to dive_1.asm.
32
33C INPUT PARAMETERS
C up: address of the limb data; limbs are loaded with ld from 0(up) and
C     8(up), and up is stepped by 8 bytes per loop iteration.
34define(`up', `r3')
C n:  limb count; decremented once on entry and then copied to CTR.
35define(`n',  `r4')
C d:  divisor limb; operand of divdu, mulld and mulhdu, and the source of
C     the index into binvert_limb_table.
36define(`d',  `r5')
C cy: carry-in limb, held in r6.  NOTE(review): the instructions in this
C     file never use the name cy and refer to r6 directly.
37define(`cy', `r6')
38
39
40ASM_START()
41
42EXTERN(binvert_limb_table)
43
C mpn_modexact_1c_odd
C
C Folds the n limbs at up into a single limb using the divisor d and the
C carry-in held in r6, and leaves that limb in r3.
C
C   n == 1 : r3 = (r6 - up[0]) mod d, computed with a hardware divide.
C   n >= 2 : limbs 0 .. n-2 go through a multiply-by-inverse loop, and the
C            top limb is finished either by subtract-and-addback or by one
C            more multiply step.
C
C Registers on entry: up = r3, n = r4, d = r5, carry-in = r6.
C Scratch: r0, r7, r8, r9, cr0, cr7, CTR and the XER carry bit CA.
C up, n and r6 are overwritten.
C
C NOTE(review): n-1 is used directly as the CTR count, so n >= 1 is assumed;
C d is assumed non-zero and odd (it is a divdu divisor, and the table index
C is taken from the bits of d above bit 0) -- confirm against callers.
44PROLOGUE(mpn_modexact_1c_odd)
C n = n - 1, with cr0 recording whether limbs remain after the first one.
C For n != 0 the add of -1 also leaves CA = 1, which the first subfe in
C L(loop) reads as "no borrow pending".
45	addic.	n, n, -1		C set carry as side effect
C r8 = up[0]
46	ld	r8, 0(up)
47	bne	cr0, L(2)
C ---- n == 1: plain division, no inverse needed ----
C Unsigned compare picks the order of subtraction so the difference is >= 0.
48	cmpld	cr7, r6, r8
49	bge	cr7, L(4)
C Case r6 < r8: r3 = (r8 - r6) mod d via divdu/mulld/subf, then negate
C modulo d: a zero remainder is returned as 0, anything else as d - r3.
50	subf	r8, r6, r8
51	divdu	r3, r8, d
52	mulld	r3, r3, d
53	subf.	r3, r3, r8
54	beqlr	cr0
55	subf	r3, r3, d
56	blr
57
C Case r6 >= r8: r3 = (r6 - r8) mod d.
58L(4):	subf	r3, r8, r6
59	divdu	r8, r3, d
60	mulld	r8, r8, d
61	subf	r3, r8, r3
62	blr
63
C ---- n >= 2: build the multiplier r9 from d ----
C r9 = (d >> 1) & 0x7f selects one byte of binvert_limb_table; CTR = n - 1.
C Three rounds of  x = 2*x - d*x*x  follow, the last one landing in r9.
C NOTE(review): presumably the table byte is the inverse of d modulo 2^8, in
C which case each round doubles the number of valid low bits and r9 ends up
C as the inverse of d modulo 2^64 -- the table is defined outside this file.
C The instructions written out here do not alter CA; whether the LEA
C expansion preserves CA cannot be seen from this file.
64L(2):	LEA(	r7, binvert_limb_table)
65	rldicl	r9, d, 63, 57
66	mtctr	n
67	lbzx	r0, r7, r9
68	mulld	r7, r0, r0
69	sldi	r0, r0, 1
70	mulld	r7, d, r7
71	subf	r0, r7, r0
72	mulld	r9, r0, r0
73	sldi	r0, r0, 1
74	mulld	r9, d, r9
75	subf	r0, r9, r0
76	mulld	r7, r0, r0
77	sldi	r0, r0, 1
78	mulld	r7, d, r7
79	subf	r9, r7, r0
80
C ---- main loop, executed n - 1 times ----
C State: r8 = current limb, r6 = carry limb, CA = 1 means no borrow pending.
C   r0 = r8 - r6 - (1 - CA), and CA is updated with the new borrow state
C   r8 = next limb, up advanced by 8 bytes (ld and addi leave CA alone)
C   r0 = low 64 bits of r9 * r0
C   r6 = high 64 bits of r0 * d
C On exit r8 holds the last limb, which has been loaded but not yet used.
81	ALIGN(16)
82L(loop):
83	subfe	r0, r6, r8
84	ld	r8, 8(up)
85	addi	up, up, 8
86	mulld	r0, r9, r0
87	mulhdu	r6, r0, d
88	bdnz	L(loop)
89
C ---- last limb ----
C If d < last limb (unsigned), a full multiply step is required: L(10).
90	cmpld	cr7, d, r8
91	blt	cr7, L(10)
92
C Last limb <= d: subtract and add back, no multiply.
C   r0 = CA - 1, i.e. 0 with no borrow pending and -1 with a borrow pending,
C   so r6 - r0 folds the pending borrow into r6.
C   r3 = r6 - r8; returned as is when r6 >= r8, otherwise d is added first.
93	subfe	r0, r0, r0
94	subf	r6, r0, r6
95	cmpld	cr7, r6, r8
96	subf	r3, r8, r6
97	bgelr	cr7
98	add	r3, d, r3
99	blr
100
C Last limb > d: one more loop-style step; the result is the high product
C limb.  r6 is the high half of a 64x64 product by d, hence r6 + 1 <= d < r8,
C so this subfe cannot leave a further borrow that would need to be added.
101L(10):	subfe	r0, r6, r8
102	mulld	r0, r9, r0
103	mulhdu	r3, r0, d
104	blr
105EPILOGUE()
106ASM_END()
107