xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/powerpc64/mode64/divrem_2.asm (revision aceb213538ec08a74028e213127af18aa17bf1cf)
1dnl  PPC-64 mpn_divrem_2 -- Divide an mpn number by a normalized 2-limb number.
2
3dnl  Copyright 2007, 2008 Free Software Foundation, Inc.
4
5dnl  This file is part of the GNU MP Library.
6
7dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8dnl  it under the terms of the GNU Lesser General Public License as published
9dnl  by the Free Software Foundation; either version 3 of the License, or (at
10dnl  your option) any later version.
11
12dnl  The GNU MP Library is distributed in the hope that it will be useful, but
13dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
15dnl  License for more details.
16
17dnl  You should have received a copy of the GNU Lesser General Public License
18dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
19
20include(`../config.m4')
21
22C                       cycles/limb
23C                       norm    frac
24C POWER3/PPC630
25C POWER4/PPC970         ?       ?
26C POWER5                37      ?
27C POWER6                62      ?
28C POWER6                30.5    ?
29
30C INPUT PARAMETERS
31C qp  = r3
32C fn  = r4
33C up  = r5
34C un  = r6
35C dp  = r7
36
37
C When the m4 symbol DARWIN is not defined, the define below makes every
C later occurrence of the token r2 expand to r31.  The original author
C marked this FIXME.  NOTE(review): no instruction in this file names r2
C directly, so the alias presumably matters only for text produced by
C macros such as CALL -- confirm against the macro definitions.
38ifdef(`DARWIN',,`
39define(`r2',`r31')')		C FIXME!
40
C ASM_START is defined outside this file (presumably it emits whatever
C start-of-file directives the target assembler needs -- TODO confirm).
41ASM_START()
42
C Declare the external routine that is invoked through CALL further down.
43EXTERN_FUNC(mpn_invert_limb)
44
C ---------------------------------------------------------------------
C mpn_divrem_2
C
C On entry: qp = r3, fn = r4, up = r5, un = r6, dp = r7.
C
C Divides by the two-limb divisor d1:d0 loaded from dp, with d1 = dp[1]
C and d0 = dp[0].  The running two-limb remainder starts as up[un-1]:up[un-2].
C Each loop pass brings in one more limb from up, or a zero limb once the
C loop index has dropped below fn, and stores one quotient limb.  In
C total fn+un-2 quotient limbs are stored, from qp[fn+un-3] down to qp[0].
C The final remainder is stored at 8 and 16 bytes above the read pointer,
C which after un-2 pointer decrements works out to up[0] and up[1].
C The value returned in r3 is 0 or 1: it is 1 when the initial top two
C limbs were not smaller than the divisor and one subtraction was needed.
C
C Register roles, as set up by the code below:
C   r23      return value, 0 or 1
C   r24      quotient store pointer, moves downward
C   r25      fn
C   r26      dividend read pointer, starts at up + 8*(un-3), moves downward
C   r27      loop index, starts at fn+un-3, decremented each pass
C   r28      d0
C   r30      d1
C   r29:r31  current remainder, high:low
C   r3       reciprocal obtained from mpn_invert_limb, then adjusted
C   r0, r4-r11  scratch
C ---------------------------------------------------------------------
45PROLOGUE(mpn_divrem_2)
C Prologue: keep the return address, save r23-r31 just below the incoming
C stack pointer, store the return address at 16(r1), then allocate a
C 192-byte frame.  stdu also stores the old r1 at the new stack top.
46	mflr	r0
47	std	r23, -72(r1)
48	std	r24, -64(r1)
49	std	r25, -56(r1)
50	std	r26, -48(r1)
51	std	r27, -40(r1)
52	std	r28, -32(r1)
53	std	r29, -24(r1)
54	std	r30, -16(r1)
55	std	r31, -8(r1)
56	std	r0, 16(r1)
57	stdu	r1, -192(r1)
C Move the arguments that must survive the call into saved registers.
58	mr	r24, r3
59	mr	r25, r4
C r26 = up + 8*un - 24, the address of up[un-3].
60	sldi	r0, r6, 3
61	add	r26, r5, r0
62	addi	r26, r26, -24
C Load the divisor limbs and the two most significant dividend limbs.
63	ld	r30, 8(r7)
64	ld	r28, 0(r7)
65	ld	r29, 16(r26)
66	ld	r31, 8(r26)
67
C Initial step: if r29:r31 is not smaller than d1:d0, subtract the divisor
C once and set r23 to 1, otherwise leave r23 at 0.
C ifelse(0,1,A,B) always selects B because the strings 0 and 1 differ, so
C the first arm below is never assembled.  It performs the same test with
C plain compare-and-branch instructions.
68ifelse(0,1,`
69	li	r23, 0
70	cmpld	cr7, r29, r30
71	blt	cr7, L(8)
72	bgt	cr7, L(9)
73	cmpld	cr0, r31, r28
74	blt	cr0, L(8)
75L(9):	subfc	r31, r28, r31
76	subfe	r29, r30, r29
77	li	r23, 1
78',`
C Active arm.  If the high limb is below d1 nothing needs subtracting.
79	li	r23, 0
80	cmpld	cr7, r29, r30
81	blt	cr7, L(8)
C Here r29 is at least d1.  mfcr copies the condition register to r0.
C Rotating left by 30 and keeping only the low bit leaves the cr7
C greater-than bit in r0: 1 when r29 exceeds d1, 0 when they are equal.
C NOTE(review): rlwinm is written in the four-operand form where the last
C operand is a mask -- confirm every targeted assembler accepts it.
82	mfcr	r0
83	rlwinm	r0, r0, 30, 1
C r31 - d0 is computed only for its carry, which is set when r31 is not
C below d0.  The difference in r9 is not used.
84	subfc	r9, r28, r31
C r0 = r0 + carry, recorded in cr0.  The sum is zero only when the high
C limbs are equal and the low limb is below d0.
85	addze.	r0, r0
86	nop
87	beq	cr0, L(8)
C Remainder is not below the divisor: subtract d1:d0 and return 1.
88	subfc	r31, r28, r31
89	subfe	r29, r30, r29
90	li	r23, 1
91')
92
93L(8):
C r27 = fn + un - 3 is the index of the first quotient limb.  A negative
C value means there are no limbs to produce, so go straight to the exit.
94	add	r27, r25, r6
95	addic.	r27, r27, -3
96	blt	cr0, L(18)
C Call mpn_invert_limb with d1 as its argument.  The result comes back in r3.
C NOTE(review): the nop after the call is presumably the slot a linker
C may patch to restore the TOC pointer -- confirm against the CALL macro.
97	mr	r3, r30
98	CALL(	mpn_invert_limb)
99	nop
C Adjust the returned value so that it also accounts for d0.
C NOTE(review): this looks like the usual conversion of a one-limb
C reciprocal into a reciprocal for the two-limb divisor -- confirm against
C the generic C division code.
C   r10 = low limb of r3*d1, r0 = high limb of r3*d0
C   r8  = r10 + d0, and r11 = carry - 1 because subfe of a register with
C         itself yields carry minus one
C   r10 = r8 + r0, and the carry out is added into r11
C If r11 is negative no adjustment is needed.
100	mulld	r10, r3, r30
101	mulhdu	r0, r3, r28
102	addc	r8, r10, r28
103	subfe	r11, r1, r1
104	addc	r10, r8, r0
105	addze.	r11, r11
106	blt	cr0, L(91)
107L(40):
C Each pass subtracts d1 from r10, propagates the borrow into r11 as
C carry minus one, and decrements the reciprocal.  Repeat while r11 is
C still non-negative.
108	subfc	r10, r30, r10
109	addme.	r11, r11
110	addi	r3, r3, -1
111	bge	cr0, L(40)
112L(91):
C Loop count is r27 + 1.  Point r24 at the most significant quotient limb,
C qp + 8*r27.
113	addi	r5, r27,  1
114	mtctr	r5
115	sldi	r0, r27, 3
116	add	r24, r24, r0
117	ALIGN(16)
118L(loop):
C Candidate quotient: r8:r6 = r29 * r3 + r29:r31.  r8 is the quotient
C candidate and r6 is the low limb kept for the adjustment test below.
119	mulhdu	r8, r29, r3
120	mulld	r6, r29, r3
121	addc	r6, r6, r31
122	adde	r8, r8, r29
C Compare the loop index with fn.  The result in cr7 is consumed by the
C blt a few instructions later.
123	cmpd	cr7, r27, r25
C r31 = r31 - low limb of d1*q.  r11:r10 = d0*q as a full product.
124	mulld	r0, r30, r8
125	mulhdu	r11, r28, r8
126	mulld	r10, r28, r8
127	subf	r31, r0, r31
C Next low limb: zero when the index is below fn, otherwise the next limb
C read from r26, after which the read pointer steps down by one limb.
128	li	r7, 0
129	blt	cr7, L(60)
130	ld	r7, 0(r26)
131	addi	r26, r26, -8
132	nop
C r31:r7 = r31:r7 - d1:d0, then r4:r7 = r31:r7 - r11:r10.
133L(60):	subfc	r7, r28, r7
134	subfe	r31, r30, r31
135	subfc	r7, r10, r7
136	subfe	r4, r11, r31
C Compare r4 with r6 through the carry.  r9 becomes 0 when r4 is not
C below r6 and all ones when it is.  The first r9 result is only a
C throwaway needed to set the carry.
137	subfc	r9, r6, r4
138	subfe	r9, r1, r1
C Select the divisor when r9 is 0, or zero when r9 is all ones, and add
C the selection to r4:r7 to form the new remainder r29:r31.
139	andc	r6, r28, r9
140	andc	r0, r30, r9
141	addc	r31, r7, r6
142	adde	r29, r4, r0
C q = q - r9, which adds one to q exactly when r9 is all ones.
143	subf	r8, r9, r8
C If the new high limb is not below d1 a further correction may be
C needed.  The branch is hinted as not taken.
144	cmpld	cr7, r29, r30
145	bge-	cr7, L(fix)
C Store the quotient limb, step the store pointer and the index down,
C and loop on the count register.
146L(bck):	std	r8, 0(r24)
147	addi	r24, r24, -8
148	addi	r27, r27, -1
149	bdnz	L(loop)
150L(18):
C Exit: write the remainder low limb to 8(r26) and the high limb to
C 16(r26), then place the return value in r3.
151	std	r31, 8(r26)
152	std	r29, 16(r26)
153	mr	r3, r23
C Epilogue: release the 192-byte frame, reload the return address and
C r23-r31 from the slots used in the prologue, and return.
154	addi	r1, r1, 192
155	ld	r0, 16(r1)
156	mtlr	r0
157	ld	r23, -72(r1)
158	ld	r24, -64(r1)
159	ld	r25, -56(r1)
160	ld	r26, -48(r1)
161	ld	r27, -40(r1)
162	ld	r28, -32(r1)
163	ld	r29, -24(r1)
164	ld	r30, -16(r1)
165	ld	r31, -8(r1)
166	blr
167L(fix):
C Out-of-line correction, reached when r29 is not below d1.  Same test
C as the initial step: r0 = cr7 greater-than bit plus the carry of
C r31 - d0.  Zero means r29:r31 is still below d1:d0 and nothing is done.
C Otherwise subtract the divisor once more and add one to the quotient.
168	mfcr	r0
169	rlwinm	r0, r0, 30, 1
170	subfc	r9, r28, r31
171	addze.	r0, r0
172	beq	cr0, L(bck)
173	subfc	r31, r28, r31
174	subfe	r29, r30, r29
175	addi	r8, r8, 1
176	b	L(bck)
177EPILOGUE()
178