xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/powerpc64/mode64/divrem_2.asm (revision c5e820cae412164fcbee52f470436200af5358ea)
1dnl  PPC-64 mpn_divrem_2 -- Divide an mpn number by a normalized 2-limb number.
2
3dnl  Copyright 2007, 2008 Free Software Foundation, Inc.
4
5dnl  This file is part of the GNU MP Library.
6
7dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8dnl  it under the terms of the GNU Lesser General Public License as published
9dnl  by the Free Software Foundation; either version 3 of the License, or (at
10dnl  your option) any later version.
11
12dnl  The GNU MP Library is distributed in the hope that it will be useful, but
13dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
15dnl  License for more details.
16
17dnl  You should have received a copy of the GNU Lesser General Public License
18dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
19
20include(`../config.m4')
21
22C			cycles/limb
23C			norm	frac
24C POWER3/PPC630
25C POWER4/PPC970		39*	39*
26C POWER5		39*	39*
27
28C STATUS
29C  * Performance fluctuates like crazy
30
31C INPUT PARAMETERS
32C qp  = r3
33C fn  = r4
34C up  = r5
35C un  = r6
36C dp  = r7
37
38
C When DARWIN is not defined, make the m4 token r2 expand to r31, so any r2
C written in this file is assembled as r31 on those builds.  The original
C author flagged this remapping as a FIXME.
39ifdef(`DARWIN',,`
40define(`r2',`r31')')		C FIXME!
41
42ASM_START()
43
C mpn_invert_limb is defined outside this file; mpn_divrem_2 calls it once.
44EXTERN_FUNC(mpn_invert_limb)
45
C mpn_divrem_2 -- divide a dividend by the two-limb divisor at dp.
C
C On entry: r3 = qp, r4 = fn, r5 = up, r6 = un, r7 = dp.
C
C Returns in r3 either 1 or 0: 1 when the divisor had to be subtracted once
C from the two most significant dividend limbs before the main loop.
C
C One quotient limb is stored per loop iteration, from qp[fn+un-3] down to
C qp[0].  While the loop index is below fn a zero limb is fed in instead of
C a limb read from up.  The final two-limb remainder is written to up[0]
C and up[1].
C
C Register roles after setup:
C   r23  return value
C   r24  quotient store pointer
C   r25  fn
C   r26  dividend read pointer, starts at up + 8*(un-3)
C   r27  loop index, starts at fn+un-3
C   r28  d0 = dp[0]
C   r30  d1 = dp[1]
C   r29  n1, high limb of the running partial remainder
C   r31  n0, low limb of the running partial remainder
C   r3   reciprocal returned by mpn_invert_limb, adjusted before the loop
46PROLOGUE(mpn_divrem_2)
47	mflr	r0
C Save r23-r31 below the incoming stack pointer and LR at 16(r1), then
C allocate a 192 byte frame.
48	std	r23, -72(r1)
49	std	r24, -64(r1)
50	std	r25, -56(r1)
51	std	r26, -48(r1)
52	std	r27, -40(r1)
53	std	r28, -32(r1)
54	std	r29, -24(r1)
55	std	r30, -16(r1)
56	std	r31, -8(r1)
57	std	r0, 16(r1)
58	stdu	r1, -192(r1)
59	mr	r24, r3			C r24 = qp
60	mr	r25, r4			C r25 = fn
61	sldi	r0, r6, 3		C r0 = 8*un
62	add	r26, r5, r0
63	addi	r26, r26, -24		C r26 = up + 8*(un-3)
64	ld	r30, 8(r7)		C d1 = dp[1]
65	ld	r28, 0(r7)		C d0 = dp[0]
66	ld	r29, 16(r26)		C n1 = up[un-1]
67	ld	r31, 8(r26)		C n0 = up[un-2]
68
C Initial step: if n1:n0 >= d1:d0 subtract the divisor once and return 1.
C The first ifelse argument is a disabled branchy variant of the same test;
C the second argument is the one that is assembled.
69ifelse(0,1,`
70	li	r23, 0
71	cmpld	cr7, r29, r30
72	blt	cr7, L(8)
73	bgt	cr7, L(9)
74	cmpld	cr0, r31, r28
75	blt	cr0, L(8)
76L(9):	subfc	r31, r28, r31
77	subfe	r29, r30, r29
78	li	r23, 1
79',`
80	li	r23, 0			C assume no initial subtraction
81	cmpld	cr7, r29, r30
82	blt	cr7, L(8)		C n1 < d1: nothing to subtract
83	mfcr	r0
84	rlwinm	r0, r0, 30, 31, 31	C r0 = cr7 GT bit: 1 when n1 > d1
85	subfc	r9, r28, r31		C CA = 1 when n0 >= d0
86	addze.	r0, r0			C r0 = GT + CA
87	nop
88	beq	cr0, L(8)		C n1 = d1 and n0 < d0: skip
89	subfc	r31, r28, r31		C n1:n0 -= d1:d0
90	subfe	r29, r30, r29
91	li	r23, 1
92')
93
94L(8):
95	add	r27, r25, r6
96	addic.	r27, r27, -3		C r27 = fn + un - 3
97	blt	cr0, L(18)		C negative: no quotient limbs to produce
98	mr	r3, r30			C argument for mpn_invert_limb is d1
99	CALL(	mpn_invert_limb)
C NOTE(review): presumably the slot after the call that the linker may
C patch for cross-module calls -- confirm against the CALL macro.
100	nop
C Adjust the reciprocal of d1 to take d0 into account.  r11:r10 accumulates
C low(inv*d1) + d0 + high(inv*d0), with r11 starting at -1 and gaining 1
C for each carry.  While r11 is non-negative, d1 is subtracted from the
C accumulator and the reciprocal is decremented.
101	mulld	r10, r3, r30		C r10 = low(inv * d1)
102	mulhdu	r0, r3, r28		C r0 = high(inv * d0)
103	addc	r8, r10, r28
104	subfe	r11, r1, r1		C r11 = CA - 1; r1 is only a dummy operand
105	addc	r10, r8, r0
106	addze.	r11, r11
107	blt	cr0, L(91)		C neither add carried: reciprocal is final
108L(40):
109	subfc	r10, r30, r10		C r10 -= d1
110	addme.	r11, r11		C r11 += CA - 1, drops by 1 on borrow
111	addi	r3, r3, -1		C inv -= 1
112	bge	cr0, L(40)
113L(91):
114	addi	r5, r27,  1
115	mtctr	r5			C loop count = fn + un - 2
116	sldi	r0, r27, 3
117	add	r24, r24, r0		C r24 = &qp[fn+un-3]
118	ALIGN(16)
119L(loop):
120	mulhdu	r8, r29, r3
121	mulld	r6, r29, r3
122	addc	r6, r6, r31
123	adde	r8, r8, r29		C r8:r6 = n1*inv + n1:n0, r8 = candidate q
124	mulld	r0, r30, r8
125	subf	r31, r0, r31		C r31 = n0 - low(q*d1)
126	mulhdu	r11, r28, r8
127	mulld	r10, r28, r8		C r11:r10 = q * d0
128	li	r7, 0			C next dividend limb defaults to zero
129	cmpd	cr7, r27, r25
130	blt	cr7, L(60)		C index below fn: keep the zero limb
131	ld	r7, 0(r26)		C otherwise fetch the next limb from up
132	addi	r26, r26, -8
133	nop
134L(60):	subfc	r7, r28, r7
135	subfe	r31, r30, r31		C r31:r7 -= d1:d0
136	subfc	r7, r10, r7
137	subfe	r4, r11, r31		C r4:r7 = r31:r7 - q*d0
138	subfc	r9, r6, r4		C CA = 1 when r4 >= r6
139	subfe	r9, r1, r1		C r9 = CA - 1: 0 if r4 >= r6, else -1
140	andc	r6, r28, r9		C d0 when r9 = 0, else 0
141	andc	r0, r30, r9		C d1 when r9 = 0, else 0
142	addc	r31, r7, r6
143	adde	r29, r4, r0		C n1:n0 = r4:r7 plus d1:d0 or plus zero
144	subf	r8, r9, r8		C q += 1 when r9 = -1
145	cmpld	cr7, r29, r30
146	bge-	cr7, L(fix)		C n1 >= d1: take the correction path
147L(bck):	std	r8, 0(r24)		C store this quotient limb
148	addi	r24, r24, -8
149	addi	r27, r27, -1
150	bdnz	L(loop)
151L(18):
C Write the remainder back into up.  r26 has moved down 8 bytes per limb
C read, so these two slots are up[0] and up[1].
152	std	r31, 8(r26)
153	std	r29, 16(r26)
154	mr	r3, r23			C return value
155	addi	r1, r1, 192		C drop the frame
156	ld	r0, 16(r1)
157	mtlr	r0
158	ld	r23, -72(r1)
159	ld	r24, -64(r1)
160	ld	r25, -56(r1)
161	ld	r26, -48(r1)
162	ld	r27, -40(r1)
163	ld	r28, -32(r1)
164	ld	r29, -24(r1)
165	ld	r30, -16(r1)
166	ld	r31, -8(r1)
167	blr
C Correction path: same test as the initial step.  If n1:n0 >= d1:d0,
C subtract the divisor once more and bump the quotient limb.
168L(fix):
169	mfcr	r0
170	rlwinm	r0, r0, 30, 31, 31	C r0 = cr7 GT bit: 1 when n1 > d1
171	subfc	r9, r28, r31		C CA = 1 when n0 >= d0
172	addze.	r0, r0
173	beq	cr0, L(bck)		C n1 = d1 and n0 < d0: no correction
174	subfc	r31, r28, r31		C n1:n0 -= d1:d0
175	subfe	r29, r30, r29
176	addi	r8, r8, 1		C q += 1
177	b	L(bck)
178EPILOGUE()
179