xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/powerpc64/mode64/divrem_2.asm (revision f3cfa6f6ce31685c6c4a758bc430e69eb99f50a4)
1dnl  PPC-64 mpn_divrem_2 -- Divide an mpn number by a normalized 2-limb number.
2
3dnl  Copyright 2007, 2008 Free Software Foundation, Inc.
4
5dnl  This file is part of the GNU MP Library.
6dnl
7dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8dnl  it under the terms of either:
9dnl
10dnl    * the GNU Lesser General Public License as published by the Free
11dnl      Software Foundation; either version 3 of the License, or (at your
12dnl      option) any later version.
13dnl
14dnl  or
15dnl
16dnl    * the GNU General Public License as published by the Free Software
17dnl      Foundation; either version 2 of the License, or (at your option) any
18dnl      later version.
19dnl
20dnl  or both in parallel, as here.
21dnl
22dnl  The GNU MP Library is distributed in the hope that it will be useful, but
23dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
25dnl  for more details.
26dnl
27dnl  You should have received copies of the GNU General Public License and the
28dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
29dnl  see https://www.gnu.org/licenses/.
30
31include(`../config.m4')
32
33C                       cycles/limb
34C                       norm    frac
35C POWER3/PPC630
36C POWER4/PPC970         ?       ?
37C POWER5                37      ?
38C POWER6                62      ?
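39C POWER6                30.5    ?       C NOTE(review): second POWER6 row with a different figure; CPU label is probably wrong, TODO confirm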
40
41C INPUT PARAMETERS
42C qp  = r3
43C fn  = r4
44C up  = r5
45C un  = r6
46C dp  = r7
47
48
C When DARWIN is not defined, the m4 name r2 is redefined to expand to r31.
C NOTE(review): no instruction visible in this file names r2 directly, so this
C presumably matters only to macros defined elsewhere; the FIXME marks it as a
C known wart -- confirm before changing it.
49ifdef(`DARWIN',,`
50define(`r2',`r31')')		C FIXME!
51
52ASM_START()
53
C mpn_invert_limb is defined outside this file; it is called once below with
C the high divisor limb in r3.
54EXTERN_FUNC(mpn_invert_limb)
55
C mpn_divrem_2 -- entry point.  Arguments arrive as listed under INPUT
C PARAMETERS: qp = r3, fn = r4, up = r5, un = r6, dp = r7.  The value returned
C in r3 is the 0/1 flag accumulated in r23.
C
C Register roles after setup:
C   r24  quotient store pointer: qp, then qp + 8*r27, stepping down by 8
C   r25  fn
C   r26  dividend read pointer: up + 8*un - 24, stepping down by 8 per load
C   r27  loop index, starts at fn + un - 3 and counts down
C   r28  dp[0], low divisor limb
C   r30  dp[1], high divisor limb
C   r29  high limb of the running two-limb remainder
C   r31  low limb of the running two-limb remainder
C   r23  return value
C   r3   after the call: result of mpn_invert_limb, adjusted below, then used
C        as the multiplier in the main loop
C
C Carry convention used throughout: subfc rD,rA,rB computes rB - rA and sets CA
C when no borrow occurred; subfe rD,r1,r1 then yields CA - 1, that is 0 when CA
C is set and -1 when it is clear.  r1 is only a convenient same-register operand
C there, its value cancels out.
56PROLOGUE(mpn_divrem_2,toc)
C Prologue: save r23-r31 below the incoming stack pointer and LR at 16(r1),
C then open a 192-byte frame.
57	mflr	r0
58	std	r23, -72(r1)
59	std	r24, -64(r1)
60	std	r25, -56(r1)
61	std	r26, -48(r1)
62	std	r27, -40(r1)
63	std	r28, -32(r1)
64	std	r29, -24(r1)
65	std	r30, -16(r1)
66	std	r31, -8(r1)
67	std	r0, 16(r1)		C return address, reloaded in the epilogue
68	stdu	r1, -192(r1)		C allocate the frame and store the old r1
69	mr	r24, r3			C r24 = qp
70	mr	r25, r4			C r25 = fn
71	sldi	r0, r6, 3		C r0 = 8*un
72	add	r26, r5, r0
73	addi	r26, r26, -24		C r26 = up + 8*un - 24, the address of up[un-3]
74	ld	r30, 8(r7)		C r30 = dp[1]
75	ld	r28, 0(r7)		C r28 = dp[0]
76	ld	r29, 16(r26)		C r29 = up[un-1]
77	ld	r31, 8(r26)		C r31 = up[un-2]
78
C Initial step: if the top two dividend limbs r29:r31 are >= the divisor
C r30:r28, subtract the divisor once and set the return flag r23 to 1.
C The m4 test below compares the strings 0 and 1, which never match, so the
C first branch is never emitted; only the second, branch-reduced variant is
C assembled.
79ifelse(0,1,`
80	li	r23, 0
81	cmpld	cr7, r29, r30
82	blt	cr7, L(8)
83	bgt	cr7, L(9)
84	cmpld	cr0, r31, r28
85	blt	cr0, L(8)
86L(9):	subfc	r31, r28, r31
87	subfe	r29, r30, r29
88	li	r23, 1
89',`
90	li	r23, 0			C default return value
91	cmpld	cr7, r29, r30		C unsigned compare of high limbs
92	blt	cr7, L(8)		C r29 < r30: dividend top is smaller, nothing to subtract
93	mfcr	r0
94	rlwinm	r0, r0, 30, 1		C rotate and mask to isolate the cr7 GT bit: r0 = 1 if r29 > r30
95	subfc	r9, r28, r31		C only the carry is wanted: CA = 1 if r31 >= r28
96	addze.	r0, r0			C r0 = GT + CA, and set cr0 from the sum
97	nop				C NOTE(review): presumably scheduling padding -- confirm
98	beq	cr0, L(8)		C sum is 0: high limbs equal and r31 < r28, so no subtraction
99	subfc	r31, r28, r31		C r31 = r31 - r28
100	subfe	r29, r30, r29		C r29 = r29 - r30 - borrow
101	li	r23, 1			C record that one divisor was subtracted
102')
103
104L(8):
105	add	r27, r25, r6
106	addic.	r27, r27, -3		C r27 = fn + un - 3, cr0 set from the result
107	blt	cr0, L(18)		C negative: no quotient limbs to produce
108	mr	r3, r30			C argument: high divisor limb
109	CALL(	mpn_invert_limb)
C Adjust the value returned in r3 using both divisor limbs.
C NOTE(review): mpn_invert_limb is external; presumably r3 is a reciprocal of
C r30 and this sequence corrects it for the low limb r28 -- confirm against the
C C reference implementation.
110	mulld	r10, r3, r30		C r10 = low half of r3 * r30
111	mulhdu	r0, r3, r28		C r0 = high half of r3 * r28
112	addc	r8, r10, r28		C r8 = r10 + r28, carry out in CA
113	subfe	r11, r1, r1		C r11 = CA - 1: 0 if that add carried, else -1
114	addc	r10, r8, r0		C r10 = r8 + r0, carry out in CA
115	addze.	r11, r11		C r11 = number of carries - 1, cr0 set from it
116	blt	cr0, L(91)		C neither addition carried: r3 needs no adjustment
117L(40):
118	subfc	r10, r30, r10		C r10 -= r30, CA = no borrow
119	addme.	r11, r11		C r11 = r11 + CA - 1, so a borrow decrements r11
120	addi	r3, r3, -1		C step r3 down by one
121	bge	cr0, L(40)		C repeat while r11 is still non-negative
122L(91):
123	addi	r5, r27,  1
124	mtctr	r5			C loop count = r27 + 1 quotient limbs
125	sldi	r0, r27, 3
126	add	r24, r24, r0		C r24 = qp + 8*r27, address of the first limb stored
127	ALIGN(16)
C Main loop: each pass produces one quotient limb in r8 from the running
C remainder r29:r31 and the next lower dividend limb r7, leaving the new
C remainder in r29:r31.
128L(loop):
129	mulhdu	r8, r29, r3		C r8 = high half of r29 * r3
130	mulld	r6, r29, r3		C r6 = low half of r29 * r3
131	addc	r6, r6, r31
132	adde	r8, r8, r29		C r8:r6 = r29 * r3 + r29:r31; r8 is the quotient candidate
133	cmpd	cr7, r27, r25		C signed compare of loop index with fn, tested below
134	mulld	r0, r30, r8		C r0 = low half of r30 * r8
135	mulhdu	r11, r28, r8		C r11 = high half of r28 * r8
136	mulld	r10, r28, r8		C r10 = low half of r28 * r8
137	subf	r31, r0, r31		C r31 = r31 - r0
138	li	r7, 0			C next dividend limb defaults to zero
139	blt	cr7, L(60)		C r27 < fn: keep the zero limb, nothing is read from up
140	ld	r7, 0(r26)		C otherwise fetch the next lower limb of up
141	addi	r26, r26, -8		C and step the read pointer down
142	nop				C NOTE(review): presumably scheduling padding -- confirm
143L(60):	subfc	r7, r28, r7		C r7 = r7 - r28
144	subfe	r31, r30, r31		C r31 = r31 - r30 - borrow
145	subfc	r7, r10, r7		C r7 = r7 - r10
146	subfe	r4, r11, r31		C r4 = r31 - r11 - borrow; r4:r7 is the candidate remainder
147	subfc	r9, r6, r4		C only the carry is wanted: CA = 1 if r4 >= r6, unsigned
148	subfe	r9, r1, r1		C r9 = 0 if r4 >= r6, else -1
149	andc	r6, r28, r9		C r6 = r28 when r9 is 0, else 0
150	andc	r0, r30, r9		C r0 = r30 when r9 is 0, else 0
151	addc	r31, r7, r6
152	adde	r29, r4, r0		C r29:r31 = r4:r7, plus the divisor when r9 is 0
153	subf	r8, r9, r8		C r8 = r8 - r9: quotient + 1 when r9 is -1, unchanged when 0
154	cmpld	cr7, r29, r30		C is the high remainder limb still >= r30
155	bge-	cr7, L(fix)		C if so take the out-of-line fixup; minus suffix hints not taken
156L(bck):	std	r8, 0(r24)		C store the quotient limb
157	addi	r24, r24, -8		C next store goes one limb lower
158	addi	r27, r27, -1
159	bdnz	L(loop)
C Exit: write the two remainder limbs to 8(r26) and 16(r26).  On fall-through
C from the loop r26 has been stepped down once per limb loaded, un-2 times in
C total, so these slots are up[0] and up[1].
160L(18):
161	std	r31, 8(r26)		C low remainder limb
162	std	r29, 16(r26)		C high remainder limb
163	mr	r3, r23			C return value
164	addi	r1, r1, 192		C release the 192-byte frame
165	ld	r0, 16(r1)
166	mtlr	r0			C restore the return address
167	ld	r23, -72(r1)
168	ld	r24, -64(r1)
169	ld	r25, -56(r1)
170	ld	r26, -48(r1)
171	ld	r27, -40(r1)
172	ld	r28, -32(r1)
173	ld	r29, -24(r1)
174	ld	r30, -16(r1)
175	ld	r31, -8(r1)
176	blr
C Out-of-line fixup, entered with cr7 holding the compare of r29 against r30
C and r29 >= r30.  If r29:r31 >= r30:r28, subtract the divisor once more and
C add 1 to the quotient limb; either way resume at L(bck).
177L(fix):
178	mfcr	r0
179	rlwinm	r0, r0, 30, 1		C rotate and mask to isolate the cr7 GT bit: r0 = 1 if r29 > r30
180	subfc	r9, r28, r31		C only the carry is wanted: CA = 1 if r31 >= r28
181	addze.	r0, r0			C r0 = GT + CA, and set cr0 from the sum
182	beq	cr0, L(bck)		C sum is 0: high limbs equal and r31 < r28, no fixup needed
183	subfc	r31, r28, r31		C r31 = r31 - r28
184	subfe	r29, r30, r29		C r29 = r29 - r30 - borrow
185	addi	r8, r8, 1		C quotient limb + 1
186	b	L(bck)
187EPILOGUE()
188