xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/powerpc64/mode64/bdiv_q_1.asm (revision 72c7faa4dbb41dbb0238d6b4a109da0d4b236dd4)
1dnl  PowerPC-64 mpn_bdiv_q_1, mpn_pi1_bdiv_q_1 -- Hensel division by 1-limb
2dnl  divisor.
3
4dnl  Copyright 2006, 2010, 2017 Free Software Foundation, Inc.
5
6dnl  This file is part of the GNU MP Library.
7dnl
8dnl  The GNU MP Library is free software; you can redistribute it and/or modify
9dnl  it under the terms of either:
10dnl
11dnl    * the GNU Lesser General Public License as published by the Free
12dnl      Software Foundation; either version 3 of the License, or (at your
13dnl      option) any later version.
14dnl
15dnl  or
16dnl
17dnl    * the GNU General Public License as published by the Free Software
18dnl      Foundation; either version 2 of the License, or (at your option) any
19dnl      later version.
20dnl
21dnl  or both in parallel, as here.
22dnl
23dnl  The GNU MP Library is distributed in the hope that it will be useful, but
24dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
25dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
26dnl  for more details.
27dnl
28dnl  You should have received copies of the GNU General Public License and the
29dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
30dnl  see https://www.gnu.org/licenses/.
31
32include(`../config.m4')
33
34C			cycles/limb
35C			norm	unorm
36C POWER3/PPC630	       13-19
37C POWER4/PPC970		16
38C POWER5		16	16
39C POWER6		37	46
40C POWER7		12	12
41C POWER8		12	12
42
C INPUT PARAMETERS
C Symbolic register names used by both entry points below.  mpn_bdiv_q_1
C reads rp, up, n, d and computes di and cnt itself; mpn_pi1_bdiv_q_1 reads
C all six as inputs.
define(`rp', `r3')	C quotient limbs are stored through this pointer
define(`up', `r4')	C dividend limbs are loaded through this pointer
define(`n',  `r5')	C limb count; r5 is reused as the carry limb in the loops
define(`d',  `r6')	C divisor limb
define(`di', `r7')	C inverse of d modulo 2^64
define(`cnt',`r8')	C right-shift count applied to the dividend limbs

C Scratch: set to 64-cnt at L(unorm), the matching left-shift count.
define(`tnc',`r10')

ASM_START()

C Byte table that mpn_bdiv_q_1 indexes to seed its inverse computation.
EXTERN(binvert_limb_table)
56
C mpn_bdiv_q_1 (rp, up, n, d)
C
C Entry point that derives the shift count and the inverse itself and then
C continues in the loops of mpn_pi1_bdiv_q_1:
C   1. cnt = index of the lowest set bit of d; d is shifted right by cnt.
C   2. di = inverse of the shifted d modulo 2^64, seeded from
C      binvert_limb_table and refined by three Newton steps.
C   3. Branch to L(norm) when cnt = 0, otherwise to L(unorm).
C
C State handed over to L(norm)/L(unorm), the same state the first
C instructions of mpn_pi1_bdiv_q_1 establish:
C   r12 = up[0], ctr = n-1, cr1 = n compared with 1, carry bit set.
PROLOGUE(mpn_bdiv_q_1,toc)
	addi	r7, n, -1		C loop count n-1 (r7 becomes di further down)
	cmpdi	cr1, n, 1		C cr1.eq <=> single limb operand
	ld	r12, 0(up)		C r12 = up[0]
	neg	r0, d
	and	r0, d, r0		C isolate the lowest set bit of d
	cntlzd	r0, r0
	subfic	cnt, r0, 63		C cnt = index of that bit; sets carry (d != 0)
	srd	d, d, cnt		C strip the low zero bits from d
	mtctr	r7			C must precede the write to di (same register)
	LEA(	r10, binvert_limb_table)
	rldicl	r11, d, 63, 57		C table index = (d >> 1) & 0x7f
	lbzx	r0, r10, r11		C seed inverse from the table
	C Newton steps inv = 2*inv - inv*inv*d; each step doubles the number
	C of correct low bits of the inverse.
	mulld	r9, r0, r0
	sldi	r0, r0, 1
	mulld	r9, d, r9
	subf	r0, r9, r0
	mulld	r10, r0, r0
	sldi	r0, r0, 1
	mulld	r10, d, r10
	subf	r0, r10, r0
	mulld	r9, r0, r0
	sldi	r0, r0, 1
	mulld	r9, d, r9
	subf	di, r9, r0		C di = 1/d mod 2^64
ifdef(`AIX',
`	C For AIX it is not clear how to jump into another function.
	b	.mpn_pi1_bdiv_q_1
',`
	C For non-AIX, dispatch into the pi1 variant.  No instruction above
	C writes cr0, so it has to be set here; the test and its sense match
	C the dispatch at the top of mpn_pi1_bdiv_q_1 (cnt = 0 selects L(norm)).
	cmpdi	cr0, cnt, 0
	beq	cr0, L(norm)
	b	L(unorm)
')
EPILOGUE()
93
C mpn_pi1_bdiv_q_1 (rp, up, n, d, di, cnt)
C
C Stores n quotient limbs at rp.  Each quotient limb is di times the current
C dividend limb after subtracting the high half of the previous quotient limb
C times d and the pending borrow.  With cnt != 0 the dividend is first shifted
C right by cnt bits across limb boundaries (L(unorm)); with cnt = 0 the limbs
C are used as loaded (L(norm)).
C NOTE(review): d is presumably expected to be odd here, i.e. already shifted
C right by cnt as mpn_bdiv_q_1 does before branching in -- confirm with callers.
C
C L(norm) and L(unorm) are also branch targets of mpn_bdiv_q_1 and expect:
C   r12 = up[0], ctr = n-1, cr1 = n compared with 1, carry bit set.
PROLOGUE(mpn_pi1_bdiv_q_1)
	cmpdi	cr0, cnt, 0		C cr0.eq <=> no shift needed
	ld	r12, 0(up)		C r12 = up[0]
	addic	r0, n, -1		C set carry as side effect
	cmpdi	cr1, n, 1		C cr1.eq <=> single limb operand
	mtctr	r0			C loop count n-1
	beq	cr0, L(norm)

C Shifting path.  r11 holds the low part of the current shifted limb, r5 the
C carry limb from the previous step (0 at the start).
L(unorm):
	subfic	tnc, cnt, 64		C set carry as side effect
	li	r5, 0
	srd	r11, r12, cnt		C low part of shifted limb 0
	beq	cr1, L(ed1)		C n = 1: only the final limb remains

	ALIGN(16)
L(tpu):	ld	r12, 8(up)		C next dividend limb
	nop
	addi	up, up, 8
	sld	r0, r12, tnc		C bits moving down into the current limb
	or	r11, r11, r0		C complete shifted limb
	subfe	r9, r5, r11		C shifted limb - carry limb - borrow
	srd	r11, r12, cnt		C low part of the following shifted limb
	mulld	r0, di, r9		C quotient limb
	mulhdu	r5, r0, d		C carry limb = high half of quotient limb * d
	std	r0, 0(rp)
	addi	rp, rp, 8
	bdnz	L(tpu)

	subfe	r11, r5, r11		C top limb - carry limb - borrow
L(ed1):	mulld	r0, di, r11		C final quotient limb
	std	r0, 0(rp)
	blr

C Non-shifting path.  The first limb is handled before the loop; r5 carries
C the high half of quotient limb * d into the next step.
	ALIGN(16)
L(norm):
	mulld	r11, r12, di		C first quotient limb
	mulhdu	r5, r11, d		C carry limb for the next step
	std	r11, 0(rp)
	beqlr	cr1			C n = 1: done

	ALIGN(16)
L(tpn):	ld	r9, 8(up)		C next dividend limb
	addi	up, up, 8
	subfe	r5, r5, r9		C limb - carry limb - borrow
	mulld	r11, di, r5		C quotient limb
	mulhdu	r5, r11, d	C result not used in last iteration
	std	r11, 8(rp)
	addi	rp, rp, 8
	bdnz	L(tpn)

	blr
EPILOGUE()
ASM_END()
147