dnl  Origin: /netbsd-src/external/lgpl3/gmp/dist/mpn/arm/v6t2/divrem_1.asm
dnl  (revision e89934bbf778a6d6d6894877c4da59d0c7835b0f)

dnl  ARM v6t2 mpn_divrem_1 and mpn_preinv_divrem_1.

dnl  Contributed to the GNU project by Torbjorn Granlund.

dnl  Copyright 2012 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C Timings per column: normalised divisor, unnormalised divisor, fraction
C limbs.  Presumably cycles/limb as in other GMP files -- TODO confirm.
C
C		norm	unorm	frac
C StrongARM	 ?
C XScale	 ?
C Cortex-A8	 ?
C Cortex-A9	 13	 14	 13
C Cortex-A15	 ?

C TODO
C  * Optimise inner-loops better, they could likely run a cycle or two faster.
C  * Decrease register usage, streamline non-loop code.

C Incoming arguments.  The first four arrive in registers.  The remaining
C ones are on the stack: the values below are byte offsets from sp as it was
C at function entry, and the code adds 9*4 to step over the nine registers
C pushed by the prologue.  dinv_arg and cnt_arg are only read by
C mpn_preinv_divrem_1.
define(`qp_arg',  `r0')
define(`fn',      `r1')
define(`up_arg',  `r2')
define(`n_arg',   `r3')
define(`d_arg',   `0')
define(`dinv_arg',`4')
define(`cnt_arg', `8')

C Working registers.  dinv shares r0 with qp_arg, so qp_arg must be consumed
C before dinv is loaded or returned by mpn_invert_limb.  up is r6, which the
C fraction loop later reuses as a scratch register.
define(`n',       `r9')
define(`qp',      `r5')
define(`up',      `r6')
define(`cnt',     `r7')
define(`tnc',     `r10')
define(`dinv',    `r0')
define(`d',       `r4')

ASM_START()
C mpn_preinv_divrem_1 -- same division as mpn_divrem_1, but the caller
C supplies the inverse and the shift count, so mpn_invert_limb is not called.
C
C In:   r0 = qp, r1 = fn, r2 = up, r3 = n,
C       [sp, #0] = d, [sp, #4] = dinv, [sp, #8] = cnt
C Out:  produced by the shared code in mpn_divrem_1, entered at L(nent) or
C       L(uent) with d, cnt, dinv, n, qp and up set up and fn stored in the
C       stack slot that held d.
C
C NOTE(review): d is shifted left by cnt at L(uent), so dinv is presumably
C expected to be the inverse of d << cnt -- confirm against the callers.
PROLOGUE(mpn_preinv_divrem_1)
	stmfd	sp!, {r4, r5, r6, r7, r8, r9, r10, r11, lr}
	ldr	d,    [sp, #9*4+d_arg]
	ldr	cnt,  [sp, #9*4+cnt_arg]
	str	fn, [sp, #9*4+d_arg]	C reuse d stack slot for fn
	sub	n, n_arg, #1		C n = index of the most significant limb
	add	r3, fn, n		C r3 = fn + n - 1, index of top Q limb
	cmp	d, #0			C flags stay live down to the blt
	add	qp, qp_arg, r3, lsl #2	C put qp at Q[] end
	add	up, up_arg, n, lsl #2	C put up at U[] end
	ldr	dinv, [sp, #9*4+dinv_arg]	C overwrites r0, qp_arg is dead
	blt	L(nent)			C top bit of d set: normalised path
	b	L(uent)			C otherwise: unnormalised path
EPILOGUE()

C mpn_divrem_1 -- divide the n-limb number at up, followed by fn zero
C fraction limbs, by the single limb d.
C
C In:   r0 = qp, r1 = fn, r2 = up, r3 = n, [sp, #0] = d
C Out:  r0 = remainder.  n + fn quotient limbs are stored, the first one at
C       qp[fn+n-1] and each following one 4 bytes lower.
C Uses: r4-r11 and lr are pushed on entry and restored on exit.  r1-r3 and
C       r12 are scratch.  fn is kept in the stack slot that held d.
C
C Each quotient limb is formed the same way in all three loops:
C   {q, q0} = {r + 1, u} + r * dinv	C umlal
C   r = u - q * d			C mls, modulo 2^32
C   if r > q0:  q -= 1, r += d
C   if r >= d:  q += 1, r -= d		C not present in the fraction loop
C where r is the running remainder and u the next numerator limb.
C
C mpn_preinv_divrem_1 branches into this function at L(nent) and L(uent).
PROLOGUE(mpn_divrem_1)
	stmfd	sp!, {r4, r5, r6, r7, r8, r9, r10, r11, lr}
	sub	n, n_arg, #1		C n = index of the most significant limb
	ldr	d, [sp, #9*4+d_arg]	C d
	str	fn, [sp, #9*4+d_arg]	C reuse d stack slot for fn
	add	r3, fn, n		C r3 = fn + n - 1, index of top Q limb
	cmp	d, #0			C flags stay live down to the blt
	add	qp, qp_arg, r3, lsl #2	C put qp at Q[] end
	add	up, up_arg, n, lsl #2	C put up at U[] end
	blt	L(normalised)		C top bit of d already set

C ---- Unnormalised divisor: shift d and the numerator left by cnt ----
C NOTE(review): nine words are pushed above, so sp is not 8-byte aligned at
C the two calls to mpn_invert_limb.  Harmless only if the callee does not
C rely on AAPCS stack alignment -- confirm.
L(unnorm):
	clz	cnt, d
	mov	r0, d, lsl cnt		C pass d << cnt
	bl	mpn_invert_limb		C result in r0, which is dinv
L(uent):
	mov	d, d, lsl cnt		C d <<= cnt
	cmp	n, #0			C flags feed both the blt and the beq below
	mov	r1, #0			C r
	blt	L(frac)			C n was 0: no integer limbs at all

	ldr	r11, [up, #0]		C most significant limb

	rsb	tnc, cnt, #32		C tnc = 32 - cnt
	mov	r1, r11, lsr tnc	C r = bits shifted out of the top limb
	mov	r11, r11, lsl cnt	C rest of the top limb, low bits zero
	beq	L(uend)			C single limb: only the final step is left

	ldr	r3, [up, #-4]!		C next lower limb
	orr	r2, r11, r3, lsr tnc	C first complete shifted numerator limb
	b	L(mid)

C Loop registers: r1 = r, r8 = q, r11 = numerator limb used by the mls,
C r2 = numerator limb going into umlal and low product word coming out,
C r3 = most recently loaded source limb.
L(utop):
	mls	r1, d, r8, r11		C r = u - q * d
	mov	r11, r3, lsl cnt	C high part of the next shifted limb
	ldr	r3, [up, #-4]!		C following source limb
	cmp	r1, r2			C r > q0 ?
	addhi	r1, r1, d
	subhi	r8, r8, #1
	orr	r2, r11, r3, lsr tnc	C next shifted numerator limb
	cmp	r1, d
	bcs	L(ufx)			C r >= d: fix up out of line
L(uok):	str	r8, [qp], #-4
L(mid):	add	r8, r1, #1		C q = r + 1 before accumulating
	mov	r11, r2			C keep u for the mls
	umlal	r2, r8, r1, dinv	C {r8, r2} += r * dinv
	subs	n, n, #1
	bne	L(utop)

C Finish the limb started at L(mid); nothing further is loaded.
	mls	r1, d, r8, r11
	mov	r11, r3, lsl cnt	C last limb, low bits zero filled
	cmp	r1, r2
	addhi	r1, r1, d
	subhi	r8, r8, #1
	cmp	r1, d
	rsbcs	r1, d, r1		C r -= d
	addcs	r8, r8, #1
	str	r8, [qp], #-4

C Final integer quotient limb, numerator limb in r11.
L(uend):add	r8, r1, #1
	mov	r2, r11
	umlal	r2, r8, r1, dinv
	mls	r1, d, r8, r11
	cmp	r1, r2
	addhi	r1, r1, d
	subhi	r8, r8, #1
	cmp	r1, d
	rsbcs	r1, d, r1
	addcs	r8, r8, #1
	str	r8, [qp], #-4

C ---- Fraction limbs: fn further quotient limbs with a zero numerator ----
C Shared by both paths.  Entered with r1 = r and cnt = shift applied to d.
L(frac):
	ldr	r2, [sp, #9*4+d_arg]	C fn
	cmp	r2, #0
	beq	L(fend)

	mov	r8, #0			C zero numerator limb, loop invariant
C r6 was up, which is no longer needed, and serves as q0 here.
C NOTE(review): there is no r >= d correction in this loop; presumably it
C cannot trigger when the numerator limb is zero -- confirm.
L(ftop):mov	r6, #0
	add	r3, r1, #1
	umlal	r6, r3, r1, dinv	C {r3, r6} = {r + 1, 0} + r * dinv
	mls	r1, d, r3, r8		C r = 0 - q * d
	cmp	r1, r6
	addhi	r1, r1, d
	subhi	r3, r3, #1
	subs	r2, r2, #1		C flags survive the str
	str	r3, [qp], #-4
	bne	L(ftop)

L(fend):mov	r11, r1, lsr cnt	C undo the normalisation shift on r
L(rtn):	mov	r0, r11
	ldmfd	sp!, {r4, r5, r6, r7, r8, r9, r10, r11, pc}

C ---- Normalised divisor: no shifting needed ----
L(normalised):
	mov	r0, d
	bl	mpn_invert_limb		C result in r0, which is dinv
L(nent):
	cmp	n, #0
	mov	r11, #0			C r
	blt	L(nend)			C n was 0: no integer limbs at all

	ldr	r11, [up, #0]		C most significant limb
	cmp	r11, d
	movlo	r2, #0			C hi q limb
	movhs	r2, #1			C hi q limb
	subhs	r11, r11, d		C r = top limb reduced below d

	str	r2, [qp], #-4
	cmp	n, #0
	beq	L(nend)			C single limb: done with the integer part

C Loop registers: r11 = r, r12 = q, r1 = u going in and q0 coming out,
C r3 = second copy of u for the mls.
L(ntop):ldr	r1, [up, #-4]!
	add	r12, r11, #1
	umlal	r1, r12, r11, dinv	C {r12, r1} += r * dinv
	ldr	r3, [up, #0]		C reload u, r1 was overwritten
	mls	r11, d, r12, r3		C r = u - q * d
	cmp	r11, r1			C r > q0 ?
	addhi	r11, r11, d
	subhi	r12, r12, #1
	cmp	d, r11
	bls	L(nfx)			C r >= d: fix up out of line
L(nok):	str	r12, [qp], #-4
	subs	n, n, #1
	bne	L(ntop)

L(nend):mov	r1, r11			C r
	mov	cnt, #0			C shift cnt
	b	L(frac)

C Out-of-line second corrections: q += 1, r -= d.
L(nfx):	add	r12, r12, #1
	rsb	r11, d, r11
	b	L(nok)
L(ufx):	rsb	r1, d, r1
	add	r8, r8, #1
	b	L(uok)
EPILOGUE()