xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/arm/v6/dive_1.asm (revision ce54336801cf28877c3414aa2fcb251dddd543a2)
1dnl  ARM v6 mpn_divexact_1
2
3dnl  Contributed to the GNU project by Torbjörn Granlund.
4
5dnl  Copyright 2012, 2013 Free Software Foundation, Inc.
6
7dnl  This file is part of the GNU MP Library.
8dnl
9dnl  The GNU MP Library is free software; you can redistribute it and/or modify
10dnl  it under the terms of either:
11dnl
12dnl    * the GNU Lesser General Public License as published by the Free
13dnl      Software Foundation; either version 3 of the License, or (at your
14dnl      option) any later version.
15dnl
16dnl  or
17dnl
18dnl    * the GNU General Public License as published by the Free Software
19dnl      Foundation; either version 2 of the License, or (at your option) any
20dnl      later version.
21dnl
22dnl  or both in parallel, as here.
23dnl
24dnl  The GNU MP Library is distributed in the hope that it will be useful, but
25dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
26dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
27dnl  for more details.
28dnl
29dnl  You should have received copies of the GNU General Public License and the
30dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
31dnl  see https://www.gnu.org/licenses/.
32
33include(`../config.m4')
34
35C               cycles/limb       cycles/limb
36C               norm    unorm    modexact_1c_odd
37C StrongARM	 -	 -
38C XScale	 -	 -
39C Cortex-A7	 ?	 ?
40C Cortex-A8	 ?	 ?
41C Cortex-A9	 9	10		 9
42C Cortex-A15	 7	 7		 7
43
44C Architecture requirements:
45C v5	-
46C v5t	clz
47C v5te	-
48C v6	umaal
49C v6t2	-
50C v7a	-
51
C Operand registers.  In the routine below rp is the pointer the quotient
C limbs are stored through, up is the pointer the source limbs are loaded
C through, n counts the limbs still to be processed, and d is the divisor
C (shifted right in place until it is odd).
52define(`rp', `r0')
53define(`up', `r1')
54define(`n',  `r2')
55define(`d',  `r3')
56
C Working registers.  cy carries the high limb of quotient*d from one limb
C to the next, cnt is the number of trailing zero bits removed from d, and
C tnc is the complementary shift count 32-cnt, used only when d was even.
57define(`cy',  `r7')
58define(`cnt', `r6')
59define(`tnc', `r10')
60
61ASM_START()
C mpn_divexact_1
C
C Reads n limbs through up, least significant first, and writes n limbs
C through rp.  Each output limb is formed by multiplying by the inverse of
C the odd part of d modulo 2^32 and subtracting a carry, so the output is
C the quotient of the input by d provided the division is exact (assumed
C precondition, not checked here).
C
C Inputs:   rp (r0), up (r1), n (r2), d (r3).
C Assumed:  n >= 1, since n is decremented before it is first tested, and
C           d != 0, since the trailing-zero count is taken from d.
C Scratch:  r4 = inverse, r5 = current quotient limb, r8 = -inverse,
C           r9 = current source limb, r11 = bits carried over from the
C           previous source limb (even d only), r12 = temporary.
C Saved:    r4-r9 are pushed on entry, r10-r11 only on the even-d path;
C           each exit pops what its path pushed and returns with bx r14.
62PROLOGUE(mpn_divexact_1)
63	push	{r4,r5,r6,r7,r8,r9}
64
C Z is set when d is even.  No instruction between here and the beq
C L(unnorm) below has an s suffix, so that branch still tests this result
C (assuming the LEA macro, which is defined elsewhere, leaves flags alone).
65	tst	d, #1
66
C d & -d isolates the lowest set bit of d; 31 minus its leading-zero count
C is the index of that bit, which is then shifted out to make d odd.
67	rsb	r4, d, #0
68	and	r4, r4, d
69	clz	r4, r4
70	rsb	cnt, r4, #31		C count_trailing_zeros
71	mov	d, d, lsr cnt
72
C The table is indexed by bits 1..7 of the now odd d and yields one byte,
C presumably an inverse of d valid in its low bits (table defined
C elsewhere).  It is refined twice with inv = 2*inv - d*inv*inv; each such
C step doubles the number of correct low bits.
73C binvert limb
74	LEA(	r4, binvert_limb_table)
75	and	r12, d, #254
76	ldrb	r4, [r4, r12, lsr #1]
77	mul	r12, r4, r4
78	mul	r12, d, r12
79	rsb	r12, r12, r4, lsl #1
80	mul	r4, r12, r12
81	mul	r4, d, r4
82	rsb	r4, r4, r12, lsl #1	C r4 = inverse
83
84	ldr	r5, [up], #4		C up[0]
85	mov	cy, #0
86	rsb	r8, r4, #0		C r8 = -inverse
87	beq	L(unnorm)
88
C Odd divisor: source limbs are used as they are.  The first quotient limb
C needs no carry correction because cy is still zero.
89L(norm):
90	subs	n, n, #1
91	mul	r5, r5, r4
92	beq	L(end)
93
C Per iteration: store the quotient limb q just computed, then
C umaal forms cy:r12 = q*d + r12 + cy with r12 = 0, leaving the high limb
C in cy (the low limb in r12 is not used).  The next quotient limb is
C r9*inverse + cy*(-inverse), i.e. (source limb - cy) * inverse mod 2^32.
94	ALIGN(16)
95L(top):	ldr	r9, [up], #4
96	mov	r12, #0
97	str	r5, [rp], #4
98	umaal	r12, cy, r5, d
99	mul	r5, r9, r4
100	mla	r5, cy, r8, r5
101	subs	n, n, #1
102	bne	L(top)
103
C Store the final quotient limb, restore r4-r9 and return.
104L(end):	str	r5, [rp]
105	pop	{r4,r5,r6,r7,r8,r9}
106	bx	r14
107
C Even divisor: the source is shifted right by cnt bits on the fly.  Each
C working limb is r11 (the previous limb shifted right by cnt) combined
C with the next limb shifted left by tnc = 32-cnt.  r10 (tnc) and r11 are
C used only on this path, so they are saved here.
108L(unnorm):
109	push	{r10,r11}
110	rsb	tnc, cnt, #32
111	mov	r11, r5, lsr cnt
112	subs	n, n, #1
113	beq	L(edx)
114
C First shifted limb: cy is still zero, so no carry correction is applied.
115	ldr	r12, [up], #4
116	orr	r9, r11, r12, lsl tnc
117	mov	r11, r12, lsr cnt
118	mul	r5, r9, r4
119	subs	n, n, #1
120	beq	L(edu)
121
C Same step as L(top), except that r9 is assembled from two adjacent source
C limbs.  r12 is reused as the umaal accumulator once the loaded limb has
C been consumed.
122	ALIGN(16)
123L(tpu):	ldr	r12, [up], #4
124	orr	r9, r11, r12, lsl tnc
125	mov	r11, r12, lsr cnt
126	mov	r12, #0
127	str	r5, [rp], #4
128	umaal	r12, cy, r5, d
129	mul	r5, r9, r4
130	mla	r5, cy, r8, r5
131	subs	n, n, #1
132	bne	L(tpu)
133
C Wind down: store the pending quotient limb, then produce the last limb
C from r11 alone (the leftover high bits of the final source limb) with the
C usual carry correction.  No further source limb is loaded.
134L(edu):	str	r5, [rp], #4
135	mov	r12, #0
136	umaal	r12, cy, r5, d
137	mul	r5, r11, r4
138	mla	r5, cy, r8, r5
139	str	r5, [rp]
140	pop	{r10,r11}
141	pop	{r4,r5,r6,r7,r8,r9}
142	bx	r14
143
C Single-limb input with even d: the only quotient limb is the shifted
C source limb times the inverse.
144L(edx):	mul	r5, r11, r4
145	str	r5, [rp]
146	pop	{r10,r11}
147	pop	{r4,r5,r6,r7,r8,r9}
148	bx	r14
149EPILOGUE()
150