xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/arm/v5/mod_1_1.asm (revision 6d322f2f4598f0d8a138f10ea648ec4fabe41f8b)
1dnl  ARM mpn_mod_1_1p
2
3dnl  Contributed to the GNU project by Torbjorn Granlund.
4
5dnl  Copyright 2012 Free Software Foundation, Inc.
6
7dnl  This file is part of the GNU MP Library.
8
9dnl  The GNU MP Library is free software; you can redistribute it and/or modify
10dnl  it under the terms of the GNU Lesser General Public License as published
11dnl  by the Free Software Foundation; either version 3 of the License, or (at
12dnl  your option) any later version.
13
14dnl  The GNU MP Library is distributed in the hope that it will be useful, but
15dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
17dnl  License for more details.
18
19dnl  You should have received a copy of the GNU Lesser General Public License
20dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
21
22include(`../config.m4')
23
24C	     cycles/limb
25C StrongARM	 ?
26C XScale	 ?
27C Cortex-A8	 ?
28C Cortex-A9	 7
29C Cortex-A15	 6
30
C m4 aliases for the registers r0-r3.  The instruction sequences below name
C these registers by number rather than through the aliases, so the aliases
C mainly document which value each register carries on entry to
C mpn_mod_1_1p (presumably its four arguments, in order -- TODO confirm
C against the C prototype).
31define(`ap', `r0')
32define(`n',  `r1')
33define(`d',  `r2')
34define(`cps',`r3')
35
36ASM_START()
C mpn_mod_1_1p
C
C Reduces the n 32-bit limbs at ap to a single word and returns it in r0.
C Entry registers, using the define() names from earlier in this file:
C   r0 = ap, r1 = n, r2 = d, r3 = cps (address of a table of 32-bit words).
C Table words read here:
C   [cps,#0]   multiplier for the final division step (mpn_mod_1_1p_cps
C              stores the mpn_invert_limb result there)
C   [cps,#4]   shift count (mpn_mod_1_1p_cps stores clz of the divisor)
C   [cps,#8]   read only when the shift count is non-zero
C   [cps,#12]  read only when n > 2; multiplier used by the main loop
C ap[n-1] and ap[n-2] are always loaded, so callers presumably guarantee
C n >= 2 -- TODO confirm.  d is used unshifted while the running value is
C shifted left by the count, so d is presumably passed already shifted left
C by that count -- TODO confirm against callers.
C r4-r10 are saved and restored; r12 is used as scratch without saving.
37PROLOGUE(mpn_mod_1_1p)
38	push	{r4-r10}		C save r4-r10
39	add	r0, r0, r1, asl #2	C r0 = ap + 4*n, just past the top limb
40	ldr	r5, [r0, #-4]!		C r5 = ap[n-1] (pre-decrement with writeback)
41	ldr	r12, [r0, #-4]!		C r12 = ap[n-2]
42	subs	r1, r1, #2		C r1 = n - 2, flags set for the branch
43	ble	L(4)			C nothing more to fold: finish with r5:r12
44	ldr	r8, [r3, #12]		C r8 = word at cps+12, the loop multiplier
45	mov	r4, r12			C r4 = running low word
46	mov	r10, r5			C copy the high word; presumably so umull's source differs from its destinations (pre-ARMv6 restriction -- confirm)
47	umull	r7, r5, r10, r8		C r5:r7 = high word * r8 (r7 low, r5 high)
48	sub	r1, r1, #1		C r1 = n - 3 = limbs left after the load at L(mid)
49	b	L(mid)
50
C Loop invariant at L(top): r6 = limb just loaded, r5:r7 = product of the
C previous high word with r8, r4 = previous low word.
51L(top):	adds	r12, r6, r7	C r12 = limb + low(product), carry out
52	adcs	r10, r4, r5		C r10 = r4 + high(product) + carry, carry out
53	sub	r1, r1, #1		C count down; does not touch the flags
54	mov	r6, #0
55	movcs	r6, r8			C r6 = r8 if the adcs carried, else 0
56	umull	r7, r5, r10, r8		C next product: r5:r7 = r10 * r8 (flags kept)
57	adds	r4, r12, r6		C fold the carry correction into the low word
58	subcs	r4, r4, r2		C if that addition carried, subtract d
59L(mid):	ldr	r6, [r0, #-4]!	C r6 = next lower limb
60	teq	r1, #0
61	bne	L(top)			C r1 == 0: r6 holds ap[0], folded in below
62
63	adds	r12, r6, r7		C last fold: r12 = ap[0] + low(product)
64	adcs	r5, r4, r5		C r5 = r4 + high(product) + carry
65	subcs	r5, r5, r2		C if that carried, subtract d
C Here r5:r12 is the two-word value still to be reduced.
66L(4):	ldr	r1, [r3, #4]	C r1 = shift count from cps+4
67	cmp	r1, #0
68	beq	L(7)			C count 0: no shifting needed
69	ldr	r4, [r3, #8]		C r4 = word at cps+8
70	umull	r0, r6, r5, r4		C r6:r0 = r5 * r4
71	adds	r12, r0, r12		C add the low word into the product ...
72	addcs	r6, r6, #1		C ... propagating the carry into the high half
73	rsb	r0, r1, #32		C r0 = 32 - count
74	mov	r0, r12, lsr r0		C bits of r12 that move into the high word
75	orr	r5, r0, r6, asl r1	C r5 = (r6 << count) | (r12 >> (32 - count))
76	mov	r12, r12, asl r1	C r12 <<= count
77	b	L(8)
78L(7):	cmp	r5, r2
79	subcs	r5, r5, r2		C count 0: if r5 >= d (unsigned), r5 -= d
C Final step: divide r5:r12 by d using the multiplier at cps+0 and keep the
C remainder (the sequence looks like a 2-by-1 division by precomputed
C inverse -- confirm against the referenced algorithm).
80L(8):	ldr	r0, [r3, #0]	C r0 = word at cps+0
81	umull	r4, r3, r5, r0		C r3:r4 = r5 * r0; the cps pointer in r3 is no longer needed
82	add	r5, r5, #1
83	adds	r0, r4, r12		C r0 = low(product) + r12, carry out
84	adc	r5, r3, r5		C quotient estimate = high(product) + r5 + 1 + carry
85	mul	r5, r2, r5		C low word of d * estimate
86	sub	r12, r12, r5		C candidate remainder
87	cmp	r12, r0
88	addhi	r12, r12, r2		C if candidate > r0 (unsigned), add d back
89	cmp	r2, r12
90	subls	r12, r12, r2		C if d <= candidate (unsigned), subtract d
91	mov	r0, r12, lsr r1		C return value = remainder >> shift count
92	pop	{r4-r10}		C restore r4-r10
93	bx	r14
94EPILOGUE()
95
C mpn_mod_1_1p_cps
C
C Fills the table of 32-bit words that mpn_mod_1_1p reads.
C In:   r0 = address of the table, r1 = divisor (presumably non-zero, since
C       its leading-zero count is used as a shift amount -- TODO confirm).
C Out:  [r0,#0]   value returned by mpn_invert_limb for (divisor << count)
C       [r0,#4]   count = clz(divisor)
C       [r0,#8]   written only when count != 0 (see below)
C       [r0,#12]  low word of -(divisor << count) * [r0,#0]
C Calls mpn_invert_limb and relies on r4-r6 keeping their values across
C that call.
96PROLOGUE(mpn_mod_1_1p_cps)
97	stmfd	sp!, {r4, r5, r6, r14}	C save r4-r6 and the return address
98	mov	r5, r0			C r5 = table address, kept across the call
99	clz	r4, r1			C r4 = count = leading zeros of the divisor
100	mov	r0, r1, asl r4		C r0 = divisor << count
101	rsb	r6, r0, #0		C r6 = -(divisor << count)
102	bl	mpn_invert_limb		C r0 = mpn_invert_limb(divisor << count)
103	str	r0, [r5, #0]		C table word 0 = that result
104	str	r4, [r5, #4]		C table word 1 = count
105	cmp	r4, #0
106	beq	L(2)			C count 0: word 2 is left unwritten
107	rsb	r1, r4, #32		C r1 = 32 - count
108	mov	r3, #1
109	mov	r3, r3, asl r4		C r3 = 1 << count
110	orr	r3, r3, r0, lsr r1	C r3 = (1 << count) | (word 0 >> (32 - count))
111	mul	r3, r6, r3		C r3 = low word of r6 * r3
112	mov	r4, r3, lsr r4		C r4 = r3 >> count (count in r4 is consumed here)
113	str	r4, [r5, #8]		C table word 2
114L(2):	mul	r0, r6, r0	C r0 = low word of r6 * word 0
115	str	r0, [r5, #12]		C table word 3
116	ldmfd	sp!, {r4, r5, r6, pc}	C restore r4-r6 and return by loading pc
117EPILOGUE()
118