xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/arm/aorslsh1_n.asm (revision 80d9064ac03cbb6a4174695f0d5b237c8766d3d0)
1dnl  ARM mpn_addlsh1_n and mpn_sublsh1_n
2
3dnl  Contributed to the GNU project by Torbjorn Granlund.
4
5dnl  Copyright 2012 Free Software Foundation, Inc.
6
7dnl  This file is part of the GNU MP Library.
8
9dnl  The GNU MP Library is free software; you can redistribute it and/or modify
10dnl  it under the terms of the GNU Lesser General Public License as published
11dnl  by the Free Software Foundation; either version 3 of the License, or (at
12dnl  your option) any later version.
13
14dnl  The GNU MP Library is distributed in the hope that it will be useful, but
15dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
17dnl  License for more details.
18
19dnl  You should have received a copy of the GNU Lesser General Public License
20dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
21
22include(`../config.m4')
23
24C	      addlsh1_n       sublsh1_n
25C	     cycles/limb     cycles/limb
26C StrongARM	 ?		 ?
27C XScale	 ?		 ?
28C Cortex-A8	 ?		 ?
29C Cortex-A9	 3.12		 3.7
30C Cortex-A15	 ?		 ?
31
32C TODO
33C  * The addlsh1_n code runs well, but is only barely faster than mpn_addmul_1.
34C    The sublsh1_n code could surely be tweaked; its REVCY slows things down
35C    very much.  If two insns are really needed, it might help to separate them
36C    for better micro-parallelism.
37
C Symbolic names for the four argument registers.  In the code below
C results are stored through rp, operands are loaded through up and vp
C (the vp operand is the one that gets doubled), and n is the limb counter.
38define(`rp', `r0')
39define(`up', `r1')
40define(`vp', `r2')
41define(`n',  `r3')
42
C Operation-specific macros.  The build is presumably expected to define
C exactly one of OPERATION_addlsh1_n and OPERATION_sublsh1_n; nothing in
C this file enforces that.
C
C   ADDSUBC  carry-consuming add or subtract combining up limbs with the
C            doubled vp limbs
C   SAVECY   capture the carry flag in register $1
C   RESTCY   regenerate a carry flag from a register written by SAVECY
C   REVCY    invert the carry flag, clobbering $1 (empty in the add variant)
C   INICYR   load $1 with the saved-carry value that RESTCY turns into
C            "no carry in" (add) or "no borrow in" (sub)
C   RETVAL   r0 = shifted-out bit held in $1 + current carry flag
C   r10r11   last register of the r4-... push/pop range
C   func     name of the entry point being assembled
C   ADDSUB, SETCY and func_nc are defined but not referenced in this file.
C
C Add variant: SAVECY is sbc $1, $2, #0 with $2 = -1, giving -1 for carry
C set and -2 for carry clear; RESTCY (cmn with 1) carries out only for -1,
C so the flag comes back unchanged.  RETVAL adds 2 to undo that bias.
C r11 must be saved because it holds the -1 constant.
43ifdef(`OPERATION_addlsh1_n', `
44  define(`ADDSUB',	adds)
45  define(`ADDSUBC',	adcs)
46  define(`SETCY',	`cmp	$1, #1')
47  define(`RETVAL',	`adc	r0, $1, #2')
48  define(`SAVECY',	`sbc	$1, $2, #0')
49  define(`RESTCY',	`cmn	$1, #1')
50  define(`REVCY',	`')
51  define(`INICYR',	`mov	$1, #0')
52  define(`r10r11',	`r11')
53  define(`func',	mpn_addlsh1_n)
54  define(`func_nc',	mpn_addlsh1_nc)')
C Sub variant: SAVECY is sbc $1, $1, $1, giving 0 for carry set and -1 for
C carry clear; RESTCY (cmn with 1) then yields the COMPLEMENT of the saved
C flag, so a shifted-out 1 comes back as carry clear, which sbcs treats as
C a borrow.  REVCY is the same save/restore pair used back to back to turn
C the not-borrow flag left by sbcs into a plain borrow bit.  RETVAL adds 1
C to undo the 0/-1 encoding.  r11 is not used, so the push range ends at r10.
55ifdef(`OPERATION_sublsh1_n', `
56  define(`ADDSUB',	subs)
57  define(`ADDSUBC',	sbcs)
58  define(`SETCY',	`rsbs	$1, $1, #0')
59  define(`RETVAL',	`adc	r0, $1, #1')
60  define(`SAVECY',	`sbc	$1, $1, $1')
61  define(`RESTCY',	`cmn	$1, #1')
62  define(`REVCY',	`sbc	$1, $1, $1
62			cmn	$1, #1')
64  define(`INICYR',	`mvn	$1, #0')
65  define(`r10r11',	`r10')
66  define(`func',	mpn_sublsh1_n)
67  define(`func_nc',	mpn_sublsh1_nc)')
68
69MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)
70
C func is mpn_addlsh1_n or mpn_sublsh1_n, depending on the OPERATION_ symbol.
C
C Stores up[] + 2*vp[] (add variant) or up[] - 2*vp[] (sub variant) through
C rp, n limbs long, and returns in r0 the bit shifted out of the doubled
C operand plus the final carry (add) or borrow (sub), a value in 0..2.
C
C Two carry chains are in flight.  The carry-out of the add/sub chain
C (ADDSUBC) is fed directly into the next doubling (adcs x,x,x), where it
C lands in the otherwise zero bit 0.  The carry-out of the doubling is
C parked in a register by SAVECY, because the counter update clobbers the
C flags, and is consumed as carry-in of the add/sub one block later.  The
C loop is software pipelined: each half doubles block k and loads its up
C limbs, while the add/sub of block k happens in the following half.  r12
C and r14 alternate as the parking register, hence the duplicated body.
C
C Registers: r4-r6 up limbs and results, r8-r10 vp limbs, r11 = -1 (add
C variant only), r12 and r14 parked shift bits, r7 scratch for the swap.
C
C NOTE(review): n = 0 reaches rt0 with the initial r14 and returns 2 in the
C add variant; presumably callers guarantee n >= 1 -- confirm.
71ASM_START()
72PROLOGUE(func)
73	push	{r4-r10r11, r14}	C save work registers and return address
74
C The add variant keeps the constant -1 in r11 as the SAVECY operand.
75ifdef(`OPERATION_addlsh1_n', `
76	mvn	r11, #0
77')
78	INICYR(	r14)			C parked shift bit starts as zero
79	subs	n, n, #3
80	blt	L(le2)			C carry clear on branch path
81
82	cmn	r0, #0			C clear carry
83	ldmia	vp!, {r8, r9, r10}
84	b	L(mid)			C enter the pipeline at the doubling step
85
86L(top):	RESTCY(	r14)		C carry-in from shift bit parked in r14
87	ADDSUBC	r4, r4, r8
88	ADDSUBC	r5, r5, r9
89	ADDSUBC	r6, r6, r10
90	ldmia	vp!, {r8, r9, r10}
91	stmia	rp!, {r4, r5, r6}
92	REVCY(r14)			C sub only: not-borrow becomes borrow
93	adcs	r8, r8, r8		C double, add/sub carry enters bit 0
94	adcs	r9, r9, r9
95	adcs	r10, r10, r10
96	ldmia	up!, {r4, r5, r6}
97	SAVECY(	r14, r11)		C park shifted-out bit in r14
98	subs	n, n, #3
99	blt	L(exi)			C r12 already holds the bit exi needs
100	RESTCY(	r12)			C carry-in from shift bit parked in r12
101	ADDSUBC	r4, r4, r8
102	ADDSUBC	r5, r5, r9
103	ADDSUBC	r6, r6, r10
104	ldmia	vp!, {r8, r9, r10}
105	stmia	rp!, {r4, r5, r6}
106	REVCY(r12)			C sub only: not-borrow becomes borrow
107L(mid):	adcs	r8, r8, r8	C double, add/sub carry enters bit 0
108	adcs	r9, r9, r9
109	adcs	r10, r10, r10
110	ldmia	up!, {r4, r5, r6}
111	SAVECY(	r12, r11)		C park shifted-out bit in r12
112	subs	n, n, #3
113	bge	L(top)
114
115	mov	r7, r12			C swap alternating...
116	mov	r12, r14		C ...carry-save...
117	mov	r14, r7			C ...registers
118
C Drain the pipeline: add/sub the block that was doubled last.  Here r12
C holds the older shift bit (carry-in now) and r14 the newest one.
119L(exi):	RESTCY(	r12)
120	ADDSUBC	r4, r4, r8
121	ADDSUBC	r5, r5, r9
122	ADDSUBC	r6, r6, r10
123	stmia	rp!, {r4, r5, r6}
124
125	REVCY(r12)			C sub only: flag now holds the borrow
126L(le2):	tst	n, #1			C n = {-1,-2,-3} map to [2], [1], [0]
127	beq	L(e1)
128
129L(e02):	tst	n, #2
130	beq	L(rt0)			C nothing left, r14 and flag are final
131	ldm	vp, {r8, r9}		C two limbs left
132	adcs	r8, r8, r8
133	adcs	r9, r9, r9
134	ldm	up, {r4, r5}
135	SAVECY(	r12, r11)		C final shifted-out bit
136	RESTCY(	r14)			C carry-in from previous shift bit
137	ADDSUBC	r4, r4, r8
138	ADDSUBC	r5, r5, r9
139	stm	rp, {r4, r5}
140	b	L(rt1)
141
142L(e1):	ldr	r8, [vp]		C one limb left
143	adcs	r8, r8, r8
144	ldr	r4, [up]
145	SAVECY(	r12, r11)		C final shifted-out bit
146	RESTCY(	r14)			C carry-in from previous shift bit
147	ADDSUBC	r4, r4, r8
148	str	r4, [rp]
149
150L(rt1):	mov	r14, r12		C RETVAL reads the shift bit from r14
151	REVCY(r12)			C sub only: flag now holds the borrow
152L(rt0):	RETVAL(	r14)		C r0 = shift bit + carry or borrow
153	pop	{r4-r10r11, r14}
C Return through r14: bx when ARM_THUMB_MODE is defined, else mov pc.
154ifdef(`ARM_THUMB_MODE',
155`	bx	r14
156',`	mov	pc, r14
157')
158EPILOGUE()
159