xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/arm/v6/mul_2.asm (revision d909946ca08dceb44d7d0f22ec9488679695d976)
1dnl  ARM mpn_mul_2.
2
3dnl  Contributed to the GNU project by Torbjorn Granlund.
4
5dnl  Copyright 2012 Free Software Foundation, Inc.
6
7dnl  This file is part of the GNU MP Library.
8
9dnl  The GNU MP Library is free software; you can redistribute it and/or modify
10dnl  it under the terms of the GNU Lesser General Public License as published
11dnl  by the Free Software Foundation; either version 3 of the License, or (at
12dnl  your option) any later version.
13
14dnl  The GNU MP Library is distributed in the hope that it will be useful, but
15dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
17dnl  License for more details.
18
19dnl  You should have received a copy of the GNU Lesser General Public License
20dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
21
22include(`../config.m4')
23
24C	     cycles/limb
25C StrongARM:	 -
26C XScale	 -
27C Cortex-A8	 ?
28C Cortex-A9	 2.25
29C Cortex-A15	 ?
30
31C TODO
32C  * This is a trivial edit of the addmul_2 code.  Check for simplifications,
33C    and possible speedups to 2.0 c/l.
34
35define(`rp',`r0')	C destination pointer
36define(`up',`r1')	C source pointer
37define(`n', `r2')	C limb count
38define(`vp',`r3')	C pointer to the two multiplier limbs
39
40define(`v0',`r6')	C multiplier limb loaded from vp[0]
41define(`v1',`r7')	C multiplier limb loaded from vp[1]
42define(`u0',`r3')	C source limb; shares r3 with vp, which is not read again after the initial ldm
43define(`u1',`r9')	C source limb; u0 and u1 alternate from one limb to the next
44
45define(`cya',`r8')	C running carry limb of the products by v0
46define(`cyb',`r12')	C running carry limb of the products by v1
47
48
49ASM_START()
C mpn_mul_2 -- multiply the n-limb operand at up by the two limbs at vp.
C
C In:   r0 = rp  destination pointer
C       r1 = up  source pointer, n limbs are read
C       r2 = n   limb count
C       r3 = vp  pointer to the two multiplier limbs, loaded into v0 and v1
C Out:  n+1 limbs are stored at rp; the most significant product limb is
C       returned in r0.
C Uses: r4-r9 are pushed on entry and popped before return; r12 and the
C       condition flags are modified without being saved.
C
C The main loop is unrolled four ways.  Each group of five instructions
C loads the next source limb, finishes and stores one result limb, and
C starts the following one.  r4 and r5 alternate as the accumulator for the
C result limb, u0 and u1 alternate as the current source limb.
C
C The entry code selects one of the entry points top, lo1, lo0 or lo3 from
C the low two bits of n and biases up and rp so that the fixed offsets used
C at that entry point address up[1] and rp[0].  The last two source limbs
C are always handled by the wind-down code at end.
C
C NOTE(review): n = 1 follows the same path as n = 5, and n = 0 the same
C path as n = 4, so the routine presumably requires n >= 2 -- confirm
C against the callers.
50PROLOGUE(mpn_mul_2)
51	push	{ r4, r5, r6, r7, r8, r9 }	C restored by the pop before return
52
53	ldm	vp, { v0, v1 }		C v0 = vp[0], v1 = vp[1]; r3 is free for u0 from here on
54	mov	cya, #0
55	mov	cyb, #0
56
57	tst	n, #1
58	beq	L(evn)			C n even
59L(odd):	mov	r5, #0
60	ldr	u0, [up, #0]		C n odd: first source limb goes to u0
61	mov	r4, #0
62	tst	n, #2
63	beq	L(fi1)			C n mod 4 = 1
64L(fi3):	sub	up, up, #12		C n mod 4 = 3: enter at lo3, which loads at up+16
65	sub	rp, rp, #16		C and stores at rp+16
66	b	L(lo3)
67L(fi1):	sub	n, n, #1		C n is now a multiple of 4
68	sub	up, up, #4		C enter at lo1, which loads at up+8
69	sub	rp, rp, #8		C and stores at rp+8
70	b	L(lo1)
71L(evn):	mov	r4, #0
72	ldr	u1, [up, #0]		C n even: first source limb goes to u1
73	mov	r5, #0
74	tst	n, #2
75	bne	L(fi2)			C n mod 4 = 2
76L(fi0):	sub	up, up, #8		C n mod 4 = 0: enter at lo0, which loads at up+12
77	sub	rp, rp, #12		C and stores at rp+12
78	b	L(lo0)
79L(fi2):	subs	n, n, #2		C flags from this subtraction are tested by bls below
80	sub	rp, rp, #4		C top and end both store at rp+4
81	bls	L(end)			C nothing left for the loop: only the wind-down runs
82
83	ALIGN(16)
84L(top):	ldr	u0, [up, #4]		C fetch the next source limb
85	umaal	r4, cya, u1, v0		C cya:r4 = u1*v0 + r4 + cya
86	str	r4, [rp, #4]		C r4 is a finished result limb
87	mov	r4, #0			C clear it for reuse two limbs later
88	umaal	r5, cyb, u1, v1		C cyb:r5 = u1*v1 + r5 + cyb, start of the next result limb
89L(lo1):	ldr	u1, [up, #8]		C same pattern with u0/u1 and r4/r5 swapped
90	umaal	r5, cya, u0, v0
91	str	r5, [rp, #8]
92	mov	r5, #0
93	umaal	r4, cyb, u0, v1
94L(lo0):	ldr	u0, [up, #12]
95	umaal	r4, cya, u1, v0
96	str	r4, [rp, #12]
97	mov	r4, #0
98	umaal	r5, cyb, u1, v1
99L(lo3):	ldr	u1, [up, #16]!		C writeback advances up by four limbs
100	umaal	r5, cya, u0, v0
101	str	r5, [rp, #16]!		C writeback advances rp by four limbs
102	mov	r5, #0
103	umaal	r4, cyb, u0, v1
104	subs	n, n, #4
105	bhi	L(top)			C loop while the result is nonzero and there was no borrow
106
C Wind-down: u1 holds the second to last source limb, the last one is
C loaded into u0 here.
107L(end):	umaal	r4, cya, u1, v0
108	ldr	u0, [up, #4]		C last source limb
109	umaal	r5, cyb, u1, v1
110	str	r4, [rp, #4]
111	umaal	r5, cya, u0, v0
112	umaal	cya, cyb, u0, v1	C cyb:cya = u0*v1 + cya + cyb, the two top product limbs
113	str	r5, [rp, #8]
114	str	cya, [rp, #12]		C last limb written to rp
115	mov	r0, cyb			C most significant limb is the return value
116
117	pop	{ r4, r5, r6, r7, r8, r9 }
118	bx	r14
119EPILOGUE()
120