dnl  xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/arm64/lshift.asm (revision 7863ba460b0a05b553c754e5dbc29247dddec322)
dnl  ARM64 mpn_lshift.

dnl  Copyright 2013, 2014 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
include(`../config.m4')

C	     cycles/limb
C Cortex-A53	 ?
C Cortex-A57	 ?

changecom(@&*$)

C INPUT PARAMETERS (AAPCS64 argument registers)
define(`rp_arg', `x0')	C destination limb pointer; x0 also carries the return value
define(`up',     `x1')	C source limb pointer
define(`n',      `x2')	C limb count (presumably n >= 1 -- standard mpn contract, confirm)
define(`cnt',    `x3')	C shift count (presumably 1..63; cnt == 0 would break the lsr-by-tnc trick)

define(`rp',     `x16')	C working copy of the destination pointer, freeing x0 for the return value

define(`tnc',`x8')	C -cnt; used as a right-shift count, equivalent to 64-cnt (shifts are mod 64)
ASM_START()
C mpn_lshift: shift the n-limb operand at up left by cnt bits, storing
C the result at rp and working from the most significant limb downwards.
C Return value (x0): the bits shifted out of the top limb,
C i.e. up[n-1] >> (64-cnt).
C
C NOTE(review): the scratch register x18 of the original code was
C replaced by x17 throughout.  x18 is platform-reserved under AAPCS64
C (Apple, Windows) and must not be clobbered; x17 (IP1) is free here,
C as this is a leaf function making no external calls.
PROLOGUE(mpn_lshift)
	add	rp, rp_arg, n, lsl #3	C rp = one limb past the destination top
	add	up, up, n, lsl #3	C up = one limb past the source top
	sub	tnc, xzr, cnt		C tnc = -cnt; lsr by tnc == lsr by 64-cnt
	tbz	n, #0, L(bx0)		C dispatch on n mod 4, low bit first

L(bx1):	ldr	x4, [up,#-8]		C n odd: load the top limb
	tbnz	n, #1, L(b11)

C n == 1 (mod 4)
L(b01):	lsr	x0, x4, tnc		C return value: bits shifted out of the top
	lsl	x17, x4, cnt
	sub	n, n, #1
	cbnz	n, L(gt1)
	str	x17, [rp,#-8]		C n == 1: single result limb, done
	ret
L(gt1):	ldp	x4, x5, [up,#-24]
	sub	up, up, #8
	add	rp, rp, #16
	b	L(lo2)

C n == 3 (mod 4)
L(b11):	lsr	x0, x4, tnc		C return value
	lsl	x9, x4, cnt
	ldp	x6, x7, [up,#-24]
	add	n, n, #1		C bias n so the shared `sub n, n, #4' at L(lo0) works out
	add	up, up, #8
	add	rp, rp, #32
	b	L(lo0)

L(bx0):	ldp	x4, x5, [up,#-16]	C n even: load the top two limbs
	tbz	n, #1, L(b00)

C n == 2 (mod 4)
L(b10):	lsr	x0, x5, tnc		C return value
	lsl	x13, x5, cnt
	lsr	x10, x4, tnc
	lsl	x17, x4, cnt
	sub	n, n, #2
	cbnz	n, L(gt2)
	orr	x10, x10, x13		C top result limb = (u1<<cnt) | (u0>>64-cnt)
	stp	x17, x10, [rp,#-16]	C n == 2: store both result limbs, done
	ret
L(gt2):	ldp	x4, x5, [up,#-32]
	orr	x10, x10, x13
	str	x10, [rp,#-8]		C store the top result limb
	sub	up, up, #16
	add	rp, rp, #8
	b	L(lo2)

C n == 0 (mod 4)
L(b00):	lsr	x0, x5, tnc		C return value
	lsl	x13, x5, cnt
	lsr	x10, x4, tnc
	lsl	x9, x4, cnt
	ldp	x6, x7, [up,#-32]
	orr	x10, x10, x13
	str	x10, [rp,#-8]		C store the top result limb
	add	rp, rp, #24
	b	L(lo0)

	ALIGN(16)
C Main loop: 4 limbs per iteration, pointers move downwards; each result
C limb combines (u[i]<<cnt) with (u[i-1]>>64-cnt) from the limb below.
L(top):	ldp	x4, x5, [up,#-48]
	sub	rp, rp, #32		C integrate with stp?
	sub	up, up, #32		C integrate with ldp?
	orr	x11, x11, x9
	orr	x10, x10, x13
	stp	x10, x11, [rp,#-16]
L(lo2):	lsr	x11, x5, tnc
	lsl	x13, x5, cnt
	lsr	x10, x4, tnc
	lsl	x9, x4, cnt
	ldp	x6, x7, [up,#-32]
	orr	x11, x11, x17		C fold in low bits carried from the limb above
	orr	x10, x10, x13
	stp	x10, x11, [rp,#-32]
L(lo0):	sub	n, n, #4
	lsr	x11, x7, tnc
	lsl	x13, x7, cnt
	lsr	x10, x6, tnc
	lsl	x17, x6, cnt
	cbnz	n, L(top)

L(end):	orr	x11, x11, x9
	orr	x10, x10, x13
	stp	x10, x11, [rp,#-48]
	str	x17, [rp,#-56]		C lowest result limb: u[0] << cnt
	ret
EPILOGUE()
123