dnl  ARM64 mpn_rshift.

dnl  Copyright 2013, 2014 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C	     cycles/limb
C Cortex-A53	 ?
C Cortex-A57	 ?

changecom(@&*$)

define(`rp_arg', `x0')
define(`up',     `x1')
define(`n',      `x2')
define(`cnt',    `x3')

define(`rp',     `x16')

define(`tnc',`x8')

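C  mpn_rshift(rp, up, n, cnt): shift {up,n} right by cnt bits (1 <= cnt <= 63),
C  store the result at {rp,n}, and return the bits shifted out of the low limb,
C  placed at the high end of the return value.  Roughly, as a C sketch of the
C  same semantics (not of the unrolled structure used below):
C
C	mp_limb_t retval = up[0] << (64 - cnt);
C	for (mp_size_t i = 0; i < n - 1; i++)
C	  rp[i] = (up[i] >> cnt) | (up[i+1] << (64 - cnt));
C	rp[n-1] = up[n-1] >> cnt;
C	return retval;
C
C  The feed-in code dispatches on n mod 4 (L(b01), L(b10), L(b11), L(b00)),
C  then falls into a 4-way unrolled main loop.
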
ASM_START()
PROLOGUE(mpn_rshift)
	mov	rp, rp_arg		C free up x0 for the return value
	sub	tnc, xzr, cnt		C tnc = 64 - cnt (shift counts act mod 64)
	tbz	n, #0, L(bx0)		C branch if n is even

L(bx1):	ldr	x4, [up,#0]		C n odd
	tbnz	n, #1, L(b11)

L(b01):	lsl	x0, x4, tnc		C n = 1 mod 4; x0 = retval (bits shifted out)
	lsr	x18, x4, cnt
	sub	n, n, #1
	cbnz	n, L(gt1)
	str	x18, [rp,#0]		C n = 1
	ret
L(gt1):	ldp	x5, x4, [up,#8]
	sub	up, up, #8
	sub	rp, rp, #32
	b	L(lo2)

L(b11):	lsl	x0, x4, tnc		C n = 3 mod 4; x0 = retval (bits shifted out)
	lsr	x9, x4, cnt
	ldp	x7, x6, [up,#8]
	add	n, n, #1
	sub	up, up, #24
	sub	rp, rp, #48
	b	L(lo0)

L(bx0):	ldp	x5, x4, [up,#0]		C n even
	tbz	n, #1, L(b00)

L(b10):	lsl	x0, x5, tnc		C n = 2 mod 4; x0 = retval (bits shifted out)
	lsr	x13, x5, cnt
	lsl	x10, x4, tnc
	lsr	x18, x4, cnt
	sub	n, n, #2
	cbnz	n, L(gt2)
	orr	x10, x10, x13
	stp	x10, x18, [rp,#0]	C n = 2
	ret
L(gt2):	ldp	x5, x4, [up,#16]
	orr	x10, x10, x13
	str	x10, [rp,#0]
	sub	rp, rp, #24
	b	L(lo2)

L(b00):	lsl	x0, x5, tnc		C n = 0 mod 4; x0 = retval (bits shifted out)
	lsr	x13, x5, cnt
	lsl	x10, x4, tnc
	lsr	x9, x4, cnt
	ldp	x7, x6, [up,#16]
	orr	x10, x10, x13
	str	x10, [rp,#0]
	sub	up, up, #16
	sub	rp, rp, #40
	b	L(lo0)

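C  Main loop: 4 limbs per iteration.  x9, x13 and x18 carry right-shifted
C  parts of earlier limbs into the following orr/store steps; x0 (the return
C  value) is set once in the feed-in code and left untouched here.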
	ALIGN(16)
L(top):	ldp	x5, x4, [up,#48]
	add	rp, rp, #32		C integrate with stp?
	add	up, up, #32		C integrate with ldp?
	orr	x11, x11, x9
	orr	x10, x10, x13
	stp	x11, x10, [rp,#16]
L(lo2):	lsl	x11, x5, tnc
	lsr	x13, x5, cnt
	lsl	x10, x4, tnc
	lsr	x9, x4, cnt
	ldp	x7, x6, [up,#32]
	orr	x11, x11, x18
	orr	x10, x10, x13
	stp	x11, x10, [rp,#32]
L(lo0):	sub	n, n, #4
	lsl	x11, x7, tnc
	lsr	x13, x7, cnt
	lsl	x10, x6, tnc
	lsr	x18, x6, cnt
	cbnz	n, L(top)

L(end):	orr	x11, x11, x9
	orr	x10, x10, x13
	stp	x11, x10, [rp,#48]
	str	x18, [rp,#64]
	ret
EPILOGUE()