dnl  xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/powerpc64/rshift.asm (revision 49d8c9ecf4abd21261269266ef64939f71b3cd09)
dnl  PowerPC-64 mpn_rshift -- rp[] = up[] >> cnt

dnl  Copyright 2003, 2005, 2010, 2011 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C                   cycles/limb
C POWER3/PPC630          ?
C POWER4/PPC970          ?
C POWER5                 2.25
C POWER6                 9.75
C POWER7                 2.15

C TODO
C  * Try to reduce the number of needed live registers
C  * Micro-optimise header code
C  * Keep in synch with lshift.asm and lshiftc.asm

C INPUT PARAMETERS
C  rp   destination limb pointer, written with std
C  up   source limb pointer, read with ld
C  n    number of limbs to process
C  cnt  number of bit positions to shift right
define(`rp',  `r3')
define(`up',  `r4')
define(`n',   `r5')
define(`cnt', `r6')

C WORKING REGISTERS
C  tnc     complementary shift count, set to 64-cnt on entry
C  u0, u1  alternating source limb buffers; r30 and r31 are saved on entry
C          and restored before returning
C  retval  return limb; shares r5 with n, so it may only be written once n
C          is no longer needed
define(`tnc',`r0')
define(`u0',`r30')
define(`u1',`r31')
define(`retval',`r5')

ASM_START()
C mpn_rshift -- shift the n-limb operand at up right by cnt bits and store
C the n-limb result at rp.
C
C Each result limb is formed as (up[i] >> cnt) | (up[i+1] << tnc) where
C tnc = 64-cnt; the most significant result limb is just up[n-1] >> cnt.
C
C Return value: up[0] << tnc, that is the bits shifted out at the low end,
C placed in the high bits of the return limb.  With HAVE_ABI_mode32 the value
C is split, upper 32 bits in r3 and lower 32 bits in r4.
C
C Presumably requires n >= 1 and 1 <= cnt <= 63 as for the other mpn shift
C functions; nothing is validated here.
C
C Structure: ctr is loaded with (n+3)/4.  The code dispatches on n mod 4 to
C one of four feed-in sequences (b00, b01, b10, b11) which prime the software
C pipeline and then either enter the 4-way unrolled loop at the matching
C point (L00, L11, L10 or top) or, for small n, jump straight into the
C wind-down code (end, cj4, cj3, cj2).  The feed-in code biases rp so that
C the fixed store offsets used by the loop and the wind-down code line up.
C
C r30 and r31 are saved below the stack pointer without allocating a frame;
C the function makes no calls.
PROLOGUE(mpn_rshift)
	std	r31, -8(r1)
	std	r30, -16(r1)
	subfic	tnc, cnt, 64
C	sldi	r30, n, 3	C byte count corresponding to n
C	add	rp, rp, r30	C rp = rp + n
C	add	up, up, r30	C up = up + n
	rldicl.	r30, n, 0,62	C r30 = n & 3, set cr0
	cmpdi	cr6, r30, 2
	addi	r31, n, 3	C compute count...
	ld	r10, 0(up)	C load 1st limb for b00...b11
	sld	retval, r10, tnc	C return limb; overwrites n, which is dead now
C In mode32 only the low 32 bits of n+3 are used for the count.
ifdef(`HAVE_ABI_mode32',
`	rldicl	r31, r31, 62,34',	C ...branch count
`	srdi	r31, r31, 2')	C ...for ctr
	mtctr	r31		C copy count into ctr
	beq	cr0, L(b00)	C n mod 4 = 0
	blt	cr6, L(b01)	C n mod 4 = 1
	ld	r11, 8(up)	C load 2nd limb for b10 and b11
	beq	cr6, L(b10)	C n mod 4 = 2, else fall into b11

C Feed-in for n mod 4 = 3.
	ALIGN(16)
L(b11):	srd	r8, r10, cnt
	sld	r9, r11, tnc
	ld	u1, 16(up)
	addi	up, up, 24
	srd	r12, r11, cnt
	sld	r7, u1, tnc
	addi	rp, rp, -16
	bdnz	L(gt3)

C n = 3: finish in the wind-down code.
	or	r11, r8, r9
	srd	r8, u1, cnt
	b	L(cj3)

	ALIGN(16)
L(gt3):	ld	u0, 0(up)
	or	r11, r8, r9
	srd	r8, u1, cnt
	sld	r9, u0, tnc
	ld	u1, 8(up)
	or	r10, r12, r7
	b	L(L11)

C Feed-in for n mod 4 = 2.
	ALIGN(32)
L(b10):	srd	r12, r10, cnt
	addi	rp, rp, -24
	sld	r7, r11, tnc
	bdnz	L(gt2)

C n = 2: finish in the wind-down code.
	srd	r8, r11, cnt
	or	r10, r12, r7
	b	L(cj2)

L(gt2):	ld	u0, 16(up)
	srd	r8, r11, cnt
	sld	r9, u0, tnc
	ld	u1, 24(up)
	or	r10, r12, r7
	srd	r12, u0, cnt
	sld	r7, u1, tnc
	ld	u0, 32(up)
	or	r11, r8, r9
	addi	up, up, 16
	b	L(L10)

C Feed-in for n mod 4 = 0.
	ALIGN(16)
L(b00):	ld	u1, 8(up)
	srd	r12, r10, cnt
	sld	r7, u1, tnc
	ld	u0, 16(up)
	srd	r8, u1, cnt
	sld	r9, u0, tnc
	ld	u1, 24(up)
	or	r10, r12, r7
	srd	r12, u0, cnt
	sld	r7, u1, tnc
	addi	rp, rp, -8
	bdz	L(cj4)		C n = 4: finish in the wind-down code

L(gt4):	addi	up, up, 32
	ld	u0, 0(up)
	or	r11, r8, r9
	b	L(L00)

C Feed-in for n mod 4 = 1.
	ALIGN(16)
L(b01):	bdnz	L(gt1)
C n = 1: the single result limb is up[0] >> cnt.
	srd	r8, r10, cnt
	std	r8, 0(rp)
	b	L(ret)

L(gt1):	ld	u0, 8(up)
	srd	r8, r10, cnt
	sld	r9, u0, tnc
	ld	u1, 16(up)
	srd	r12, u0, cnt
	sld	r7, u1, tnc
	ld	u0, 24(up)
	or	r11, r8, r9
	srd	r8, u1, cnt
	sld	r9, u0, tnc
	ld	u1, 32(up)
	addi	up, up, 40
	or	r10, r12, r7
	bdz	L(end)		C n = 5: skip the loop

C Main loop, 4 limbs per iteration.  r10 and r11 carry finished result limbs
C awaiting their store; r8, r9 and r12, r7 hold the shifted halves of the
C next two result limbs; u0 and u1 hold the most recently loaded source limbs.
	ALIGN(32)
L(top):	srd	r12, u0, cnt
	sld	r7, u1, tnc
	ld	u0, 0(up)
	std	r11, 0(rp)
	or	r11, r8, r9
L(L00):	srd	r8, u1, cnt
	sld	r9, u0, tnc
	ld	u1, 8(up)
	std	r10, 8(rp)
	or	r10, r12, r7
L(L11):	srd	r12, u0, cnt
	sld	r7, u1, tnc
	ld	u0, 16(up)
	std	r11, 16(rp)
	or	r11, r8, r9
L(L10):	srd	r8, u1, cnt
	sld	r9, u0, tnc
	ld	u1, 24(up)
	addi	up, up, 32
	std	r10, 24(rp)
	addi	rp, rp, 32
	or	r10, r12, r7
	bdnz	L(top)

C Wind-down: combine and store the limbs still in flight.  The final store
C writes the most significant result limb, which is a plain right shift.
	ALIGN(32)
L(end):	srd	r12, u0, cnt
	sld	r7, u1, tnc
	std	r11, 0(rp)
L(cj4):	or	r11, r8, r9
	srd	r8, u1, cnt
	std	r10, 8(rp)
L(cj3):	or	r10, r12, r7
	std	r11, 16(rp)
L(cj2):	std	r10, 24(rp)
	std	r8, 32(rp)

C Restore the saved registers and deliver the return value.
L(ret):	ld	r31, -8(r1)
	ld	r30, -16(r1)
ifdef(`HAVE_ABI_mode32',
`	srdi	r3, retval, 32
	mr	r4, retval
',`	mr	r3, retval')
	blr
EPILOGUE()