xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/powerpc64/lshiftc.asm (revision c3ab26950fe8540fb553d1d1dcae454bc98e5a25)
1dnl  PowerPC-64 mpn_lshiftc -- rp[] = ~up[] << cnt
2
3dnl  Copyright 2003, 2005, 2010 Free Software Foundation, Inc.
4
5dnl  This file is part of the GNU MP Library.
6
7dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8dnl  it under the terms of the GNU Lesser General Public License as published
9dnl  by the Free Software Foundation; either version 3 of the License, or (at
10dnl  your option) any later version.
11
12dnl  The GNU MP Library is distributed in the hope that it will be useful, but
13dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
15dnl  License for more details.
16
17dnl  You should have received a copy of the GNU Lesser General Public License
18dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
19
20include(`../config.m4')
21
22C                   cycles/limb
23C POWER3/PPC630          ?
24C POWER4/PPC970          ?
25C POWER5                 2.25
26C POWER6                 9.5
27C POWER7                 2.15
28
29C TODO
30C  * Try to reduce the number of needed live registers
31C  * Micro-optimise header code
32C  * Keep in synch with lshift.asm and rshift.asm
33
34C INPUT PARAMETERS
C  rp  = r3   pointer through which the result limbs are stored
C  up  = r4   pointer through which the source limbs are loaded
C  n   = r5   limb count; scaled by 8 in the routine to get a byte offset
C  cnt = r6   left shift count in bits
35define(`rp',  `r3')
36define(`up',  `r4')
37define(`n',   `r5')
38define(`cnt', `r6')
39
C WORKING REGISTERS
C  tnc    = r0        set to 64 - cnt, the complementary right shift count
C  u0, u1 = r30, r31  source limbs loaded inside the routine; both registers
C                     are saved on entry and restored before returning
C  retval = r5        return value; shares its register with n, so n must
C                     not be read once retval has been written
40define(`tnc',`r0')
41define(`u0',`r30')
42define(`u1',`r31')
43define(`retval',`r5')
44
45ASM_START()
C mpn_lshiftc: shift the n-limb operand at up left by cnt bits, complement
C every result limb, and store the result at rp.  The value returned is the
C group of bits shifted out of the most significant source limb; that value
C is NOT complemented.
C
C Both pointers are first advanced past the end of their operands and the
C limbs are then processed from the highest address downwards.  The main
C loop is unrolled four ways.  The value of n mod 4 selects one of four
C lead-in sequences (b00, b01, b10, b11) that prime the software pipeline
C and then either enter the loop at top, L00, L11 or L10, or, when n is too
C small for a loop pass, go straight to the wind-down code at end, cj4, cj3
C or cj2.  The case n = 1 is handled entirely inside b01.
C
C Each result limb is formed as nor of (upper limb << cnt) and
C (lower limb >> tnc), i.e. the complement of the two merged parts.  The
C least significant result limb has no lower neighbour and is complemented
C on its own with nor r8, r8, r8, so its low cnt bits come out as ones.
C
C Register pairs r8/r9 and r12/r7 hold the two halves of alternate result
C limbs; r11 and r10 hold the finished limbs waiting to be stored.
C
C NOTE(review): no check is made before the first load or before shifting
C by tnc, so the code appears to assume n >= 1 and 0 < cnt < 64 -- confirm
C against the callers.
46PROLOGUE(mpn_lshiftc)
47	std	r31, -8(r1)	C save r31 (u1) below the stack pointer; r1 is not adjusted
48	std	r30, -16(r1)	C save r30 (u0) likewise
49	subfic	tnc, cnt, 64	C tnc = 64 - cnt
50	sldi	r7, n, 3	C byte count corresponding to n
51	add	up, up, r7	C up = up + n
52	add	rp, rp, r7	C rp = rp + n
53	rldicl.	r30, n, 0,62	C r30 = n & 3, set cr0
54	cmpdi	cr6, r30, 2	C cr6 = (n & 3) compared with 2
55	addi	r31, n, 3	C compute count...
56	ld	r10, -8(up)	C load 1st limb for b00...b11
57	srd	retval, r10, tnc	C bits shifted out of the top limb; overwrites n (same register)
58	srdi	r31, r31, 2	C ...for ctr
59	mtctr	r31		C copy count into ctr
60	beq	cr0, L(b00)	C n mod 4 = 0
61	blt	cr6, L(b01)	C n mod 4 = 1
62	ld	r11, -16(up)	C load 2nd limb for b10 and b11
63	beq	cr6, L(b10)	C n mod 4 = 2; otherwise fall through with n mod 4 = 3
64
C Lead-in for n mod 4 = 3.
65	ALIGN(16)
66L(b11):	sld	r8, r10, cnt
67	srd	r9, r11, tnc
68	ld	u1, -24(up)
69	addi	up, up, -24
70	sld	r12, r11, cnt
71	srd	r7, u1, tnc
72	addi	rp, rp, 16	C bias rp so the store offsets at cj3 and L11 line up
73	bdnz	L(gt3)		C more than 3 limbs: join the loop
74
C Exactly 3 limbs: finish the three result limbs and store them at cj3.
75	nor	r11, r8, r9
76	sld	r8, u1, cnt
77	nor	r8, r8, r8	C lowest limb: plain complement of the shifted value
78	b	L(cj3)
79
80	ALIGN(16)
81L(gt3):	ld	u0, -8(up)
82	nor	r11, r8, r9
83	sld	r8, u1, cnt
84	srd	r9, u0, tnc
85	ld	u1, -16(up)
86	nor	r10, r12, r7
87	b	L(L11)		C enter the loop with two stores left in this pass
88
C Lead-in for n mod 4 = 2.
89	ALIGN(32)
90L(b10):	sld	r12, r10, cnt
91	addi	rp, rp, 24	C bias rp so the store offsets at cj2 and L10 line up
92	srd	r7, r11, tnc
93	bdnz	L(gt2)		C more than 2 limbs: join the loop
94
C Exactly 2 limbs: finish both result limbs and store them at cj2.
95	sld	r8, r11, cnt
96	nor	r10, r12, r7
97	nor	r8, r8, r8	C lowest limb: plain complement of the shifted value
98	b	L(cj2)
99
100L(gt2):	ld	u0, -24(up)
101	sld	r8, r11, cnt
102	srd	r9, u0, tnc
103	ld	u1, -32(up)
104	nor	r10, r12, r7
105	sld	r12, u0, cnt
106	srd	r7, u1, tnc
107	ld	u0, -40(up)
108	nor	r11, r8, r9
109	addi	up, up, -16
110	b	L(L10)		C enter the loop with one store left in this pass
111
C Lead-in for n mod 4 = 0.
112	ALIGN(16)
113L(b00):	ld	u1, -16(up)
114	sld	r12, r10, cnt
115	srd	r7, u1, tnc
116	ld	u0, -24(up)
117	sld	r8, u1, cnt
118	srd	r9, u0, tnc
119	ld	u1, -32(up)
120	nor	r10, r12, r7
121	sld	r12, u0, cnt
122	srd	r7, u1, tnc
123	addi	rp, rp, 8	C bias rp so the store offsets at cj4 and L00 line up
124	bdz	L(cj4)		C exactly 4 limbs: go straight to the wind-down code
125
126L(gt4):	addi	up, up, -32
127	ld	u0, -8(up)
128	nor	r11, r8, r9
129	b	L(L00)		C enter the loop with three stores left in this pass
130
C Lead-in for n mod 4 = 1.
131	ALIGN(16)
132L(b01):	bdnz	L(gt1)		C more than 1 limb: prime the pipeline
133	sld	r8, r10, cnt	C single limb: shift, complement, store, return
134	nor	r8, r8, r8
135	std	r8, -8(rp)
136	b	L(ret)
137
138L(gt1):	ld	u0, -16(up)
139	sld	r8, r10, cnt
140	srd	r9, u0, tnc
141	ld	u1, -24(up)
142	sld	r12, u0, cnt
143	srd	r7, u1, tnc
144	ld	u0, -32(up)
145	nor	r11, r8, r9
146	sld	r8, u1, cnt
147	srd	r9, u0, tnc
148	ld	u1, -40(up)
149	addi	up, up, -40
150	nor	r10, r12, r7
151	bdz	L(end)		C exactly 5 limbs: skip the loop and drain the pipeline
152
C Main loop.  One pass loads four source limbs, stores four result limbs
C and moves up and rp down by 32 bytes.  L00, L11 and L10 are the mid-loop
C entry points used by the lead-in sequences.
153	ALIGN(32)
154L(top):	sld	r12, u0, cnt
155	srd	r7, u1, tnc
156	ld	u0, -8(up)
157	std	r11, -8(rp)
158	nor	r11, r8, r9
159L(L00):	sld	r8, u1, cnt
160	srd	r9, u0, tnc
161	ld	u1, -16(up)
162	std	r10, -16(rp)
163	nor	r10, r12, r7
164L(L11):	sld	r12, u0, cnt
165	srd	r7, u1, tnc
166	ld	u0, -24(up)
167	std	r11, -24(rp)
168	nor	r11, r8, r9
169L(L10):	sld	r8, u1, cnt
170	srd	r9, u0, tnc
171	ld	u1, -32(up)
172	addi	up, up, -32	C step the source pointer down by four limbs
173	std	r10, -32(rp)
174	addi	rp, rp, -32	C step the destination pointer down by four limbs
175	nor	r10, r12, r7
176	bdnz	L(top)		C decrement ctr and repeat while it is nonzero
177
C Wind-down.  No further loads are done; the limbs still in flight are
C combined and stored.  cj4, cj3 and cj2 are the entry points used when n
C is 4, 3 or 2 respectively.
178	ALIGN(32)
179L(end):	sld	r12, u0, cnt
180	srd	r7, u1, tnc
181	std	r11, -8(rp)
182L(cj4):	nor	r11, r8, r9
183	sld	r8, u1, cnt
184	std	r10, -16(rp)
185	nor	r8, r8, r8	C lowest limb: plain complement of the shifted value
186L(cj3):	nor	r10, r12, r7
187	std	r11, -24(rp)
188L(cj2):	std	r10, -32(rp)
189	std	r8, -40(rp)	C store the least significant result limb
190
C Restore the two saved registers and hand back retval.  When
C HAVE_ABI_mode32 is defined the result is returned as r3 = retval >> 32
C together with r4 = retval; otherwise retval is copied to r3.
191L(ret):	ld	r31, -8(r1)
192	ld	r30, -16(r1)
193ifdef(`HAVE_ABI_mode32',
194`	srdi	r3, retval, 32
195	mr	r4, retval
196',`	mr	r3, retval')
197	blr
198EPILOGUE()
199