xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/powerpc64/lshift.asm (revision f3cfa6f6ce31685c6c4a758bc430e69eb99f50a4)
1dnl  PowerPC-64 mpn_lshift -- rp[] = up[] << cnt
2
3dnl  Copyright 2003, 2005, 2010, 2011 Free Software Foundation, Inc.
4
5dnl  This file is part of the GNU MP Library.
6dnl
7dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8dnl  it under the terms of either:
9dnl
10dnl    * the GNU Lesser General Public License as published by the Free
11dnl      Software Foundation; either version 3 of the License, or (at your
12dnl      option) any later version.
13dnl
14dnl  or
15dnl
16dnl    * the GNU General Public License as published by the Free Software
17dnl      Foundation; either version 2 of the License, or (at your option) any
18dnl      later version.
19dnl
20dnl  or both in parallel, as here.
21dnl
22dnl  The GNU MP Library is distributed in the hope that it will be useful, but
23dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
25dnl  for more details.
26dnl
27dnl  You should have received copies of the GNU General Public License and the
28dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
29dnl  see https://www.gnu.org/licenses/.
30
31include(`../config.m4')
32
33C                   cycles/limb
34C POWER3/PPC630          ?
35C POWER4/PPC970          ?
36C POWER5                 2.25
37C POWER6                 9.75
38C POWER7                 2.15
39
40C TODO
41C  * Try to reduce the number of needed live registers
42C  * Micro-optimise header code
43C  * Keep in synch with rshift.asm and lshiftc.asm
44
45C INPUT PARAMETERS
C Symbolic names for the four argument registers of mpn_lshift:
C   rp   destination limb pointer (all stores go through rp)
C   up   source limb pointer (all limb loads go through up)
C   n    number of limbs (scaled by 8 to form the byte count)
C   cnt  left shift count in bits
46define(`rp',  `r3')
47define(`up',  `r4')
48define(`n',   `r5')
49define(`cnt', `r6')
50
C WORKING REGISTERS
C   tnc     64 - cnt, the right shift count used for the spill-over bits
C   u0, u1  source limbs loaded ahead of use; r30 and r31 are saved on entry
C           to mpn_lshift and reloaded before it returns
C   retval  bits shifted out at the top; shares r5 with n, so it may only be
C           written once n is no longer needed
51define(`tnc',`r0')
52define(`u0',`r30')
53define(`u1',`r31')
54define(`retval',`r5')
55
56ASM_START()
C mpn_lshift -- shift the n-limb operand at up left by cnt bits and store the
C n-limb result at rp.
C
C   In:   rp (r3) = destination, up (r4) = source, n (r5) = limb count,
C         cnt (r6) = shift count in bits.
C   Out:  the bits shifted out of the most significant source limb, that is
C         up[n-1] >> (64 - cnt), returned in r3 (split over r3 and r4 when
C         HAVE_ABI_mode32 is defined).
C
C Limbs are processed from the most significant end downwards.  Every result
C limb is (source limb << cnt) | (next lower source limb >> tnc); the least
C significant result limb is up[0] << cnt alone.  The main loop at L(top)
C handles four limbs per iteration with the loads, shifts and stores of
C neighbouring limbs interleaved.  One of four feed-in blocks, chosen by
C n mod 4, preloads the registers and then enters the loop or the wind-down
C code at the matching point.
C
C NOTE(review): up[n-1] is loaded unconditionally, so n >= 1 is presumably
C required; the GMP manual restricts cnt to 1 <= cnt < bits per limb for
C mpn_lshift -- confirm against the caller contract.
57PROLOGUE(mpn_lshift)
58	std	r31, -8(r1)	C save r31 (u1) below the stack pointer
59	std	r30, -16(r1)	C save r30 (u0) below the stack pointer
60	subfic	tnc, cnt, 64	C tnc = 64 - cnt
61	sldi	r7, n, 3	C byte count corresponding to n
62	add	up, up, r7	C up = up + 8n bytes, just past the top source limb
63	add	rp, rp, r7	C rp = rp + 8n bytes, just past the top result limb
64	rldicl.	r30, n, 0,62	C r30 = n & 3, set cr0
65	cmpdi	cr6, r30, 2	C cr6 = (n & 3) compared with 2
66	addi	r31, n, 3	C compute count...
67	ld	r10, -8(up)	C load 1st limb for b00...b11
C retval shares r5 with n; n is not read again after this point.
68	srd	retval, r10, tnc	C return value = up[n-1] >> tnc
C ctr = (n + 3) >> 2.  The mode32 variant rotates right by 2 and keeps only
C the low 30 bits of the result.
69ifdef(`HAVE_ABI_mode32',
70`	rldicl	r31, r31, 62,34',	C ...branch count
71`	srdi	r31, r31, 2')	C ...for ctr
72	mtctr	r31		C copy count into ctr
73	beq	cr0, L(b00)	C n mod 4 = 0
74	blt	cr6, L(b01)	C n mod 4 = 1
75	ld	r11, -16(up)	C load 2nd limb for b10 and b11
76	beq	cr6, L(b10)	C n mod 4 = 2, otherwise fall into b11
77
78	ALIGN(16)
C Feed-in for n mod 4 = 3.  r10 = up[n-1] and r11 = up[n-2] are loaded.
C rp is moved up by 16 so that the store offsets used at L(L11) and L(cj3)
C address the correct result limbs.
79L(b11):	sld	r8, r10, cnt
80	srd	r9, r11, tnc
81	ld	u1, -24(up)
82	addi	up, up, -24
83	sld	r12, r11, cnt
84	srd	r7, u1, tnc
85	addi	rp, rp, 16
86	bdnz	L(gt3)
87
C n = 3: nothing left to load, finish in the wind-down code.
88	or	r11, r8, r9
89	sld	r8, u1, cnt
90	b	L(cj3)
91
92	ALIGN(16)
C n >= 7: load two more limbs and join the loop at L(L11).
93L(gt3):	ld	u0, -8(up)
94	or	r11, r8, r9
95	sld	r8, u1, cnt
96	srd	r9, u0, tnc
97	ld	u1, -16(up)
98	or	r10, r12, r7
99	b	L(L11)
100
101	ALIGN(32)
C Feed-in for n mod 4 = 2.  r10 = up[n-1] and r11 = up[n-2] are loaded.
C rp is moved up by 24 so that the store offsets used at L(L10) and L(cj2)
C address the correct result limbs.
102L(b10):	sld	r12, r10, cnt
103	addi	rp, rp, 24
104	srd	r7, r11, tnc
105	bdnz	L(gt2)
106
C n = 2: both result limbs are ready, store them at L(cj2).
107	sld	r8, r11, cnt
108	or	r10, r12, r7
109	b	L(cj2)
110
C n >= 6: load three more limbs and join the loop at L(L10).
111L(gt2):	ld	u0, -24(up)
112	sld	r8, r11, cnt
113	srd	r9, u0, tnc
114	ld	u1, -32(up)
115	or	r10, r12, r7
116	sld	r12, u0, cnt
117	srd	r7, u1, tnc
118	ld	u0, -40(up)
119	or	r11, r8, r9
120	addi	up, up, -16
121	b	L(L10)
122
123	ALIGN(16)
C Feed-in for n mod 4 = 0.  Only r10 = up[n-1] is loaded on entry.
C rp is moved up by 8 so that the store offsets used at L(L00) and L(cj4)
C address the correct result limbs.
124L(b00):	ld	u1, -16(up)
125	sld	r12, r10, cnt
126	srd	r7, u1, tnc
127	ld	u0, -24(up)
128	sld	r8, u1, cnt
129	srd	r9, u0, tnc
130	ld	u1, -32(up)
131	or	r10, r12, r7
132	sld	r12, u0, cnt
133	srd	r7, u1, tnc
134	addi	rp, rp, 8
135	bdz	L(cj4)		C n = 4: go straight to the wind-down code
136
C n >= 8: load one more limb and join the loop at L(L00).
137L(gt4):	addi	up, up, -32
138	ld	u0, -8(up)
139	or	r11, r8, r9
140	b	L(L00)
141
142	ALIGN(16)
C Feed-in for n mod 4 = 1.  Only r10 = up[n-1] is loaded on entry; rp needs
C no adjustment on this path.
143L(b01):	bdnz	L(gt1)
C n = 1: the single result limb is up[0] << cnt.
144	sld	r8, r10, cnt
145	std	r8, -8(rp)
146	b	L(ret)
147
C n >= 5: load four more limbs, then either fall into the loop at L(top)
C or, for n = 5, branch to the wind-down code at L(end).
148L(gt1):	ld	u0, -16(up)
149	sld	r8, r10, cnt
150	srd	r9, u0, tnc
151	ld	u1, -24(up)
152	sld	r12, u0, cnt
153	srd	r7, u1, tnc
154	ld	u0, -32(up)
155	or	r11, r8, r9
156	sld	r8, u1, cnt
157	srd	r9, u0, tnc
158	ld	u1, -40(up)
159	addi	up, up, -40
160	or	r10, r12, r7
161	bdz	L(end)
162
163	ALIGN(32)
C Main loop, four limbs per iteration.  The two halves of each result limb
C are produced in the register pairs r8,r9 and r12,r7 and combined
C alternately into r11 and r10, which are stored in a later quarter of the
C loop.  u0 and u1 receive the limbs loaded ahead.  up and rp both step down
C by 32 bytes per iteration.  L(L00), L(L11) and L(L10) are the entry points
C used by the feed-in blocks above.
164L(top):	sld	r12, u0, cnt
165	srd	r7, u1, tnc
166	ld	u0, -8(up)
167	std	r11, -8(rp)
168	or	r11, r8, r9
169L(L00):	sld	r8, u1, cnt
170	srd	r9, u0, tnc
171	ld	u1, -16(up)
172	std	r10, -16(rp)
173	or	r10, r12, r7
174L(L11):	sld	r12, u0, cnt
175	srd	r7, u1, tnc
176	ld	u0, -24(up)
177	std	r11, -24(rp)
178	or	r11, r8, r9
179L(L10):	sld	r8, u1, cnt
180	srd	r9, u0, tnc
181	ld	u1, -32(up)
182	addi	up, up, -32
183	std	r10, -32(rp)
184	addi	rp, rp, -32
185	or	r10, r12, r7
186	bdnz	L(top)
187
188	ALIGN(32)
C Wind-down: no more loads, combine and store the limbs still held in
C registers.  L(cj4), L(cj3) and L(cj2) are the entry points for n = 4,
C n = 3 and n = 2, which never run the loop.  The last store writes the
C least significant result limb, a plain left shift held in r8.
189L(end):	sld	r12, u0, cnt
190	srd	r7, u1, tnc
191	std	r11, -8(rp)
192L(cj4):	or	r11, r8, r9
193	sld	r8, u1, cnt
194	std	r10, -16(rp)
195L(cj3):	or	r10, r12, r7
196	std	r11, -24(rp)
197L(cj2):	std	r10, -32(rp)
198	std	r8, -40(rp)
199
C Reload the saved registers and return the shifted-out bits.  With
C HAVE_ABI_mode32 the value is returned as retval >> 32 in r3 plus a copy of
C retval in r4; otherwise the full 64-bit value is returned in r3.
200L(ret):	ld	r31, -8(r1)
201	ld	r30, -16(r1)
202ifdef(`HAVE_ABI_mode32',
203`	srdi	r3, retval, 32
204	mr	r4, retval
205',`	mr	r3, retval')
206	blr
207EPILOGUE()
208