xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/powerpc32/lshift.asm (revision 413d532bcc3f62d122e56d92e13ac64825a40baf)
1dnl  PowerPC-32 mpn_lshift -- Shift a number left.
2
3dnl  Copyright 1995, 1998, 2000, 2002, 2003, 2004, 2005 Free Software
4dnl  Foundation, Inc.
5
6dnl  This file is part of the GNU MP Library.
7
8dnl  The GNU MP Library is free software; you can redistribute it and/or modify
9dnl  it under the terms of the GNU Lesser General Public License as published
10dnl  by the Free Software Foundation; either version 3 of the License, or (at
11dnl  your option) any later version.
12
13dnl  The GNU MP Library is distributed in the hope that it will be useful, but
14dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16dnl  License for more details.
17
18dnl  You should have received a copy of the GNU Lesser General Public License
19dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
20
21include(`../config.m4')
22
23C                cycles/limb
24C 603e:            ?
25C 604e:            3.0
26C 75x (G3):        3.0
27C 7400,7410 (G4):  3.0
28C 7445,7455 (G4+): 2.5
29C 7447,7457 (G4+): 2.25
30C power4/ppc970:   2.5
31C power5:          2.5
32
33C INPUT PARAMETERS
34C rp	r3
35C up	r4
36C n	r5
37C cnt	r6
38
39ASM_START()
C mpn_lshift: shift the n-limb number at up left by cnt bits and store the
C n-limb result at rp, working from the most significant limb downwards.
C Limbs are handled as 32-bit words.  The value returned in r3 is the most
C significant source limb shifted right by 32-cnt, that is, the bits pushed
C out at the top.
C
C Operands of at most 30 limbs use a compact loop unrolled two ways.  Larger
C operands branch to L(BIG), which is unrolled four ways and uses r24-r31.
C
C NOTE(review): nothing here validates n or cnt; the code looks written for
C n >= 1 and 0 < cnt < 32 -- confirm against the mpn_lshift documentation.
40PROLOGUE(mpn_lshift)
41	cmpwi	cr0, r5, 30	C more than 30 limbs?
42	slwi	r0, r5, 2	C r0 = n * 4, the operand size in bytes
43	add	r4, r4, r0	C make r4 point at end of s1
44	add	r7, r3, r0	C make r7 point at end of res
45	bgt	L(BIG)		C branch if more than 30 limbs
46
C Small operand path.  CTR counts the limbs still to be handled; r11 and r10
C alternate as the current and the next lower source limb.
47	mtctr	r5		C copy size into CTR
48	subfic	r8, r6, 32	C r8 = 32 - cnt, the complementary shift count
49	lwzu	r11, -4(r4)	C load first s1 limb
50	srw	r3, r11, r8	C compute function return value
51	bdz	L(end1)		C single limb: only the final store remains
52
C Each half of the loop forms one result word from the higher limb shifted
C left by cnt OR-ed with the next lower limb shifted right by 32-cnt.
53L(oop):	lwzu	r10, -4(r4)	C r10 = next lower limb
54	slw	r9, r11, r6
55	srw	r12, r10, r8
56	or	r9, r9, r12
57	stwu	r9, -4(r7)
58	bdz	L(end2)		C no limbs left, r10 is the lowest limb
59	lwzu	r11, -4(r4)	C same again with r10 and r11 swapped
60	slw	r9, r10, r6
61	srw	r12, r11, r8
62	or	r9, r9, r12
63	stwu	r9, -4(r7)
64	bdnz	L(oop)
65
C Lowest result word: the lowest source limb shifted left, nothing OR-ed in.
66L(end1):
67	slw	r0, r11, r6	C lowest limb is in r11
68	stw	r0, -4(r7)
69	blr
70L(end2):
71	slw	r0, r10, r6	C lowest limb is in r10
72	stw	r0, -4(r7)
73	blr
74
C Large operand path.  r24-r31 are stored in the 32 bytes just below r1
C without moving r1, and reloaded from there before returning.
C Throughout this path r0 holds the left-shifted part of the last limb
C processed, waiting to be OR-ed into the next result word.
75L(BIG):
76	stmw	r24, -32(r1)	C save registers we are supposed to preserve
77	lwzu	r9, -4(r4)	C load most significant s1 limb
78	subfic	r8, r6, 32	C r8 = 32 - cnt
79	srw	r3, r9, r8	C compute function return value
80	slw	r0, r9, r6	C r0 = pending high part of first result word
81	addi	r5, r5, -1	C r5 = limbs remaining after the first one
82
C Peel off r5 mod 4 limbs so that the rest is a multiple of four.
83	andi.	r10, r5, 3	C count for spill loop
84	beq	L(e)		C nothing to peel
85	mtctr	r10
86	lwzu	r28, -4(r4)
87	bdz	L(xe0)		C exactly one limb to peel
88
89L(loop0):
90	slw	r12, r28, r6	C r12 = next pending high part
91	srw	r24, r28, r8
92	lwzu	r28, -4(r4)	C fetch following limb
93	or	r24, r0, r24
94	stwu	r24, -4(r7)
95	mr	r0, r12
96	bdnz	L(loop0)	C taken at most once!
97
C Last peeled limb: same as the loop body but without a further load.
98L(xe0):	slw	r12, r28, r6
99	srw	r24, r28, r8
100	or	r24, r0, r24
101	stwu	r24, -4(r7)
102	mr	r0, r12
103
C Main part, four limbs per pass.  The final group of four is handled by the
C load-free code after the loop, hence the extra -1 on the count.
104L(e):	srwi	r5, r5, 2	C count for unrolled loop
105	addi	r5, r5, -1
106	mtctr	r5
107	lwz	r28, -4(r4)	C preload first group into r28-r31
108	lwz	r29, -8(r4)
109	lwz	r30, -12(r4)
110	lwzu	r31, -16(r4)	C update form moves r4 down by four limbs
111
C In each pass r9-r12 receive the left-shifted parts and r24-r27 the
C right-shifted parts of the current group, while r28-r31 are reloaded with
C the next group.  The stores then pair each right part with the left part
C of the limb above it, r0 supplying the one from the previous group.
112L(loopU):
113	slw	r9, r28, r6
114	srw	r24, r28, r8
115	lwz	r28, -4(r4)
116	slw	r10, r29, r6
117	srw	r25, r29, r8
118	lwz	r29, -8(r4)
119	slw	r11, r30, r6
120	srw	r26, r30, r8
121	lwz	r30, -12(r4)
122	slw	r12, r31, r6
123	srw	r27, r31, r8
124	lwzu	r31, -16(r4)
125	or	r24, r0, r24
126	stw	r24, -4(r7)
127	or	r25, r9, r25
128	stw	r25, -8(r7)
129	or	r26, r10, r26
130	stw	r26, -12(r7)
131	or	r27, r11, r27
132	stwu	r27, -16(r7)	C update form moves r7 down by four limbs
133	mr	r0, r12		C carry low limb of this group into next pass
134	bdnz	L(loopU)
135
C Drain: process the group already loaded in r28-r31, with no more loads.
136	slw	r9, r28, r6
137	srw	r24, r28, r8
138	slw	r10, r29, r6
139	srw	r25, r29, r8
140	slw	r11, r30, r6
141	srw	r26, r30, r8
142	slw	r12, r31, r6
143	srw	r27, r31, r8
144	or	r24, r0, r24
145	stw	r24, -4(r7)
146	or	r25, r9, r25
147	stw	r25, -8(r7)
148	or	r26, r10, r26
149	stw	r26, -12(r7)
150	or	r27, r11, r27
151	stw	r27, -16(r7)
152
153	stw	r12, -20(r7)	C lowest result word, lowest limb shifted left
154	lmw	r24, -32(r1)	C restore registers
155	blr
156EPILOGUE()
157