dnl  xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/powerpc32/lshift.asm (revision ce54336801cf28877c3414aa2fcb251dddd543a2)
dnl  PowerPC-32 mpn_lshift -- Shift a number left.

dnl  Copyright 1995, 1998, 2000, 2002-2005 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C                cycles/limb
C 603e:            ?
C 604e:            3.0
C 75x (G3):        3.0
C 7400,7410 (G4):  3.0
C 7445,7455 (G4+): 2.5
C 7447,7457 (G4+): 2.25
C power4/ppc970:   2.5
C power5:          2.5

C INPUT PARAMETERS
C rp	r3	destination limb vector
C up	r4	source limb vector
C n	r5	number of limbs; the top limb is loaded unconditionally, so
C		n must be at least 1
C cnt	r6	shift count in bits
C
C RETURN VALUE
C r3	the top source limb shifted right by 32-cnt, i.e. the bits that
C	were shifted out at the most significant end
C
C OPERATION
C Both vectors are walked from the most significant limb downwards.  Each
C result limb is (limb << cnt) | (next lower limb >> (32-cnt)); the lowest
C result limb is just (lowest limb << cnt).
C
C Operands of up to 30 limbs use a two-way unrolled loop that needs only
C volatile registers.  Larger operands take the BIG path, which saves
C r24-r31 in a 48-byte stack frame and runs a four-way unrolled loop.
C
C REGISTER USE (both paths)
C r4	running source pointer, decremented by the update-form loads
C r7	running destination pointer, decremented by the update-form stores
C r8	32-cnt, the complementary right shift count

ASM_START()
PROLOGUE(mpn_lshift)
	cmpwi	cr0, r5, 30	C more than 30 limbs?
	slwi	r0, r5, 2	C r0 = size of the operands in bytes
	add	r4, r4, r0	C make r4 point at end of s1
	add	r7, r3, r0	C make r7 point at end of res
	bgt	L(BIG)		C branch if more than 30 limbs

C Small operands.  r10 and r11 alternate as current and next lower limb, so
C the loop body handles two limbs per iteration without register moves.
	mtctr	r5		C copy size into CTR
	subfic	r8, r6, 32	C r8 = 32-cnt
	lwzu	r11, -4(r4)	C load first s1 limb
	srw	r3, r11, r8	C compute function return value
	bdz	L(end1)		C n = 1, only the lowest limb remains

L(oop):	lwzu	r10, -4(r4)	C next lower limb
	slw	r9, r11, r6	C high part from the current limb
	srw	r12, r10, r8	C low part from the next lower limb
	or	r9, r9, r12
	stwu	r9, -4(r7)
	bdz	L(end2)		C r10 holds the lowest limb
	lwzu	r11, -4(r4)	C same again with r10 and r11 swapped
	slw	r9, r10, r6
	srw	r12, r11, r8
	or	r9, r9, r12
	stwu	r9, -4(r7)
	bdnz	L(oop)

L(end1):
	slw	r0, r11, r6	C lowest result limb, zero bits shifted in
	stw	r0, -4(r7)
	blr
L(end2):
	slw	r0, r10, r6	C lowest result limb, zero bits shifted in
	stw	r0, -4(r7)
	blr

C Large operands.  r0 carries the already shifted high part of the result
C limb that is stored next.
L(BIG):
	stwu	r1, -48(r1)	C allocate a 48-byte stack frame
	stmw	r24, 8(r1)	C save registers we are supposed to preserve
	lwzu	r9, -4(r4)	C load first s1 limb
	subfic	r8, r6, 32	C r8 = 32-cnt
	srw	r3, r9, r8	C compute function return value
	slw	r0, r9, r6	C pending high part for the next result limb
	addi	r5, r5, -1	C r5 = limbs still to be loaded

	andi.	r10, r5, 3	C count for spill loop
	beq	L(e)		C remaining count is already a multiple of 4
	mtctr	r10
	lwzu	r28, -4(r4)
	bdz	L(xe0)		C just one spill limb

C Spill loop, one limb per iteration, brings the remaining count down to a
C multiple of 4.  The last spill limb is finished at xe0 without a new load.
L(loop0):
	slw	r12, r28, r6
	srw	r24, r28, r8
	lwzu	r28, -4(r4)
	or	r24, r0, r24
	stwu	r24, -4(r7)
	mr	r0, r12
	bdnz	L(loop0)	C taken at most once!

L(xe0):	slw	r12, r28, r6
	srw	r24, r28, r8
	or	r24, r0, r24
	stwu	r24, -4(r7)
	mr	r0, r12

C Four-way unrolled loop.  Four limbs are preloaded into r28-r31, and each
C iteration shifts them while loading the four limbs for the next iteration.
C The final group is handled after the loop, hence the count is one less than
C the number of groups.
L(e):	srwi	r5, r5, 2	C count for unrolled loop
	addi	r5, r5, -1
	mtctr	r5
	lwz	r28, -4(r4)
	lwz	r29, -8(r4)
	lwz	r30, -12(r4)
	lwzu	r31, -16(r4)

L(loopU):
	slw	r9, r28, r6
	srw	r24, r28, r8
	lwz	r28, -4(r4)
	slw	r10, r29, r6
	srw	r25, r29, r8
	lwz	r29, -8(r4)
	slw	r11, r30, r6
	srw	r26, r30, r8
	lwz	r30, -12(r4)
	slw	r12, r31, r6
	srw	r27, r31, r8
	lwzu	r31, -16(r4)
	or	r24, r0, r24
	stw	r24, -4(r7)
	or	r25, r9, r25
	stw	r25, -8(r7)
	or	r26, r10, r26
	stw	r26, -12(r7)
	or	r27, r11, r27
	stwu	r27, -16(r7)
	mr	r0, r12		C carry the high part into the next iteration
	bdnz	L(loopU)

C Final group of four limbs, already loaded, so no further loads here.
	slw	r9, r28, r6
	srw	r24, r28, r8
	slw	r10, r29, r6
	srw	r25, r29, r8
	slw	r11, r30, r6
	srw	r26, r30, r8
	slw	r12, r31, r6
	srw	r27, r31, r8
	or	r24, r0, r24
	stw	r24, -4(r7)
	or	r25, r9, r25
	stw	r25, -8(r7)
	or	r26, r10, r26
	stw	r26, -12(r7)
	or	r27, r11, r27
	stw	r27, -16(r7)

	stw	r12, -20(r7)	C lowest result limb, zero bits shifted in
	lmw	r24, 8(r1)	C restore registers
	addi	r1, r1, 48	C release the stack frame
	blr
EPILOGUE()
