xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/powerpc32/lshiftc.asm (revision 8450a7c42673d65e3b1f6560d3b6ecd317a6cbe8)
1dnl  PowerPC-32 mpn_lshiftc.
2
3dnl  Copyright 1995, 1998, 2000, 2002, 2003, 2004, 2005, 2010 Free Software
4dnl  Foundation, Inc.
5
6dnl  This file is part of the GNU MP Library.
7
8dnl  The GNU MP Library is free software; you can redistribute it and/or modify
9dnl  it under the terms of the GNU Lesser General Public License as published
10dnl  by the Free Software Foundation; either version 3 of the License, or (at
11dnl  your option) any later version.
12
13dnl  The GNU MP Library is distributed in the hope that it will be useful, but
14dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16dnl  License for more details.
17
18dnl  You should have received a copy of the GNU Lesser General Public License
19dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
20
21include(`../config.m4')
22
23C                cycles/limb
24C 603e:            ?
25C 604e:            3.0
26C 75x (G3):        3.0
27C 7400,7410 (G4):  3.0
28C 7445,7455 (G4+): 2.5
29C 7447,7457 (G4+): 2.25
30C power4/ppc970:   2.5
31C power5:          2.5
32
33C INPUT PARAMETERS
34C rp	r3
35C up	r4
36C n	r5
37C cnt	r6
38
39ASM_START()
C mpn_lshiftc: shift the n-limb operand at up left by cnt bits, store the
C one's complement of the shifted limbs at rp, and return in r3 the bits
C shifted out of the most significant limb (returned uncomplemented).
C
C Both operands are walked from the most significant limb (highest address)
C down to the least significant one.  Every result limb is
C	~((limb[i] << cnt) | (limb[i-1] >> (32-cnt)))
C and the least significant result limb is ~(limb[0] << cnt).
C
C Register roles common to both paths:
C   r4  read pointer, pre-decremented by the lwzu loads
C   r7  write pointer, pre-decremented by the stwu stores
C   r6  cnt
C   r8  32 - cnt
C   r3  return value (rp is copied into r7 before r3 is overwritten)
C
C NOTE(review): the code presumably expects n >= 1 and 1 <= cnt <= 31; no
C check is made here -- confirm against the callers.
40PROLOGUE(mpn_lshiftc)
41	cmpwi	cr0, r5, 30	C n > 30?  (result is consumed by the bgt below)
42	slwi	r0, r5, 2	C r0 = 4*n, operand size in bytes
43	add	r4, r4, r0	C r4 = up + 4*n, just past the most significant source limb
44	add	r7, r3, r0	C r7 = rp + 4*n, just past the most significant result limb
45	bgt	L(BIG)		C more than 30 limbs: use the 4-way unrolled code

C Small case (n <= 30): loop unrolled twice, r10 and r11 alternate between
C "limb just loaded" and "limb loaded before it".
47	mtctr	r5		C CTR = n
48	subfic	r8, r6, 32	C r8 = 32 - cnt
49	lwzu	r11, -4(r4)	C load the most significant source limb
50	srw	r3, r11, r8	C return value = bits shifted out at the top
51	bdz	L(end1)		C CTR = n-1; if zero (n == 1) only the final limb is left
52
53L(oop):	lwzu	r10, -4(r4)	C next lower limb
54	slw	r9, r11, r6	C high part from the previous limb
55	srw	r12, r10, r8	C low part from the limb just loaded
56	nor	r9, r9, r12	C r9 = ~(high | low): merge and complement in one step
57	stwu	r9, -4(r7)
58	bdz	L(end2)		C source exhausted with the last limb in r10
59	lwzu	r11, -4(r4)	C same step again with r10 and r11 swapped
60	slw	r9, r10, r6
61	srw	r12, r11, r8
62	nor	r9, r9, r12
63	stwu	r9, -4(r7)
64	bdnz	L(oop)		C fall through with the last limb in r11
65
C Least significant result limb: nothing is shifted in from below, so it is
C simply ~(limb << cnt).  end1 is used when the last limb is in r11, end2
C when it is in r10.
66L(end1):
67	slw	r0, r11, r6
68	nor	r0, r0, r0	C r0 = ~r0
69	stw	r0, -4(r7)
70	blr
71L(end2):
72	slw	r0, r10, r6
73	nor	r0, r0, r0	C r0 = ~r0
74	stw	r0, -4(r7)
75	blr
76
C Large case (n > 30): 4-way unrolled loop.  r0 carries the high part
C (previous limb << cnt) that is still waiting for its low part.
77L(BIG):
78	stmw	r24, -32(r1)	C save r24..r31 in the 32 bytes below r1 (r1 itself is not adjusted)
79	lwzu	r9, -4(r4)	C load the most significant source limb
80	subfic	r8, r6, 32	C r8 = 32 - cnt
81	srw	r3, r9, r8	C return value = bits shifted out at the top
82	slw	r0, r9, r6	C pending high part of the top result limb
83	addi	r5, r5, -1	C r5 = n-1 source limbs still to be loaded
84
85	andi.	r10, r5, 3	C r10 = (n-1) mod 4: limbs to do singly so the rest is a multiple of 4
86	beq	L(e)		C nothing to peel off
87	mtctr	r10
88	lwzu	r28, -4(r4)
89	bdz	L(xe0)		C exactly one limb to peel: skip the loop
90
C Peel loop: one result limb per pass, with the next load issued early.
91L(loop0):
92	slw	r12, r28, r6	C high part for the next result limb
93	srw	r24, r28, r8	C low part for the current result limb
94	lwzu	r28, -4(r4)
95	nor	r24, r0, r24	C ~(pending high | low)
96	stwu	r24, -4(r7)
97	mr	r0, r12		C new pending high part
98	bdnz	L(loop0)	C taken at most once!
99
C Last peeled limb (already in r28); no further load is issued here.
100L(xe0):	slw	r12, r28, r6
101	srw	r24, r28, r8
102	nor	r24, r0, r24
103	stwu	r24, -4(r7)
104	mr	r0, r12
105
C The remaining limb count is a multiple of 4.  The first group of four is
C preloaded here and the last group is processed after the loop, so CTR is
C set to (number of groups) - 1.  With n > 30 there are at least 7 groups.
106L(e):	srwi	r5, r5, 2	C r5 = (n-1)/4 = number of 4-limb groups
107	addi	r5, r5, -1	C last group is handled by the code after the loop
108	mtctr	r5
109	lwz	r28, -4(r4)	C preload the first group, most significant limb in r28
110	lwz	r29, -8(r4)
111	lwz	r30, -12(r4)
112	lwzu	r31, -16(r4)	C last load of the group also moves r4 down 16 bytes
113
C Each pass shifts the four limbs held in r28..r31, reloads those registers
C with the next group, and stores four complemented result limbs.  r12
C becomes the pending high part (r0) for the following pass.
114L(loopU):
115	slw	r9, r28, r6
116	srw	r24, r28, r8
117	lwz	r28, -4(r4)
118	slw	r10, r29, r6
119	srw	r25, r29, r8
120	lwz	r29, -8(r4)
121	slw	r11, r30, r6
122	srw	r26, r30, r8
123	lwz	r30, -12(r4)
124	slw	r12, r31, r6
125	srw	r27, r31, r8
126	lwzu	r31, -16(r4)
127	nor	r24, r0, r24	C pending high part from the previous group | low part of r28
128	stw	r24, -4(r7)
129	nor	r25, r9, r25
130	stw	r25, -8(r7)
131	nor	r26, r10, r26
132	stw	r26, -12(r7)
133	nor	r27, r11, r27
134	stwu	r27, -16(r7)	C last store of the group also moves r7 down 16 bytes
135	mr	r0, r12
136	bdnz	L(loopU)
137
C Final group: same computation as the loop body but without further loads,
C followed by the least significant result limb.
138	slw	r9, r28, r6
139	srw	r24, r28, r8
140	slw	r10, r29, r6
141	srw	r25, r29, r8
142	slw	r11, r30, r6
143	srw	r26, r30, r8
144	slw	r12, r31, r6
145	srw	r27, r31, r8
146	nor	r24, r0, r24
147	stw	r24, -4(r7)
148	nor	r25, r9, r25
149	stw	r25, -8(r7)
150	nor	r26, r10, r26
151	stw	r26, -12(r7)
152	nor	r27, r11, r27
153	stw	r27, -16(r7)
154	nor	r12, r12, r12	C least significant result limb = ~(lowest limb << cnt)
155	stw	r12, -20(r7)
156	lmw	r24, -32(r1)	C restore r24..r31 saved at L(BIG)
157	blr
158EPILOGUE()
159