xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/powerpc32/750/lshift.asm (revision 3587d6f89c746bbb4f886219ddacd41ace480ecf)
1dnl  PowerPC 750 mpn_lshift -- mpn left shift.
2
3dnl  Copyright 2002, 2003 Free Software Foundation, Inc.
4
5dnl  This file is part of the GNU MP Library.
6dnl
7dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8dnl  it under the terms of either:
9dnl
10dnl    * the GNU Lesser General Public License as published by the Free
11dnl      Software Foundation; either version 3 of the License, or (at your
12dnl      option) any later version.
13dnl
14dnl  or
15dnl
16dnl    * the GNU General Public License as published by the Free Software
17dnl      Foundation; either version 2 of the License, or (at your option) any
18dnl      later version.
19dnl
20dnl  or both in parallel, as here.
21dnl
22dnl  The GNU MP Library is distributed in the hope that it will be useful, but
23dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
25dnl  for more details.
26dnl
27dnl  You should have received copies of the GNU General Public License and the
28dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
29dnl  see https://www.gnu.org/licenses/.
30
31include(`../config.m4')
32
33
34C       cycles/limb
35C 750:     3.0
36C 7400:    3.0
37
38
39C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
40C                       unsigned shift);
41C
42C This code runs at the same per-limb speed as mpn/powerpc32/lshift.asm, but
43C is smaller and saves roughly 30 cycles of overhead.
44
45ASM_START()
46PROLOGUE(mpn_lshift)
47	C dst[0..size-1] = src[0..size-1] << shift, processed from the top limb down
48	C r3	dst (reused for the return value once &dst[size] is in r5)
49	C r4	src
50	C r5	size (limbs; not checked for zero here)
51	C r6	shift (bit count)
52
53	mtctr	r5		C ctr = size
54	slwi	r5, r5, 2	C 4*size (byte length)
55
56	subfic	r7, r6, 32	C 32-shift
57	add	r4, r4, r5	C &src[size]
58
59	add	r5, r3, r5	C &dst[size]
60	lwz	r8, -4(r4)	C src[size-1]
61	bdz	L(one)		C ctr now size-1; zero means size==1
62
63	lwzu	r9, -8(r4)	C src[size-2], r4 = &src[size-2]
64
65	srw	r3, r8, r7	C return value: src[size-1] >> (32-shift)
66	slw	r8, r8, r6	C src[size-1] << shift
67	bdz	L(two)		C ctr now size-2; zero means size==2
68
69
70L(top):
71	C r3	return value
72	C r4	src, decrementing (address of the limb last loaded)
73	C r5	dst, decrementing (address of the limb last stored)
74	C r6	shift
75	C r7	32-shift
76	C r8	src[i+1] << shift
77	C r9	src[i]
78	C r10	(loaded next with src[i-1])
79
80	lwzu	r10, -4(r4)	C src[i-1]
81	srw	r11, r9, r7	C src[i] >> (32-shift)
82
83	or	r8, r8, r11	C completed limb dst[i+1]
84	stwu	r8, -4(r5)	C store dst[i+1], step dst down
85
86	slw	r8, r9, r6	C src[i] << shift
87	bdz	L(odd)		C ctr==0: r10 is src[0], finish in L(odd)
88
89	C r8	src[i+1] << shift (r9/r10 roles swapped for this half)
90	C r9	(loaded next with src[i-1])
91	C r10	src[i]
92
93	lwzu	r9, -4(r4)	C src[i-1]
94	srw	r11, r10, r7	C src[i] >> (32-shift)
95
96	or	r8, r8, r11	C completed limb dst[i+1]
97	stwu	r8, -4(r5)	C store dst[i+1], step dst down
98
99	slw	r8, r10, r6	C src[i] << shift
100	bdnz	L(top)		C ctr==0: fall into L(two) with src[0] in r9
101
102
103L(two):
104	C r3	return value
105	C r4	(not used from here on)
106	C r5	&dst[2]
107	C r6	shift
108	C r7	32-shift
109	C r8	src[1] << shift
110	C r9	src[0]
111	C r10	(not used on this path)
112
113	srw	r11, r9, r7	C src[0] >> (32-shift)
114	slw	r12, r9, r6	C src[0] << shift
115
116	or	r8, r8, r11	C completed limb dst[1]
117	stw	r12, -8(r5)	C dst[0]
118
119	stw	r8, -4(r5)	C dst[1]
120	blr
121
122
123L(odd):
124	C r3	return value
125	C r4	(not used from here on)
126	C r5	&dst[2]
127	C r6	shift
128	C r7	32-shift
129	C r8	src[1] << shift
130	C r9	(not used on this path)
131	C r10	src[0]
132
133	srw	r11, r10, r7	C src[0] >> (32-shift)
134	slw	r12, r10, r6	C src[0] << shift
135
136	or	r8, r8, r11	C completed limb dst[1]
137	stw	r12, -8(r5)	C dst[0]
138
139	stw	r8, -4(r5)	C dst[1]
140	blr
141
142
143L(one):
144	C r5	&dst[1]
145	C r6	shift
146	C r7	32-shift
147	C r8	src[0]
148
149	srw	r3, r8, r7	C return value: src[0] >> (32-shift)
150	slw	r8, r8, r6	C src[0] << shift (size==1, so size-1 is 0)
151
152	stw	r8, -4(r5)	C dst[0]
153	blr
154
155EPILOGUE(mpn_lshift)
156