dnl  PowerPC 750 mpn_rshift -- mpn right shift.

dnl  Copyright 2002, 2003 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')


C       cycles/limb
C 750:     3.0
C 7400:    3.0


C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
C                       unsigned shift);
C
C Shift the size-limb operand at src right by shift bits and store the
C size-limb result at dst.  With 32-bit limbs:
C
C     dst[i]      = (src[i] >> shift) | (src[i+1] << (32-shift)),  i < size-1
C     dst[size-1] =  src[size-1] >> shift
C     return      =  src[0] << (32-shift)
C
C so the bits shifted out at the low end come back in the high bits of the
C return limb.
C
C No arguments are checked.  size must be at least 1: src[0] is loaded
C unconditionally, and CTR is decremented before it is tested, so size==0
C would wrap the counter rather than stop.  shift is presumably 1 to 31, the
C usual mpn shift contract -- confirm against the GMP manual.
C
C Limbs are handled from index 0 upwards, and each source limb is loaded
C before the destination limb of the same index is stored.
C
C Only r3-r12 and CTR are used, and there is no stack frame.
C
C This code is the same per-limb speed as mpn/powerpc32/rshift.asm, but
C smaller and saving about 30 or so cycles of overhead.

ASM_START()
PROLOGUE(mpn_rshift)

	C r3	dst
	C r4	src
	C r5	size
	C r6	shift

	mtctr	r5		C size, one CTR decrement per limb loaded
	lwz	r8, 0(r4)	C src[0]

	subfic	r7, r6, 32	C 32-shift
	addi	r5, r3, -4	C dst-4, so that 4(r5) is dst[0]

	slw	r3, r8, r7	C return value
	bdz	L(one)		C size==1, nothing above src[0]

	lwzu	r9, 4(r4)	C src[1]
	srw	r8, r8, r6	C src[0] >> shift
	bdz	L(two)		C size==2, skip the loop


	C The loop is unrolled two limbs per pass, with r9 and r10 swapping
	C roles between the two halves.  CTR reaching zero in the first half
	C exits to L(odd), in the second half it falls through to L(two).

L(top):
	C r3	return value
	C r4	&src[i]
	C r5	&dst[i-2], advanced one limb by each stwu
	C r6	shift
	C r7	32-shift
	C r8	src[i-1] >> shift
	C r9	src[i]
	C r10	receives src[i+1]
	C r11	scratch

	lwzu	r10, 4(r4)	C src[i+1]
	slw	r11, r9, r7	C low bits of src[i], moved to the top

	or	r8, r8, r11
	stwu	r8, 4(r5)	C dst[i-1]

	srw	r8, r9, r6
	bdz	L(odd)		C r10 holds the last limb

	C Same state with i one higher and r9, r10 exchanged:
	C r8	src[i-1] >> shift
	C r9	receives src[i+1]
	C r10	src[i]

	lwzu	r9, 4(r4)
	slw	r11, r10, r7

	or	r8, r8, r11
	stwu	r8, 4(r5)

	srw	r8, r10, r6
	bdnz	L(top)		C else r9 holds the last limb


L(two):
	C Last two result limbs, final source limb in r9.
	C
	C r3	return value
	C r4
	C r5	&dst[size-3], so 4(r5) is dst[size-2] and 8(r5) is dst[size-1]
	C r6	shift
	C r7	32-shift
	C r8	src[size-2] >> shift
	C r9	src[size-1]
	C r10

	slw	r11, r9, r7
	srw	r12, r9, r6	C src[size-1] >> shift

	or	r8, r8, r11
	stw	r12, 8(r5)	C dst[size-1]

	stw	r8, 4(r5)	C dst[size-2]
	blr


L(odd):
	C Same as L(two), but with the final source limb in r10.
	C
	C r3	return value
	C r4
	C r5	&dst[size-3], so 4(r5) is dst[size-2] and 8(r5) is dst[size-1]
	C r6	shift
	C r7	32-shift
	C r8	src[size-2] >> shift
	C r9
	C r10	src[size-1]

	slw	r11, r10, r7
	srw	r12, r10, r6	C src[size-1] >> shift

	or	r8, r8, r11
	stw	r12, 8(r5)	C dst[size-1]

	stw	r8, 4(r5)	C dst[size-2]
	blr


L(one):
	C size==1, the single result limb is just src[0] >> shift.
	C
	C r3	return value
	C r4
	C r5	dst-4
	C r6	shift
	C r7
	C r8	src[0]

	srw	r8, r8, r6

	stw	r8, 4(r5)	C dst[0]
	blr

EPILOGUE(mpn_rshift)