/*	$NetBSD: altivec.c,v 1.7 2004/04/16 23:58:08 matt Exp $	*/

/*
 * Copyright (C) 1996 Wolfgang Solfrank.
 * Copyright (C) 1996 TooLs GmbH.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by TooLs GmbH.
 * 4. The name of TooLs GmbH may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: altivec.c,v 1.7 2004/04/16 23:58:08 matt Exp $");

#include "opt_multiprocessor.h"

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/sa.h>
#include <sys/systm.h>
#include <sys/user.h>
#include <sys/malloc.h>
#include <sys/pool.h>

#include <uvm/uvm_extern.h>

#include <powerpc/altivec.h>
#include <powerpc/spr.h>
#include <powerpc/psl.h>

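/*
 * Give the AltiVec unit to curlwp: flush any vector context still live
 * on this CPU, reload curlwp's saved vector registers, VSCR and VRSAVE
 * from its PCB, and record the new ownership.  (Presumably reached via
 * the AltiVec-unavailable trap; the caller is expected to arrange for
 * PSL_VEC to be set when we return to user mode.)
 */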
void
enable_vec(void)
{
	struct cpu_info *ci = curcpu();
	struct lwp *l = curlwp;
	struct pcb *pcb = &l->l_addr->u_pcb;
	struct trapframe *tf = trapframe(l);
	struct vreg *vr = &pcb->pcb_vr;
	register_t msr;

	KASSERT(pcb->pcb_veccpu == NULL);

	pcb->pcb_flags |= PCB_ALTIVEC;

	/*
	 * Enable AltiVec temporarily (and disable interrupts).
	 */
	msr = mfmsr();
	mtmsr((msr & ~PSL_EE) | PSL_VEC);
	__asm __volatile ("isync");
	if (ci->ci_veclwp) {
		save_vec_cpu();
	}
	KASSERT(curcpu()->ci_veclwp == NULL);
	/*
	 * Restore VSCR by first loading it into a vector register and then
	 * moving it into VSCR.  (This needs to be done before loading the
	 * user's vector registers since we need a scratch vector register.)
	 */
	__asm __volatile("vxor %2,%2,%2; lvewx %2,%0,%1; mtvscr %2" \
	    ::	"b"(vr), "r"(offsetof(struct vreg, vscr)), "n"(0));

	/*
	 * VRSAVE will be restored when the trap frame is restored on the
	 * way back to user mode.
	 */
	tf->tf_xtra[TF_VRSAVE] = vr->vrsave;

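	/*
	 * lvx encodes its target vector register number in the instruction
	 * itself (hence the "n" constraint), so the 32 loads below are
	 * expanded with a macro rather than written as a loop.
	 */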
#define	LVX(n,vr)	__asm /*__volatile*/("lvx %2,%0,%1" \
	    ::	"b"(vr), "r"(offsetof(struct vreg, vreg[n])), "n"(n));

	/*
	 * Load all 32 vector registers
	 */
	LVX( 0,vr);	LVX( 1,vr);	LVX( 2,vr);	LVX( 3,vr);
	LVX( 4,vr);	LVX( 5,vr);	LVX( 6,vr);	LVX( 7,vr);
	LVX( 8,vr);	LVX( 9,vr);	LVX(10,vr);	LVX(11,vr);
	LVX(12,vr);	LVX(13,vr);	LVX(14,vr);	LVX(15,vr);

	LVX(16,vr);	LVX(17,vr);	LVX(18,vr);	LVX(19,vr);
	LVX(20,vr);	LVX(21,vr);	LVX(22,vr);	LVX(23,vr);
	LVX(24,vr);	LVX(25,vr);	LVX(26,vr);	LVX(27,vr);
	LVX(28,vr);	LVX(29,vr);	LVX(30,vr);	LVX(31,vr);
	__asm __volatile ("isync");

	/*
	 * Enable AltiVec when we return to user-mode.
	 * Record the new ownership of the AltiVec unit.
	 */
	curcpu()->ci_veclwp = l;
	pcb->pcb_veccpu = curcpu();
	__asm __volatile ("sync");

	/*
	 * Restore MSR (turn off AltiVec)
	 */
	mtmsr(msr);
}

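/*
 * Save the vector state that is live on this CPU (if any) into the
 * owning lwp's PCB and give up ownership.  Interrupts stay disabled
 * while the registers are copied out.
 */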
void
save_vec_cpu(void)
{
	struct cpu_info *ci = curcpu();
	struct lwp *l;
	struct pcb *pcb;
	struct vreg *vr;
	struct trapframe *tf;
	register_t msr;

	/*
	 * Turn on AltiVec, turn off interrupts.
	 */
	msr = mfmsr();
	mtmsr((msr & ~PSL_EE) | PSL_VEC);
	__asm __volatile ("isync");
	l = ci->ci_veclwp;
	if (l == NULL)
		goto out;
	pcb = &l->l_addr->u_pcb;
	vr = &pcb->pcb_vr;
	tf = trapframe(l);

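	/*
	 * As with LVX in enable_vec(), the vector register number must be
	 * a literal encoded in the stvx instruction, hence the macro.
	 */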
#define	STVX(n,vr)	__asm /*__volatile*/("stvx %2,%0,%1" \
	    ::	"b"(vr), "r"(offsetof(struct vreg, vreg[n])), "n"(n));

	/*
	 * Save the vector registers.
	 */
	STVX( 0,vr);	STVX( 1,vr);	STVX( 2,vr);	STVX( 3,vr);
	STVX( 4,vr);	STVX( 5,vr);	STVX( 6,vr);	STVX( 7,vr);
	STVX( 8,vr);	STVX( 9,vr);	STVX(10,vr);	STVX(11,vr);
	STVX(12,vr);	STVX(13,vr);	STVX(14,vr);	STVX(15,vr);

	STVX(16,vr);	STVX(17,vr);	STVX(18,vr);	STVX(19,vr);
	STVX(20,vr);	STVX(21,vr);	STVX(22,vr);	STVX(23,vr);
	STVX(24,vr);	STVX(25,vr);	STVX(26,vr);	STVX(27,vr);
	STVX(28,vr);	STVX(29,vr);	STVX(30,vr);	STVX(31,vr);

	/*
	 * Save VSCR (this needs to be done after saving the vector
	 * registers since we need to use one of them as scratch).
	 */
	__asm __volatile("mfvscr %2; stvewx %2,%0,%1" \
	    ::	"b"(vr), "r"(offsetof(struct vreg, vscr)), "n"(0));

	/*
	 * Save VRSAVE
	 */
	vr->vrsave = tf->tf_xtra[TF_VRSAVE];

	/*
	 * Record that the vector state is no longer live on any CPU and
	 * stop any active AltiVec data streams.
	 */
	pcb->pcb_veccpu = NULL;
	ci->ci_veclwp = NULL;
	__asm __volatile ("dssall; sync");

 out:

	/*
	 * Restore MSR (turn off AltiVec)
	 */
	mtmsr(msr);
}

/*
 * Save a process's AltiVec state to its PCB.  The state may be in any CPU.
 * The process must either be curproc or traced by curproc (and stopped).
 * (The point being that the process must not run on another CPU during
 * this function.)  If 'discard' is set, the live state is simply dropped
 * rather than copied back to the PCB.
 */
void
save_vec_lwp(struct lwp *l, int discard)
{
	struct pcb * const pcb = &l->l_addr->u_pcb;
	struct cpu_info * const ci = curcpu();

	/*
	 * If it's already in the PCB, there's nothing to do.
	 */
	if (pcb->pcb_veccpu == NULL)
		return;

	/*
	 * If we simply need to discard the information, then we don't
	 * have to save anything.
	 */
	if (discard) {
#ifndef MULTIPROCESSOR
		KASSERT(ci == pcb->pcb_veccpu);
#endif
		KASSERT(l == pcb->pcb_veccpu->ci_veclwp);
		pcb->pcb_veccpu->ci_veclwp = NULL;
		pcb->pcb_veccpu = NULL;
		pcb->pcb_flags &= ~PCB_ALTIVEC;
		return;
	}

	/*
	 * If the state is in the current CPU, just flush the current CPU's
	 * state.
	 */
	if (l == ci->ci_veclwp) {
		save_vec_cpu();
		return;
	}

#ifdef MULTIPROCESSOR
	/*
	 * It must be on another CPU; flush it from there.
	 */
	mp_save_vec_lwp(l);
#endif
}

#define ZERO_VEC	19

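/*
 * Zero a physical page using AltiVec stores.  Vector register ZERO_VEC
 * is borrowed as the all-zeroes source and is saved and restored around
 * the operation; data translation is turned off so the physical address
 * can be used directly.
 */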
void
vzeropage(paddr_t pa)
{
	const paddr_t ea = pa + PAGE_SIZE;
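	/*
	 * vec[] is scratch space from which vp carves a 16-byte-aligned
	 * slot for the one vector register saved below; 7 words cover
	 * the worst-case alignment padding.
	 */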
	uint32_t vec[7], *vp = (void *) roundup((uintptr_t) vec, 16);
	register_t omsr, msr;

	__asm __volatile("mfmsr %0" : "=r"(omsr) :);

	/*
	 * Turn on AltiVec, turn off interrupts.
	 */
	msr = (omsr & ~PSL_EE) | PSL_VEC;
	__asm __volatile("sync; mtmsr %0; isync" :: "r"(msr));

	/*
	 * Save the VEC register we are going to use before we disable
	 * relocation.
	 */
	__asm("stvx %1,0,%0" :: "r"(vp), "n"(ZERO_VEC));
	__asm("vxor %0,%0,%0" :: "n"(ZERO_VEC));

	/*
	 * Turn off data relocation (DMMU off).
	 */
	msr &= ~PSL_DR;
	__asm __volatile("sync; mtmsr %0; isync" :: "r"(msr));

	/*
	 * Zero the page, 64 bytes per iteration.  The stvxl form marks
	 * its cache line least-recently-used so the zeroed data does not
	 * crowd out more useful cache contents.
	 */
	do {
		__asm("stvx %2,%0,%1" ::  "b"(pa), "r"( 0), "n"(ZERO_VEC));
		__asm("stvxl %2,%0,%1" :: "b"(pa), "r"(16), "n"(ZERO_VEC));
		__asm("stvx %2,%0,%1" ::  "b"(pa), "r"(32), "n"(ZERO_VEC));
		__asm("stvxl %2,%0,%1" :: "b"(pa), "r"(48), "n"(ZERO_VEC));
		pa += 64;
	} while (pa < ea);

	/*
	 * Restore data relocation (DMMU on).
	 */
	msr |= PSL_DR;
	__asm __volatile("sync; mtmsr %0; isync" :: "r"(msr));

	/*
	 * Restore VEC register (now that we can access the stack again).
	 */
	__asm("lvx %1,0,%0" :: "r"(vp), "n"(ZERO_VEC));

	/*
	 * Restore old MSR (AltiVec OFF).
	 */
	__asm __volatile("sync; mtmsr %0; isync" :: "r"(omsr));
}

#define LO_VEC	16
#define HI_VEC	17

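/*
 * Copy a physical page using AltiVec loads and stores.  Vector registers
 * LO_VEC and HI_VEC are borrowed for the transfer and are saved and
 * restored around the operation; data translation is turned off so the
 * physical addresses can be used directly.
 */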
void
vcopypage(paddr_t dst, paddr_t src)
{
	const paddr_t edst = dst + PAGE_SIZE;
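	/*
	 * vec[] is scratch space from which vp carves a 16-byte-aligned
	 * slot for the two vector registers saved below; 11 words cover
	 * the worst-case alignment padding.
	 */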
	uint32_t vec[11], *vp = (void *) roundup((uintptr_t) vec, 16);
	register_t omsr, msr;

	__asm __volatile("mfmsr %0" : "=r"(omsr) :);

	/*
	 * Turn on AltiVec, turn off interrupts.
	 */
	msr = (omsr & ~PSL_EE) | PSL_VEC;
	__asm __volatile("sync; mtmsr %0; isync" :: "r"(msr));

	/*
	 * Save the VEC registers we will be using before we disable
	 * relocation.
	 */
	__asm("stvx %2,%1,%0" :: "b"(vp), "r"( 0), "n"(LO_VEC));
	__asm("stvx %2,%1,%0" :: "b"(vp), "r"(16), "n"(HI_VEC));

	/*
	 * Turn off data relocation (DMMU off).
	 */
	msr &= ~PSL_DR;
	__asm __volatile("sync; mtmsr %0; isync" :: "r"(msr));

	/*
	 * Copy the page one cache line (32 bytes) at a time; on most PPCs,
	 * two vector registers occupy one cache line.  The lvxl/stvxl
	 * forms mark their cache lines least-recently-used.
	 */
	do {
		__asm("lvx %2,%0,%1"   :: "b"(src), "r"( 0), "n"(LO_VEC));
		__asm("stvx %2,%0,%1"  :: "b"(dst), "r"( 0), "n"(LO_VEC));
		__asm("lvxl %2,%0,%1"  :: "b"(src), "r"(16), "n"(HI_VEC));
		__asm("stvxl %2,%0,%1" :: "b"(dst), "r"(16), "n"(HI_VEC));
		src += 32;
		dst += 32;
	} while (dst < edst);

	/*
	 * Restore data relocation (DMMU on).
	 */
	msr |= PSL_DR;
	__asm __volatile("sync; mtmsr %0; isync" :: "r"(msr));

	/*
	 * Restore VEC registers (now that we can access the stack again).
	 */
	__asm("lvx %2,%1,%0" :: "b"(vp), "r"( 0), "n"(LO_VEC));
	__asm("lvx %2,%1,%0" :: "b"(vp), "r"(16), "n"(HI_VEC));

	/*
	 * Restore old MSR (AltiVec OFF).
	 */
	__asm __volatile("sync; mtmsr %0; isync" :: "r"(omsr));
}