1*e7ea7ca2Smrg /* $NetBSD: altivec.c,v 1.35 2024/06/15 19:48:13 mrg Exp $ */
22b0e97a5Smatt
32b0e97a5Smatt /*
42b0e97a5Smatt * Copyright (C) 1996 Wolfgang Solfrank.
52b0e97a5Smatt * Copyright (C) 1996 TooLs GmbH.
62b0e97a5Smatt * All rights reserved.
72b0e97a5Smatt *
82b0e97a5Smatt * Redistribution and use in source and binary forms, with or without
92b0e97a5Smatt * modification, are permitted provided that the following conditions
102b0e97a5Smatt * are met:
112b0e97a5Smatt * 1. Redistributions of source code must retain the above copyright
122b0e97a5Smatt * notice, this list of conditions and the following disclaimer.
132b0e97a5Smatt * 2. Redistributions in binary form must reproduce the above copyright
142b0e97a5Smatt * notice, this list of conditions and the following disclaimer in the
152b0e97a5Smatt * documentation and/or other materials provided with the distribution.
162b0e97a5Smatt * 3. All advertising materials mentioning features or use of this software
172b0e97a5Smatt * must display the following acknowledgement:
182b0e97a5Smatt * This product includes software developed by TooLs GmbH.
192b0e97a5Smatt * 4. The name of TooLs GmbH may not be used to endorse or promote products
202b0e97a5Smatt * derived from this software without specific prior written permission.
212b0e97a5Smatt *
222b0e97a5Smatt * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
232b0e97a5Smatt * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
242b0e97a5Smatt * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
252b0e97a5Smatt * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
262b0e97a5Smatt * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
272b0e97a5Smatt * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
282b0e97a5Smatt * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
292b0e97a5Smatt * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
302b0e97a5Smatt * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
312b0e97a5Smatt * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
322b0e97a5Smatt */
33d505b189Smartin
34ed517291Slukem #include <sys/cdefs.h>
35*e7ea7ca2Smrg __KERNEL_RCSID(0, "$NetBSD: altivec.c,v 1.35 2024/06/15 19:48:13 mrg Exp $");
36d505b189Smartin
372b0e97a5Smatt #include <sys/param.h>
382b0e97a5Smatt #include <sys/proc.h>
392b0e97a5Smatt #include <sys/systm.h>
40b8ea2c8cSmatt #include <sys/atomic.h>
412b0e97a5Smatt
42b8ea2c8cSmatt #include <uvm/uvm_extern.h> /* for vcopypage/vzeropage */
4386f35f80Sthorpej
447146b2f6Srmind #include <powerpc/pcb.h>
452b0e97a5Smatt #include <powerpc/altivec.h>
465a4cb80dSmatt #include <powerpc/spr.h>
475a4cb80dSmatt #include <powerpc/oea/spr.h>
48b8ea2c8cSmatt #include <powerpc/psl.h>
492b0e97a5Smatt
/* pcu(9) callbacks implementing lazy AltiVec context switching. */
static void vec_state_load(lwp_t *, u_int);
static void vec_state_save(lwp_t *);
static void vec_state_release(lwp_t *);

/*
 * Hook table handed to the machine-independent pcu(9) framework,
 * which invokes these to load, save, and release an lwp's vector
 * unit state on demand.
 */
const pcu_ops_t vec_ops = {
	.pcu_id = PCU_VEC,
	.pcu_state_load = vec_state_load,
	.pcu_state_save = vec_state_save,
	.pcu_state_release = vec_state_release,
};
60f36a02e8Smatt
61f36a02e8Smatt bool
vec_used_p(lwp_t * l)62f36a02e8Smatt vec_used_p(lwp_t *l)
63f36a02e8Smatt {
64877a3ccfSchs return pcu_valid_p(&vec_ops, l);
65f36a02e8Smatt }
66d974db0aSgarbled
672b0e97a5Smatt void
vec_mark_used(lwp_t * l)68f36a02e8Smatt vec_mark_used(lwp_t *l)
692b0e97a5Smatt {
70877a3ccfSchs return pcu_discard(&vec_ops, l, true);
71f36a02e8Smatt }
72f36a02e8Smatt
73f36a02e8Smatt void
vec_state_load(lwp_t * l,u_int flags)742b803018Smatt vec_state_load(lwp_t *l, u_int flags)
75f36a02e8Smatt {
76f36a02e8Smatt struct pcb * const pcb = lwp_getpcb(l);
772b0e97a5Smatt
78d67ab12cSrmind if ((flags & PCU_VALID) == 0) {
798c87bc1fSmatt memset(&pcb->pcb_vr, 0, sizeof(pcb->pcb_vr));
808c87bc1fSmatt vec_mark_used(l);
818c87bc1fSmatt }
828c87bc1fSmatt
836efc5a99Smatt if ((flags & PCU_REENABLE) == 0) {
842b0e97a5Smatt /*
852b0e97a5Smatt * Enable AltiVec temporarily (and disable interrupts).
862b0e97a5Smatt */
87f36a02e8Smatt const register_t msr = mfmsr();
882b0e97a5Smatt mtmsr((msr & ~PSL_EE) | PSL_VEC);
895f1c88d7Sperry __asm volatile ("isync");
90b8ea2c8cSmatt
912b0e97a5Smatt /*
92b8ea2c8cSmatt * Load the vector unit from vreg which is best done in
93b8ea2c8cSmatt * assembly.
942b0e97a5Smatt */
95b8ea2c8cSmatt vec_load_from_vreg(&pcb->pcb_vr);
962b0e97a5Smatt
972b0e97a5Smatt /*
982b0e97a5Smatt * Restore MSR (turn off AltiVec)
992b0e97a5Smatt */
1002b0e97a5Smatt mtmsr(msr);
101f36a02e8Smatt __asm volatile ("isync");
1026efc5a99Smatt }
1036efc5a99Smatt
1046efc5a99Smatt /*
1056efc5a99Smatt * VRSAVE will be restored when trap frame returns
1066efc5a99Smatt */
1076efc5a99Smatt l->l_md.md_utf->tf_vrsave = pcb->pcb_vr.vrsave;
108f36a02e8Smatt
109f36a02e8Smatt /*
110f36a02e8Smatt * Mark vector registers as modified.
111f36a02e8Smatt */
112b183b3efSmatt l->l_md.md_flags |= PSL_VEC;
11387f580cfSmatt l->l_md.md_utf->tf_srr1 |= PSL_VEC;
1142b0e97a5Smatt }
1152b0e97a5Smatt
/*
 * pcu(9) "state save" hook: copy the vector unit's current contents
 * into lwp "l"'s PCB (pcb_vr) so the unit can be handed to another
 * lwp.  Runs with interrupts disabled around the register dump.
 */
void
vec_state_save(lwp_t *l)
{
	struct pcb * const pcb = lwp_getpcb(l);

	/*
	 * Turn on AltiVEC, turn off interrupts.
	 */
	const register_t msr = mfmsr();
	mtmsr((msr & ~PSL_EE) | PSL_VEC);
	__asm volatile ("isync");

	/*
	 * Grab contents of vector unit.
	 */
	vec_unload_to_vreg(&pcb->pcb_vr);

	/*
	 * Save VRSAVE; its live copy is kept in the trap frame, not
	 * in the vector unit itself.
	 */
	pcb->pcb_vr.vrsave = l->l_md.md_utf->tf_vrsave;

	/*
	 * Note that we aren't using any CPU resources and stop any
	 * data streams (dssall).
	 */
	__asm volatile ("dssall; sync");

	/*
	 * Restore MSR (turn off AltiVec).
	 */
	mtmsr(msr);
	__asm volatile ("isync");
}
1502b0e97a5Smatt
/*
 * pcu(9) "state release" hook: lwp "l" is losing the vector unit.
 * Stop any AltiVec data streams, then clear PSL_VEC from the trap
 * frame's SRR1 and from md_flags so the unit is disabled for this
 * lwp and its next vector use goes back through vec_state_load().
 */
void
vec_state_release(lwp_t *l)
{
	__asm volatile("dssall;sync");
	l->l_md.md_utf->tf_srr1 &= ~PSL_VEC;
	l->l_md.md_flags &= ~PSL_VEC;
}
1582b0e97a5Smatt
/*
 * Install the AltiVec state from mcontext "mcp" into lwp "l"'s PCB,
 * discarding whatever vector state the lwp currently owns.  mcp may
 * be NULL (compat sigreturn path, see XXX below), in which case the
 * existing state is only discarded.
 *
 * NOTE(review): when the lwp has never used AltiVec this returns
 * early and does not install mcp's vector state — presumably the
 * caller handles that case; confirm against cpu_setmcontext().
 */
void
vec_restore_from_mcontext(struct lwp *l, const mcontext_t *mcp)
{
	struct pcb * const pcb = lwp_getpcb(l);

	KASSERT(l == curlwp);

	/* Nothing to do here. */
	if (!vec_used_p(l))
		return;

	/* we don't need to save the state, just drop it */
	pcu_discard(&vec_ops, l, true);

	if (mcp != NULL) {	/* XXX see compat_16_sys___sigreturn14() */
		memcpy(pcb->pcb_vr.vreg, &mcp->__vrf.__vrs,
		    sizeof (pcb->pcb_vr.vreg));
		pcb->pcb_vr.vscr = mcp->__vrf.__vscr;
		pcb->pcb_vr.vrsave = mcp->__vrf.__vrsave;
		/* Keep the trap frame's VRSAVE in sync with the PCB. */
		l->l_md.md_utf->tf_vrsave = pcb->pcb_vr.vrsave;
	}
}
181b8ea2c8cSmatt
/*
 * Copy lwp "l"'s AltiVec state into mcontext "mcp" (for signal
 * delivery or getcontext).  Returns false, writing nothing, if the
 * lwp has no vector state; otherwise flushes the live state to the
 * PCB, fills in *mcp, sets _UC_POWERPC_VEC in *flagp, and returns
 * true.  mcp may be NULL (see XXX below), in which case the state is
 * only flushed to the PCB.
 */
bool
vec_save_to_mcontext(struct lwp *l, mcontext_t *mcp, unsigned int *flagp)
{
	struct pcb * const pcb = lwp_getpcb(l);

	KASSERT(l == curlwp);

	/* Save AltiVec context, if any. */
	if (!vec_used_p(l))
		return false;

	/*
	 * If we're the AltiVec owner, dump its context to the PCB first.
	 */
	pcu_save(&vec_ops, l);

	if (mcp != NULL) {	/* XXX see sendsig_sigcontext() */
		mcp->__gregs[_REG_MSR] |= PSL_VEC;
		mcp->__vrf.__vscr = pcb->pcb_vr.vscr;
		/* VRSAVE's live copy is kept in the trap frame. */
		mcp->__vrf.__vrsave = l->l_md.md_utf->tf_vrsave;
		memcpy(mcp->__vrf.__vrs, pcb->pcb_vr.vreg,
		    sizeof (mcp->__vrf.__vrs));
		*flagp |= _UC_POWERPC_VEC;
	}
	return true;
}
208b8ea2c8cSmatt
/* Vector register borrowed as the all-zeroes source in vzeropage(). */
#define ZERO_VEC	19

/*
 * Zero the physical page at "pa" using AltiVec stores.  Data address
 * translation (PSL_DR) is turned off around the store loop so the
 * physical address can be stored to directly; interrupts are disabled
 * for the duration and the caller's MSR (AltiVec off) is restored on
 * exit.
 */
void
vzeropage(paddr_t pa)
{
	const paddr_t ea = pa + PAGE_SIZE;
	/* 16-byte-aligned spill slot for the vector register we borrow. */
	uint32_t vec[7], *vp = (void *) roundup((uintptr_t) vec, 16);
	register_t omsr, msr;

	__asm volatile("mfmsr %0" : "=r"(omsr) :);

	/*
	 * Turn on AltiVec, turn off interrupts.
	 */
	msr = (omsr & ~PSL_EE) | PSL_VEC;
	__asm volatile("sync; mtmsr %0; isync" :: "r"(msr));

	/*
	 * Save the VEC register we are going to use before we disable
	 * relocation (the stack is unreachable with DR off), then zero it.
	 */
	__asm("stvx %1,0,%0" :: "r"(vp), "n"(ZERO_VEC));
	__asm("vxor %0,%0,%0" :: "n"(ZERO_VEC));

	/*
	 * Zero the page, 64 bytes (one cache line on most PPCs) per
	 * loop iteration, with the DMMU off.
	 */
	__asm volatile(
	    "	sync ;"
	    "	mfmsr	%[msr];"
	    "	rlwinm	%[msr],%[msr],0,28,26;"	/* Clear PSL_DR */
	    "	mtmsr	%[msr];"		/* Turn off DMMU */
	    "	isync;"
	    "1:	stvx	%[zv], %[pa], %[off0];"
	    "	stvxl	%[zv], %[pa], %[off16];"
	    "	stvx	%[zv], %[pa], %[off32];"
	    "	stvxl	%[zv], %[pa], %[off48];"
	    "	addi	%[pa], %[pa], 64;"
	    "	cmplw	%[pa], %[ea];"
	    "	blt+	1b;"
	    "	ori	%[msr], %[msr], 0x10;"	/* Set PSL_DR */
	    "	sync;"
	    "	mtmsr	%[msr];"		/* Turn on DMMU */
	    "	isync;"
	    :: [msr] "r"(msr), [pa] "b"(pa), [ea] "b"(ea),
	    [off0] "r"(0), [off16] "r"(16), [off32] "r"(32), [off48] "r"(48),
	    [zv] "n"(ZERO_VEC));

	/*
	 * Restore VEC register (now that we can access the stack again).
	 */
	__asm("lvx %1,0,%0" :: "r"(vp), "n"(ZERO_VEC));

	/*
	 * Restore old MSR (AltiVec OFF).
	 */
	__asm volatile("sync; mtmsr %0; isync" :: "r"(omsr));
}
2672b0e97a5Smatt
/* Vector registers borrowed as copy buffers in vcopypage(). */
#define LO_VEC	16
#define HI_VEC	17

/*
 * Copy the physical page at "src" to the physical page at "dst"
 * using AltiVec loads/stores with data address translation (PSL_DR)
 * turned off, so the physical addresses can be accessed directly.
 * Interrupts are disabled for the duration and the caller's MSR
 * (AltiVec off) is restored on exit.
 */
void
vcopypage(paddr_t dst, paddr_t src)
{
	const paddr_t edst = dst + PAGE_SIZE;
	/* 16-byte-aligned spill area for the two borrowed vector regs. */
	uint32_t vec[11], *vp = (void *) roundup((uintptr_t) vec, 16);
	register_t omsr, msr;

	__asm volatile("mfmsr %0" : "=r"(omsr) :);

	/*
	 * Turn on AltiVec, turn off interrupts.
	 */
	msr = (omsr & ~PSL_EE) | PSL_VEC;
	__asm volatile("sync; mtmsr %0; isync" :: "r"(msr));

	/*
	 * Save the VEC registers we will be using before we disable
	 * relocation (the stack is unreachable with DR off).
	 */
	__asm("stvx %2,%1,%0" :: "b"(vp), "r"( 0), "n"(LO_VEC));
	__asm("stvx %2,%1,%0" :: "b"(vp), "r"(16), "n"(HI_VEC));

	/*
	 * Copy the page using a single cache line, with DMMU
	 * disabled.  On most PPCs, two vector registers occupy one
	 * cache line.
	 */
	__asm volatile(
	    "	sync ;"
	    "	mfmsr	%[msr];"
	    "	rlwinm	%[msr],%[msr],0,28,26;"	/* Clear PSL_DR */
	    "	mtmsr	%[msr];"		/* Turn off DMMU */
	    "	isync;"
	    "1:	lvx	%[lv], %[src], %[off0];"
	    "	stvx	%[lv], %[dst], %[off0];"
	    "	lvxl	%[hv], %[src], %[off16];"
	    "	stvxl	%[hv], %[dst], %[off16];"
	    "	addi	%[src], %[src], 32;"
	    "	addi	%[dst], %[dst], 32;"
	    "	cmplw	%[dst], %[edst];"
	    "	blt+	1b;"
	    "	ori	%[msr], %[msr], 0x10;"	/* Set PSL_DR */
	    "	sync;"
	    "	mtmsr	%[msr];"		/* Turn on DMMU */
	    "	isync;"
	    :: [msr] "r"(msr), [src] "b"(src), [dst] "b"(dst),
	    [edst] "b"(edst), [off0] "r"(0), [off16] "r"(16),
	    [lv] "n"(LO_VEC), [hv] "n"(HI_VEC));

	/*
	 * Restore VEC registers (now that we can access the stack again).
	 */
	__asm("lvx %2,%1,%0" :: "b"(vp), "r"( 0), "n"(LO_VEC));
	__asm("lvx %2,%1,%0" :: "b"(vp), "r"(16), "n"(HI_VEC));

	/*
	 * Restore old MSR (AltiVec OFF).
	 */
	__asm volatile("sync; mtmsr %0; isync" :: "r"(omsr));
}
331