1*16031f7dSrin /* $NetBSD: spe.c,v 1.11 2020/07/06 09:34:16 rin Exp $ */
2b8ea2c8cSmatt
3b8ea2c8cSmatt /*-
4b8ea2c8cSmatt * Copyright (c) 2011 The NetBSD Foundation, Inc.
5b8ea2c8cSmatt * All rights reserved.
6b8ea2c8cSmatt *
7b8ea2c8cSmatt * This code is derived from software contributed to The NetBSD Foundation
8b8ea2c8cSmatt * by Matt Thomas of 3am Software Foundry.
9b8ea2c8cSmatt *
10b8ea2c8cSmatt * Redistribution and use in source and binary forms, with or without
11b8ea2c8cSmatt * modification, are permitted provided that the following conditions
12b8ea2c8cSmatt * are met:
13b8ea2c8cSmatt * 1. Redistributions of source code must retain the above copyright
14b8ea2c8cSmatt * notice, this list of conditions and the following disclaimer.
15b8ea2c8cSmatt * 2. Redistributions in binary form must reproduce the above copyright
16b8ea2c8cSmatt * notice, this list of conditions and the following disclaimer in the
17b8ea2c8cSmatt * documentation and/or other materials provided with the distribution.
18b8ea2c8cSmatt *
19b8ea2c8cSmatt * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20b8ea2c8cSmatt * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21b8ea2c8cSmatt * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22b8ea2c8cSmatt * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23b8ea2c8cSmatt * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24b8ea2c8cSmatt * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25b8ea2c8cSmatt * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26b8ea2c8cSmatt * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27b8ea2c8cSmatt * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28b8ea2c8cSmatt * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29b8ea2c8cSmatt * POSSIBILITY OF SUCH DAMAGE.
30b8ea2c8cSmatt */
31b8ea2c8cSmatt
32b8ea2c8cSmatt #include <sys/cdefs.h>
33*16031f7dSrin __KERNEL_RCSID(0, "$NetBSD: spe.c,v 1.11 2020/07/06 09:34:16 rin Exp $");
34b8ea2c8cSmatt
35*16031f7dSrin #ifdef _KERNEL_OPT
36b8ea2c8cSmatt #include "opt_altivec.h"
37*16031f7dSrin #endif
38b8ea2c8cSmatt
39b8ea2c8cSmatt #ifdef PPC_HAVE_SPE
40b8ea2c8cSmatt
41b8ea2c8cSmatt #include <sys/param.h>
42b8ea2c8cSmatt #include <sys/proc.h>
43b8ea2c8cSmatt #include <sys/systm.h>
44b8ea2c8cSmatt #include <sys/atomic.h>
45b8ea2c8cSmatt #include <sys/siginfo.h>
46f36a02e8Smatt #include <sys/pcu.h>
47b8ea2c8cSmatt
48b8ea2c8cSmatt #include <powerpc/altivec.h>
49b8ea2c8cSmatt #include <powerpc/spr.h>
50b8ea2c8cSmatt #include <powerpc/booke/spr.h>
51b8ea2c8cSmatt #include <powerpc/psl.h>
52b8ea2c8cSmatt #include <powerpc/pcb.h>
53b8ea2c8cSmatt
542b803018Smatt static void vec_state_load(lwp_t *, u_int);
55d67ab12cSrmind static void vec_state_save(lwp_t *);
56d67ab12cSrmind static void vec_state_release(lwp_t *);
57b8ea2c8cSmatt
58f36a02e8Smatt const pcu_ops_t vec_ops = {
59f36a02e8Smatt .pcu_id = PCU_VEC,
60f36a02e8Smatt .pcu_state_load = vec_state_load,
61f36a02e8Smatt .pcu_state_save = vec_state_save,
62f36a02e8Smatt .pcu_state_release = vec_state_release,
63f36a02e8Smatt };
64f36a02e8Smatt
65f36a02e8Smatt bool
vec_used_p(lwp_t * l)66f36a02e8Smatt vec_used_p(lwp_t *l)
67f36a02e8Smatt {
68877a3ccfSchs return pcu_valid_p(&vec_ops, l);
69f36a02e8Smatt }
70f36a02e8Smatt
71f36a02e8Smatt void
vec_mark_used(lwp_t * l)72f36a02e8Smatt vec_mark_used(lwp_t *l)
73f36a02e8Smatt {
74877a3ccfSchs pcu_discard(&vec_ops, l, true);
75f36a02e8Smatt }
76f36a02e8Smatt
77f36a02e8Smatt void
vec_state_load(lwp_t * l,u_int flags)782b803018Smatt vec_state_load(lwp_t *l, u_int flags)
79f36a02e8Smatt {
80f36a02e8Smatt struct pcb * const pcb = lwp_getpcb(l);
81b8ea2c8cSmatt
82d67ab12cSrmind if ((flags & PCU_VALID) == 0) {
838c87bc1fSmatt memset(&pcb->pcb_vr, 0, sizeof(pcb->pcb_vr));
848c87bc1fSmatt vec_mark_used(l);
858c87bc1fSmatt }
868c87bc1fSmatt
87b8ea2c8cSmatt /*
88b8ea2c8cSmatt * Enable SPE temporarily (and disable interrupts).
89b8ea2c8cSmatt */
90b8ea2c8cSmatt const register_t msr = mfmsr();
91b8ea2c8cSmatt mtmsr((msr & ~PSL_EE) | PSL_SPV);
92b8ea2c8cSmatt __asm volatile ("isync");
93b8ea2c8cSmatt
94b8ea2c8cSmatt /*
95b8ea2c8cSmatt * Call an assembly routine to do load everything.
96b8ea2c8cSmatt */
97b8ea2c8cSmatt vec_load_from_vreg(&pcb->pcb_vr);
98b8ea2c8cSmatt __asm volatile ("sync");
99f36a02e8Smatt
100b8ea2c8cSmatt
101b8ea2c8cSmatt /*
102b8ea2c8cSmatt * Restore MSR (turn off SPE)
103b8ea2c8cSmatt */
104b8ea2c8cSmatt mtmsr(msr);
105f36a02e8Smatt __asm volatile ("isync");
106f36a02e8Smatt
107f36a02e8Smatt /*
108b183b3efSmatt * Set PSL_SPV so vectors will be enabled on return to user.
109f36a02e8Smatt */
11087f580cfSmatt l->l_md.md_utf->tf_srr1 |= PSL_SPV;
111b8ea2c8cSmatt }
112b8ea2c8cSmatt
113b8ea2c8cSmatt void
vec_state_save(lwp_t * l)114d67ab12cSrmind vec_state_save(lwp_t *l)
115b8ea2c8cSmatt {
116f36a02e8Smatt struct pcb * const pcb = lwp_getpcb(l);
117f36a02e8Smatt
118b8ea2c8cSmatt /*
119b8ea2c8cSmatt * Turn on SPE, turn off interrupts.
120b8ea2c8cSmatt */
121b8ea2c8cSmatt const register_t msr = mfmsr();
122b8ea2c8cSmatt mtmsr((msr & ~PSL_EE) | PSL_SPV);
123b8ea2c8cSmatt __asm volatile ("isync");
124b8ea2c8cSmatt
125b8ea2c8cSmatt /*
126b8ea2c8cSmatt * Save the vector state which is best done in assembly.
127b8ea2c8cSmatt */
128b8ea2c8cSmatt vec_unload_to_vreg(&pcb->pcb_vr);
129f36a02e8Smatt __asm volatile ("sync");
130b8ea2c8cSmatt
131b8ea2c8cSmatt /*
132b8ea2c8cSmatt * Restore MSR (turn off SPE)
133b8ea2c8cSmatt */
134b8ea2c8cSmatt mtmsr(msr);
135f36a02e8Smatt __asm volatile ("isync");
136b8ea2c8cSmatt }
137b8ea2c8cSmatt
138b8ea2c8cSmatt void
vec_state_release(lwp_t * l)139d67ab12cSrmind vec_state_release(lwp_t *l)
140b8ea2c8cSmatt {
141b8ea2c8cSmatt /*
142f36a02e8Smatt * Turn off SPV so the next SPE instruction will cause a
143f36a02e8Smatt * SPE unavailable exception
144b8ea2c8cSmatt */
145f36a02e8Smatt l->l_md.md_utf->tf_srr1 &= ~PSL_SPV;
146b8ea2c8cSmatt }
147b8ea2c8cSmatt
148b8ea2c8cSmatt void
vec_restore_from_mcontext(lwp_t * l,const mcontext_t * mcp)149b8ea2c8cSmatt vec_restore_from_mcontext(lwp_t *l, const mcontext_t *mcp)
150b8ea2c8cSmatt {
151b8ea2c8cSmatt struct pcb * const pcb = lwp_getpcb(l);
152b8ea2c8cSmatt const union __vr *vr = mcp->__vrf.__vrs;
153b8ea2c8cSmatt
1540ccb325dSchs vec_save(l);
155b8ea2c8cSmatt
156b8ea2c8cSmatt /* grab the accumulator */
157b8ea2c8cSmatt pcb->pcb_vr.vreg[8][0] = vr->__vr32[2];
158b8ea2c8cSmatt pcb->pcb_vr.vreg[8][1] = vr->__vr32[3];
159b8ea2c8cSmatt
160b8ea2c8cSmatt /*
161b8ea2c8cSmatt * We store the high parts of each register in the first 8 vectors.
162b8ea2c8cSmatt */
163b8ea2c8cSmatt for (u_int i = 0; i < 8; i++, vr += 4) {
164b8ea2c8cSmatt pcb->pcb_vr.vreg[i][0] = vr[0].__vr32[0];
165b8ea2c8cSmatt pcb->pcb_vr.vreg[i][1] = vr[1].__vr32[0];
166b8ea2c8cSmatt pcb->pcb_vr.vreg[i][2] = vr[2].__vr32[0];
167b8ea2c8cSmatt pcb->pcb_vr.vreg[i][3] = vr[3].__vr32[0];
168b8ea2c8cSmatt }
169b8ea2c8cSmatt l->l_md.md_utf->tf_spefscr = pcb->pcb_vr.vscr = mcp->__vrf.__vscr;
170b8ea2c8cSmatt pcb->pcb_vr.vrsave = mcp->__vrf.__vrsave;
171b8ea2c8cSmatt }
172b8ea2c8cSmatt
173b8ea2c8cSmatt bool
vec_save_to_mcontext(lwp_t * l,mcontext_t * mcp,unsigned int * flagp)174b8ea2c8cSmatt vec_save_to_mcontext(lwp_t *l, mcontext_t *mcp, unsigned int *flagp)
175b8ea2c8cSmatt {
176b8ea2c8cSmatt struct pcb * const pcb = lwp_getpcb(l);
177b8ea2c8cSmatt
178f36a02e8Smatt if (!vec_used_p(l))
179b8ea2c8cSmatt return false;
180b8ea2c8cSmatt
1810ccb325dSchs vec_save(l);
182b8ea2c8cSmatt
183b8ea2c8cSmatt mcp->__gregs[_REG_MSR] |= PSL_SPV;
184b8ea2c8cSmatt
185b8ea2c8cSmatt union __vr *vr = mcp->__vrf.__vrs;
186b8ea2c8cSmatt const register_t *fixreg = l->l_md.md_utf->tf_fixreg;
187b8ea2c8cSmatt for (u_int i = 0; i < 32; i++, vr += 4, fixreg += 4) {
188b8ea2c8cSmatt vr[0].__vr32[0] = pcb->pcb_vr.vreg[i][0];
189b8ea2c8cSmatt vr[0].__vr32[1] = fixreg[0];
190b8ea2c8cSmatt vr[0].__vr32[2] = 0;
191b8ea2c8cSmatt vr[0].__vr32[3] = 0;
192b8ea2c8cSmatt vr[1].__vr32[0] = pcb->pcb_vr.vreg[i][1];
193b8ea2c8cSmatt vr[1].__vr32[1] = fixreg[1];
194b8ea2c8cSmatt vr[1].__vr32[2] = 0;
195b8ea2c8cSmatt vr[1].__vr32[3] = 0;
196b8ea2c8cSmatt vr[2].__vr32[0] = pcb->pcb_vr.vreg[i][2];
197b8ea2c8cSmatt vr[2].__vr32[1] = fixreg[2];
198b8ea2c8cSmatt vr[2].__vr32[2] = 0;
199b8ea2c8cSmatt vr[2].__vr32[3] = 0;
200b8ea2c8cSmatt vr[3].__vr32[0] = pcb->pcb_vr.vreg[i][3];
201b8ea2c8cSmatt vr[3].__vr32[1] = fixreg[3];
202b8ea2c8cSmatt vr[3].__vr32[2] = 0;
203b8ea2c8cSmatt vr[3].__vr32[3] = 0;
204b8ea2c8cSmatt }
205b8ea2c8cSmatt
206b8ea2c8cSmatt mcp->__vrf.__vrs[0].__vr32[2] = pcb->pcb_vr.vreg[8][0];
207b8ea2c8cSmatt mcp->__vrf.__vrs[0].__vr32[3] = pcb->pcb_vr.vreg[8][1];
208b8ea2c8cSmatt
209b8ea2c8cSmatt mcp->__vrf.__vrsave = pcb->pcb_vr.vrsave;
210b8ea2c8cSmatt mcp->__vrf.__vscr = l->l_md.md_utf->tf_spefscr;
211b8ea2c8cSmatt
212b8ea2c8cSmatt *flagp |= _UC_POWERPC_SPE;
213b8ea2c8cSmatt
214b8ea2c8cSmatt return true;
215b8ea2c8cSmatt }
216b8ea2c8cSmatt
217b8ea2c8cSmatt static const struct {
218b8ea2c8cSmatt uint32_t mask;
219b8ea2c8cSmatt int code;
220b8ea2c8cSmatt } spefscr_siginfo_map[] = {
221b8ea2c8cSmatt { SPEFSCR_FINV|SPEFSCR_FINVH, FPE_FLTINV },
222b8ea2c8cSmatt { SPEFSCR_FOVF|SPEFSCR_FOVFH, FPE_FLTOVF },
223b8ea2c8cSmatt { SPEFSCR_FUNF|SPEFSCR_FUNFH, FPE_FLTUND },
224b8ea2c8cSmatt { SPEFSCR_FX |SPEFSCR_FXH, FPE_FLTRES },
225b8ea2c8cSmatt { SPEFSCR_FDBZ|SPEFSCR_FDBZH, FPE_FLTDIV },
226b8ea2c8cSmatt { SPEFSCR_OV |SPEFSCR_OVH, FPE_INTOVF },
227b8ea2c8cSmatt };
228b8ea2c8cSmatt
229b8ea2c8cSmatt int
vec_siginfo_code(const struct trapframe * tf)230b8ea2c8cSmatt vec_siginfo_code(const struct trapframe *tf)
231b8ea2c8cSmatt {
232b8ea2c8cSmatt for (u_int i = 0; i < __arraycount(spefscr_siginfo_map); i++) {
233b8ea2c8cSmatt if (tf->tf_spefscr & spefscr_siginfo_map[i].mask)
234b8ea2c8cSmatt return spefscr_siginfo_map[i].code;
235b8ea2c8cSmatt }
236b8ea2c8cSmatt return 0;
237b8ea2c8cSmatt }
238b8ea2c8cSmatt
239b8ea2c8cSmatt #endif /* PPC_HAVE_SPE */
240