xref: /netbsd-src/sys/arch/alpha/alpha/fp_complete.c (revision 2d93273ebd9d1840d6d98e634f2f7a05b707a1b7)
1 /* $NetBSD: fp_complete.c,v 1.31 2023/11/21 22:19:12 thorpej Exp $ */
2 
3 /*-
4  * Copyright (c) 2001 Ross Harvey
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed by the NetBSD
18  *	Foundation, Inc. and its contributors.
19  * 4. Neither the name of The NetBSD Foundation nor the names of its
20  *    contributors may be used to endorse or promote products derived
21  *    from this software without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
24  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
25  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
26  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
27  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33  * POSSIBILITY OF SUCH DAMAGE.
34  */
35 
36 #include "opt_ddb.h"
37 
38 #include <sys/cdefs.h>			/* RCS ID & Copyright macro defns */
39 
40 __KERNEL_RCSID(0, "$NetBSD: fp_complete.c,v 1.31 2023/11/21 22:19:12 thorpej Exp $");
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/proc.h>
45 #include <sys/atomic.h>
46 #include <sys/evcnt.h>
47 
48 #include <machine/cpu.h>
49 #include <machine/fpu.h>
50 #include <machine/reg.h>
51 #include <machine/alpha.h>
52 #include <machine/alpha_instruction.h>
53 
54 #include <lib/libkern/softfloat.h>
55 
56 /*
57  * Validate our assumptions about bit positions.
58  */
59 __CTASSERT(ALPHA_AESR_INV == (FP_X_INV << 1));
60 __CTASSERT(ALPHA_AESR_DZE == (FP_X_DZ  << 1));
61 __CTASSERT(ALPHA_AESR_OVF == (FP_X_OFL << 1));
62 __CTASSERT(ALPHA_AESR_UNF == (FP_X_UFL << 1));
63 __CTASSERT(ALPHA_AESR_INE == (FP_X_IMP << 1));
64 __CTASSERT(ALPHA_AESR_IOV == (FP_X_IOV << 1));
65 
66 __CTASSERT(IEEE_TRAP_ENABLE_INV == (FP_X_INV << 1));
67 __CTASSERT(IEEE_TRAP_ENABLE_DZE == (FP_X_DZ  << 1));
68 __CTASSERT(IEEE_TRAP_ENABLE_OVF == (FP_X_OFL << 1));
69 __CTASSERT(IEEE_TRAP_ENABLE_UNF == (FP_X_UFL << 1));
70 __CTASSERT(IEEE_TRAP_ENABLE_INE == (FP_X_IMP << 1));
71 
72 __CTASSERT((uint64_t)FP_X_IMP << (61 - 3) == FPCR_INED);
73 __CTASSERT((uint64_t)FP_X_UFL << (61 - 3) == FPCR_UNFD);
74 __CTASSERT((uint64_t)FP_X_OFL << (49 - 0) == FPCR_OVFD);
75 __CTASSERT((uint64_t)FP_X_DZ  << (49 - 0) == FPCR_DZED);
76 __CTASSERT((uint64_t)FP_X_INV << (49 - 0) == FPCR_INVD);
77 
78 __CTASSERT(FP_C_ALLBITS == MDLWP_FP_C);
79 
#define	TSWINSIZE 4	/* size of trap shadow window in uint32_t units */

/*
 * Opcode classes.  Each is a bit mask that alpha_fp_complete() tests
 * against (1UL << major opcode).
 *
 *	Set Name		Opcode bits		AARM C.* Symbols
 */
#define	CPUREG_CLASS		(0xfUL << 16)		/* INT[ALSM]	  */
#define	FPUREG_CLASS		(0xfUL << 20)		/* ITFP, FLT[ILV] */
#define	CHECKFUNCTIONCODE	(1UL << 24)		/* MISC		  */
#define	TRAPSHADOWBOUNDARY						\
	((1UL << 0)		/* PAL		*/ |			\
	 (1UL << 25)		/* \PAL\	*/ |			\
	 (1UL << 26)		/* JSR		*/ |			\
	 (1UL << 27)		/* \PAL\	*/ |			\
	 (1UL << 29)		/* \PAL\	*/ |			\
	 (1UL << 30)		/* \PAL\	*/ |			\
	 (1UL << 31)		/* \PAL\	*/ |			\
	 (0xffffUL << 48)	/* branch ops	*/ |			\
	 CHECKFUNCTIONCODE)
96 
/*
 * Assemble the bit pattern of a `width'-bit floating-point value from its
 * sign, exponent (`expwidth' bits wide), most significant fraction bit and
 * the remaining fraction bits.  The whole expansion is parenthesized so
 * that it can be used safely inside a larger expression.
 */
#define	MAKE_FLOATXX(width, expwidth, sign, exp, msb, rest_of_frac) \
	((u_int ## width ## _t)(sign) << ((width) - 1)			|\
	 (u_int ## width ## _t)(exp)  << ((width) - 1 - (expwidth))	|\
	 (u_int ## width ## _t)(msb)  << ((width) - 1 - (expwidth) - 1)	|\
	 (u_int ## width ## _t)(rest_of_frac))

/* Quiet NaNs: all-ones exponent, top fraction bit set. */
#define	FLOAT32QNAN MAKE_FLOATXX(32, 8, 0, 0xff, 1, 0)
#define	FLOAT64QNAN MAKE_FLOATXX(64, 11, 0, 0x7ff, 1, 0)

/* True if *v has a zero exponent field and a non-zero fraction. */
#define IS_SUBNORMAL(v)	((v)->exp == 0 && (v)->frac != 0)

/*
 * Flush a subnormal operand (PREFILTER, when IEEE_MAP_DMZ is set in the
 * lwp's md_flags) or a subnormal result (POSTFILTER, when IEEE_MAP_UMZ is
 * set) to a zero fraction.  Written as do/while so that each use is a
 * single statement that requires its own terminating semicolon.
 */
#define	PREFILTER_SUBNORMAL(l,v)					\
	do {								\
		if (((l)->l_md.md_flags & IEEE_MAP_DMZ)			\
		    && IS_SUBNORMAL(v))					\
			(v)->frac = 0;					\
	} while (/*CONSTCOND*/0)

#define	POSTFILTER_SUBNORMAL(l,v)					\
	do {								\
		if (((l)->l_md.md_flags & IEEE_MAP_UMZ)			\
		    && IS_SUBNORMAL(v))					\
			(v)->frac = 0;					\
	} while (/*CONSTCOND*/0)

	/* Alpha returns 2.0 for true, all zeroes for false. */

#define CMP_RESULT(flag) ((flag) ? (4UL << 60) : 0L)

	/* Move bits from sw fp_c to hw fpcr. */

#define	CRBLIT(sw, hw, m, offs) \
	(((sw) & ~(m)) | (((hw) >> (offs)) & (m)))
123 
124 struct evcnt fpevent_use;
125 struct evcnt fpevent_reuse;
126 
/*
 * Temporary trap shadow instrumentation.  The resolved/unresolved
 * counters could be kept permanently: they show whether user code has
 * met the AARM trap shadow generation requirements.
 */
struct alpha_shadow {
	uint64_t resolved;	/* scans that found the trigger pc */
	uint64_t unresolved;	/* scans that did not; code problems? */
	uint64_t scans;		/* number of trap shadow scans */
	uint64_t len;		/* total instructions examined */
	uint64_t uop;		/* bit mask of unexpected opcodes */
	uint64_t sqrts;		/* ev6+ square root single count */
	uint64_t sqrtt;		/* ev6+ square root double count */
	uint32_t ufunc;		/* bit mask of unexpected functions */
	uint32_t max;		/* longest single trap shadow scan */
	uint32_t nilswop;	/* unexpected op codes */
	uint32_t nilswfunc;	/* unexpected function codes */
	uint32_t nilanyop;	/* this "cannot happen" */
	uint32_t vax;		/* sigs from vax fp opcodes */
};

struct alpha_shadow alpha_shadow, alpha_shadow_zero;
148 
149 static float64 float64_unk(float64, float64);
150 static float64 compare_un(float64, float64);
151 static float64 compare_eq(float64, float64);
152 static float64 compare_lt(float64, float64);
153 static float64 compare_le(float64, float64);
154 static void cvt_qs_ts_st_gf_qf(uint32_t, struct lwp *);
155 static void cvt_gd(uint32_t, struct lwp *);
156 static void cvt_qt_dg_qg(uint32_t, struct lwp *);
157 static void cvt_tq_gq(uint32_t, struct lwp *);
158 
159 static float32 (*swfp_s[])(float32, float32) = {
160 	float32_add, float32_sub, float32_mul, float32_div,
161 };
162 
163 static float64 (*swfp_t[])(float64, float64) = {
164 	float64_add, float64_sub, float64_mul, float64_div,
165 	compare_un,    compare_eq,    compare_lt,    compare_le,
166 	float64_unk, float64_unk, float64_unk, float64_unk
167 };
168 
169 static void (*swfp_cvt[])(uint32_t, struct lwp *) = {
170 	cvt_qs_ts_st_gf_qf, cvt_gd, cvt_qt_dg_qg, cvt_tq_gq
171 };
172 
173 static void
this_cannot_happen(int what_cannot_happen,int64_t bits)174 this_cannot_happen(int what_cannot_happen, int64_t bits)
175 {
176 	static int total;
177 	alpha_instruction inst;
178 	static uint64_t reported;
179 
180 	inst.bits = bits;
181 	++alpha_shadow.nilswfunc;
182 	if (bits != -1)
183 		alpha_shadow.uop |= 1UL << inst.generic_format.opcode;
184 	if (1UL << what_cannot_happen & reported)
185 		return;
186 	reported |= 1UL << what_cannot_happen;
187 	if (total >= 1000)
188 		return;	/* right now, this return "cannot happen" */
189 	++total;
190 	if (bits)
191 		printf("FP instruction %x\n", (unsigned int)bits);
192 	printf("FP event %d/%lx/%lx\n", what_cannot_happen, reported,
193 	    alpha_shadow.uop);
194 	printf("Please report this to port-alpha-maintainer@NetBSD.org\n");
195 }
196 
197 static inline void
sts(unsigned int rn,s_float * v,struct lwp * l)198 sts(unsigned int rn, s_float *v, struct lwp *l)
199 {
200 	alpha_sts(rn, v);
201 	PREFILTER_SUBNORMAL(l, v);
202 }
203 
204 static inline void
stt(unsigned int rn,t_float * v,struct lwp * l)205 stt(unsigned int rn, t_float *v, struct lwp *l)
206 {
207 	alpha_stt(rn, v);
208 	PREFILTER_SUBNORMAL(l, v);
209 }
210 
211 static inline void
lds(unsigned int rn,s_float * v,struct lwp * l)212 lds(unsigned int rn, s_float *v, struct lwp *l)
213 {
214 	POSTFILTER_SUBNORMAL(l, v);
215 	alpha_lds(rn, v);
216 }
217 
218 static inline void
ldt(unsigned int rn,t_float * v,struct lwp * l)219 ldt(unsigned int rn, t_float *v, struct lwp *l)
220 {
221 	POSTFILTER_SUBNORMAL(l, v);
222 	alpha_ldt(rn, v);
223 }
224 
225 static float64
compare_lt(float64 a,float64 b)226 compare_lt(float64 a, float64 b)
227 {
228 	return CMP_RESULT(float64_lt_quiet(a, b));
229 }
230 
231 static float64
compare_le(float64 a,float64 b)232 compare_le(float64 a, float64 b)
233 {
234 	return CMP_RESULT(float64_le_quiet(a, b));
235 }
236 
237 static float64
compare_un(float64 a,float64 b)238 compare_un(float64 a, float64 b)
239 {
240 	if (float64_is_nan(a) | float64_is_nan(b)) {
241 		if (float64_is_signaling_nan(a) | float64_is_signaling_nan(b))
242 			float_set_invalid();
243 		return CMP_RESULT(1);
244 	}
245 	return CMP_RESULT(0);
246 }
247 
248 static float64
compare_eq(float64 a,float64 b)249 compare_eq(float64 a, float64 b)
250 {
251 	return CMP_RESULT(float64_eq(a, b));
252 }
253 /*
254  * A note regarding the VAX FP ops.
255  *
256  * The AARM gives us complete leeway to set or not set status flags on VAX
257  * ops, but we do any subnorm, NaN and dirty zero fixups anyway, and we set
258  * flags by IEEE rules.  Many ops are common to d/f/g and s/t source types.
259  * For the purely vax ones, it's hard to imagine ever running them.
260  * (Generated VAX fp ops with completion flags? Hmm.)  We are careful never
261  * to panic, assert, or print unlimited output based on a path through the
262  * decoder, so weird cases don't become security issues.
263  */
264 static void
cvt_qs_ts_st_gf_qf(uint32_t inst_bits,struct lwp * l)265 cvt_qs_ts_st_gf_qf(uint32_t inst_bits, struct lwp *l)
266 {
267 	t_float tfb, tfc;
268 	s_float sfb, sfc;
269 	alpha_instruction inst;
270 
271 	inst.bits = inst_bits;
272 	/*
273 	 * cvtst and cvtts have the same opcode, function, and source.  The
274 	 * distinction for cvtst is hidden in the illegal modifier combinations.
275 	 * We decode even the non-/s modifier, so that the fix-up-always mode
276 	 * works on ev6 and later. The rounding bits are unused and fixed for
277 	 * cvtst, so we check those too.
278 	 */
279 	switch(inst.float_format.function) {
280 	case op_cvtst:
281 	case op_cvtst_u:
282 		sts(inst.float_detail.fb, &sfb, l);
283 		tfc.i = float32_to_float64(sfb.i);
284 		ldt(inst.float_detail.fc, &tfc, l);
285 		return;
286 	}
287 	if(inst.float_detail.src == 2) {
288 		stt(inst.float_detail.fb, &tfb, l);
289 		sfc.i = float64_to_float32(tfb.i);
290 		lds(inst.float_detail.fc, &sfc, l);
291 		return;
292 	}
293 	/* 0: S/F */
294 	/* 1:  /D */
295 	/* 3: Q/Q */
296 	this_cannot_happen(5, inst.generic_format.opcode);
297 	tfc.i = FLOAT64QNAN;
298 	ldt(inst.float_detail.fc, &tfc, l);
299 	return;
300 }
301 
302 static void
cvt_gd(uint32_t inst_bits,struct lwp * l)303 cvt_gd(uint32_t inst_bits, struct lwp *l)
304 {
305 	t_float tfb, tfc;
306 	alpha_instruction inst;
307 
308 	inst.bits = inst_bits;
309 	stt(inst.float_detail.fb, &tfb, l);
310 	(void) float64_to_float32(tfb.i);
311 	l->l_md.md_flags &= ~NETBSD_FLAG_TO_FP_C(FP_X_IMP);
312 	tfc.i = float64_add(tfb.i, (float64)0);
313 	ldt(inst.float_detail.fc, &tfc, l);
314 }
315 
316 static void
cvt_qt_dg_qg(uint32_t inst_bits,struct lwp * l)317 cvt_qt_dg_qg(uint32_t inst_bits, struct lwp *l)
318 {
319 	t_float tfb, tfc;
320 	alpha_instruction inst;
321 
322 	inst.bits = inst_bits;
323 	switch(inst.float_detail.src) {
324 	case 0:	/* S/F */
325 		this_cannot_happen(3, inst.bits);
326 		/* fall thru */
327 	case 1: /* D */
328 		/* VAX dirty 0's and reserved ops => UNPREDICTABLE */
329 		/* We've done what's important by just not trapping */
330 		tfc.i = 0;
331 		break;
332 	case 2: /* T/G */
333 		this_cannot_happen(4, inst.bits);
334 		tfc.i = 0;
335 		break;
336 	case 3:	/* Q/Q */
337 		stt(inst.float_detail.fb, &tfb, l);
338 		tfc.i = int64_to_float64(tfb.i);
339 		break;
340 	}
341 	alpha_ldt(inst.float_detail.fc, &tfc);
342 }
343 /*
344  * XXX: AARM and 754 seem to disagree here, also, beware of softfloat's
345  *      unfortunate habit of always returning the nontrapping result.
346  * XXX: there are several apparent AARM/AAH disagreements, as well as
347  *      the issue of trap handler pc and trapping results.
348  */
349 static void
cvt_tq_gq(uint32_t inst_bits,struct lwp * l)350 cvt_tq_gq(uint32_t inst_bits, struct lwp *l)
351 {
352 	t_float tfb, tfc;
353 	alpha_instruction inst;
354 
355 	inst.bits = inst_bits;
356 	stt(inst.float_detail.fb, &tfb, l);
357 	tfc.i = tfb.sign ? float64_to_int64(tfb.i) : float64_to_uint64(tfb.i);
358 	alpha_ldt(inst.float_detail.fc, &tfc);	/* yes, ldt */
359 }
360 
361 static uint64_t
fp_c_to_fpcr_1(uint64_t fpcr,uint64_t fp_c)362 fp_c_to_fpcr_1(uint64_t fpcr, uint64_t fp_c)
363 {
364 	uint64_t disables;
365 
366 	/*
367 	 * It's hard to arrange for conforming bit fields, because the FP_C
368 	 * and the FPCR are both architected, with specified (and relatively
369 	 * scrambled) bit numbers. Defining an internal unscrambled FP_C
370 	 * wouldn't help much, because every user exception requires the
371 	 * architected bit order in the sigcontext.
372 	 *
373 	 * Programs that fiddle with the fpcr exception bits (instead of fp_c)
374 	 * will lose, because those bits can be and usually are subsetted;
375 	 * the official home is in the fp_c. Furthermore, the kernel puts
376 	 * phony enables (it lies :-) in the fpcr in order to get control when
377 	 * it is necessary to initially set a sticky bit.
378 	 */
379 
380 	fpcr &= FPCR_DYN_RM;
381 
382 	/*
383 	 * enable traps = case where flag bit is clear AND program wants a trap
384 	 *
385 	 * enables = ~flags & mask
386 	 * disables = ~(~flags | mask)
387 	 * disables = flags & ~mask. Thank you, Augustus De Morgan (1806-1871)
388 	 */
389 	disables = FP_C_TO_NETBSD_FLAG(fp_c) & ~FP_C_TO_NETBSD_MASK(fp_c);
390 
391 	fpcr |= (disables & (FP_X_IMP | FP_X_UFL)) << (61 - 3);
392 	fpcr |= (disables & (FP_X_OFL | FP_X_DZ | FP_X_INV)) << (49 - 0);
393 
394 	fpcr |= fp_c & FP_C_MIRRORED << (FPCR_MIR_START - FP_C_MIR_START);
395 	fpcr |= (fp_c & IEEE_MAP_DMZ) << 36;
396 	if (fp_c & FP_C_MIRRORED)
397 		fpcr |= FPCR_SUM;
398 	if (fp_c & IEEE_MAP_UMZ)
399 		fpcr |= FPCR_UNDZ | FPCR_UNFD;
400 	fpcr |= (~fp_c & IEEE_TRAP_ENABLE_DNO) << 41;
401 	return fpcr;
402 }
403 
404 static void
fp_c_to_fpcr(struct lwp * l)405 fp_c_to_fpcr(struct lwp *l)
406 {
407 	alpha_write_fpcr(fp_c_to_fpcr_1(alpha_read_fpcr(), l->l_md.md_flags));
408 }
409 
410 void
alpha_write_fp_c(struct lwp * l,uint64_t fp_c)411 alpha_write_fp_c(struct lwp *l, uint64_t fp_c)
412 {
413 	uint64_t md_flags;
414 
415 	fp_c &= MDLWP_FP_C;
416 	md_flags = l->l_md.md_flags;
417 	if ((md_flags & MDLWP_FP_C) == fp_c)
418 		return;
419 	l->l_md.md_flags = (md_flags & ~MDLWP_FP_C) | fp_c;
420 	kpreempt_disable();
421 	if (md_flags & MDLWP_FPACTIVE) {
422 		alpha_pal_wrfen(1);
423 		fp_c_to_fpcr(l);
424 		alpha_pal_wrfen(0);
425 	} else {
426 		struct pcb *pcb = l->l_addr;
427 
428 		pcb->pcb_fp.fpr_cr =
429 		    fp_c_to_fpcr_1(pcb->pcb_fp.fpr_cr, l->l_md.md_flags);
430 	}
431 	kpreempt_enable();
432 }
433 
434 uint64_t
alpha_read_fp_c(struct lwp * l)435 alpha_read_fp_c(struct lwp *l)
436 {
437 	/*
438 	 * A possibly-desirable EV6-specific optimization would deviate from
439 	 * the Alpha Architecture spec and keep some FP_C bits in the FPCR,
440 	 * but in a transparent way. Some of the code for that would need to
441 	 * go right here.
442 	 */
443 	return l->l_md.md_flags & MDLWP_FP_C;
444 }
445 
446 static float64
float64_unk(float64 a,float64 b)447 float64_unk(float64 a, float64 b)
448 {
449 	return 0;
450 }
451 
452 /*
453  * The real function field encodings for IEEE and VAX FP instructions.
454  *
455  * Since there is only one operand type field, the cvtXX instructions
456  * require a variety of special cases, and these have to be analyzed as
457  * they don't always fit into the field descriptions in AARM section I.
458  *
459  * Lots of staring at bits in the appendix shows what's really going on.
460  *
461  *	   |	       |
462  * 15 14 13|12 11 10 09|08 07 06 05
463  * --------======------============
464  *  TRAP   : RND : SRC : FUNCTION  :
465  *  0  0  0:. . .:. . . . . . . . . . . . Imprecise
466  *  0  0  1|. . .:. . . . . . . . . . . ./U underflow enable (if FP output)
467  *	   |				 /V overflow enable (if int output)
468  *  0  1  0:. . .:. . . . . . . . . . . ."Unsupported", but used for CVTST
469  *  0  1  1|. . .:. . . . . . . . . . . . Unsupported
470  *  1  0  0:. . .:. . . . . . . . . . . ./S software completion (VAX only)
471  *  1  0  1|. . .:. . . . . . . . . . . ./SU
472  *	   |				 /SV
473  *  1  1  0:. . .:. . . . . . . . . . . ."Unsupported", but used for CVTST/S
474  *  1  1  1|. . .:. . . . . . . . . . . ./SUI (if FP output)	(IEEE only)
475  *	   |				 /SVI (if int output)   (IEEE only)
476  *  S  I  UV: In other words: bits 15:13 are S:I:UV, except that _usually_
477  *	   |  not all combinations are valid.
478  *	   |	       |
479  * 15 14 13|12 11 10 09|08 07 06 05
480  * --------======------============
481  *  TRAP   : RND : SRC : FUNCTION  :
482  *	   | 0	0 . . . . . . . . . . . ./C Chopped
483  *	   : 0	1 . . . . . . . . . . . ./M Minus Infinity
484  *	   | 1	0 . . . . . . . . . . . .   Normal
485  *	   : 1	1 . . . . . . . . . . . ./D Dynamic (in FPCR: Plus Infinity)
486  *	   |	       |
487  * 15 14 13|12 11 10 09|08 07 06 05
488  * --------======------============
489  *  TRAP   : RND : SRC : FUNCTION  :
490  *		   0 0. . . . . . . . . . S/F
491  *		   0 1. . . . . . . . . . -/D
492  *		   1 0. . . . . . . . . . T/G
493  *		   1 1. . . . . . . . . . Q/Q
494  *	   |	       |
495  * 15 14 13|12 11 10 09|08 07 06 05
496  * --------======------============
497  *  TRAP   : RND : SRC : FUNCTION  :
498  *			 0  0  0  0 . . . addX
499  *			 0  0  0  1 . . . subX
500  *			 0  0  1  0 . . . mulX
501  *			 0  0  1  1 . . . divX
502  *			 0  1  0  0 . . . cmpXun
503  *			 0  1  0  1 . . . cmpXeq
504  *			 0  1  1  0 . . . cmpXlt
505  *			 0  1  1  1 . . . cmpXle
506  *			 1  0  0  0 . . . reserved
507  *			 1  0  0  1 . . . reserved
508  *			 1  0  1  0 . . . sqrt[fg] (op_fix, not exactly "vax")
509  *			 1  0  1  1 . . . sqrt[st] (op_fix, not exactly "ieee")
510  *			 1  1  0  0 . . . cvtXs/f (cvt[qt]s, cvtst(!), cvt[gq]f)
511  *			 1  1  0  1 . . . cvtXd   (vax only)
512  *			 1  1  1  0 . . . cvtXt/g (cvtqt, cvt[dq]g only)
513  *			 1  1  1  1 . . . cvtXq/q (cvttq, cvtgq)
514  *	   |	       |
515  * 15 14 13|12 11 10 09|08 07 06 05	  the twilight zone
516  * --------======------============
517  *  TRAP   : RND : SRC : FUNCTION  :
518  * /s /i /u  x  x  1  0  1  1  0  0 . . . cvtts, /siu only 0, 1, 5, 7
519  *  0  1  0  1  0  1  0  1  1  0  0 . . . cvtst   (src == T (!)) 2ac NOT /S
520  *  1  1  0  1  0  1  0  1  1  0  0 . . . cvtst/s (src == T (!)) 6ac
521  *  x  0  x  x  x  x  0	 1  1  1  1 . . . cvttq/_ (src == T)
522  */
523 
524 static void
print_fp_instruction(unsigned long pc,struct lwp * l,uint32_t bits)525 print_fp_instruction(unsigned long pc, struct lwp *l, uint32_t bits)
526 {
527 #if defined(DDB)
528 	char buf[32];
529 	struct alpha_print_instruction_context ctx = {
530 		.insn.bits = bits,
531 		.pc = pc,
532 		.buf = buf,
533 		.bufsize = sizeof(buf),
534 	};
535 
536 	(void) alpha_print_instruction(&ctx);
537 
538 	printf("INSN [%s:%d] @0x%lx -> %s\n",
539 	    l->l_proc->p_comm, l->l_proc->p_pid, ctx.pc, ctx.buf);
540 #else
541 	alpha_instruction insn = {
542 		.bits = bits,
543 	};
544 	printf("INSN [%s:%d] @0x%lx -> opc=0x%x func=0x%x fa=%d fb=%d fc=%d\n",
545 	    l->l_proc->p_comm, l->l_proc->p_pid, (unsigned long)pc,
546 	    insn.float_format.opcode, insn.float_format.function,
547 	    insn.float_format.fa, insn.float_format.fb, insn.float_format.fc);
548 	printf("INSN [%s:%d] @0x%lx -> trp=0x%x rnd=0x%x src=0x%x fn=0x%x\n",
549 	    l->l_proc->p_comm, l->l_proc->p_pid, (unsigned long)pc,
550 	    insn.float_detail.trp, insn.float_detail.rnd,
551 	    insn.float_detail.src, insn.float_detail.opclass);
552 #endif /* DDB */
553 }
554 
555 static void
alpha_fp_interpret(unsigned long pc,struct lwp * l,uint32_t bits)556 alpha_fp_interpret(unsigned long pc, struct lwp *l, uint32_t bits)
557 {
558 	s_float sfa, sfb, sfc;
559 	t_float tfa, tfb, tfc;
560 	alpha_instruction inst;
561 
562 	if (alpha_fp_complete_debug) {
563 		print_fp_instruction(pc, l, bits);
564 	}
565 
566 	inst.bits = bits;
567 	switch(inst.generic_format.opcode) {
568 	default:
569 		/* this "cannot happen" */
570 		this_cannot_happen(2, inst.bits);
571 		return;
572 	case op_any_float:
573 		if (inst.float_format.function == op_cvtql_sv ||
574 		    inst.float_format.function == op_cvtql_v) {
575 			alpha_stt(inst.float_detail.fb, &tfb);
576 			sfc.i = (int64_t)tfb.i >= 0L ? INT_MAX : INT_MIN;
577 			alpha_lds(inst.float_detail.fc, &sfc);
578 			float_raise(FP_X_INV);
579 		} else {
580 			++alpha_shadow.nilanyop;
581 			this_cannot_happen(3, inst.bits);
582 		}
583 		break;
584 	case op_vax_float:
585 		++alpha_shadow.vax;	/* fall thru */
586 	case op_ieee_float:
587 	case op_fix_float:
588 		switch(inst.float_detail.src) {
589 		case op_src_sf:
590 			sts(inst.float_detail.fb, &sfb, l);
591 			if (inst.float_detail.opclass == 11)
592 				sfc.i = float32_sqrt(sfb.i);
593 			else if (inst.float_detail.opclass & ~3) {
594 				this_cannot_happen(1, inst.bits);
595 				sfc.i = FLOAT32QNAN;
596 			} else {
597 				sts(inst.float_detail.fa, &sfa, l);
598 				sfc.i = (*swfp_s[inst.float_detail.opclass])(
599 				    sfa.i, sfb.i);
600 			}
601 			lds(inst.float_detail.fc, &sfc, l);
602 			break;
603 		case op_src_xd:
604 		case op_src_tg:
605 			if (inst.float_detail.opclass >= 12)
606 				(*swfp_cvt[inst.float_detail.opclass - 12])(
607 				    inst.bits, l);
608 			else {
609 				stt(inst.float_detail.fb, &tfb, l);
610 				if (inst.float_detail.opclass == 11)
611 					tfc.i = float64_sqrt(tfb.i);
612 				else {
613 					stt(inst.float_detail.fa, &tfa, l);
614 					tfc.i = (*swfp_t[inst.float_detail
615 					    .opclass])(tfa.i, tfb.i);
616 				}
617 				ldt(inst.float_detail.fc, &tfc, l);
618 			}
619 			break;
620 		case op_src_qq:
621 			float_raise(FP_X_IMP);
622 			break;
623 		}
624 	}
625 }
626 
627 int
alpha_fp_complete_at(unsigned long trigger_pc,struct lwp * l,uint64_t * ucode)628 alpha_fp_complete_at(unsigned long trigger_pc, struct lwp *l, uint64_t *ucode)
629 {
630 	int needsig;
631 	alpha_instruction inst;
632 	uint64_t rm, fpcr, orig_fpcr;
633 	uint64_t orig_flags, new_flags, changed_flags, md_flags;
634 
635 	if (__predict_false(ufetch_32((void *)trigger_pc, &inst.bits))) {
636 		this_cannot_happen(6, -1);
637 		return SIGSEGV;
638 	}
639 	kpreempt_disable();
640 	if ((curlwp->l_md.md_flags & MDLWP_FPACTIVE) == 0) {
641 		fpu_load();
642 	}
643 	alpha_pal_wrfen(1);
644 	/*
645 	 * Alpha FLOAT instructions can override the rounding mode on a
646 	 * per-instruction basis.  If necessary, lie about the dynamic
647 	 * rounding mode so emulation software need go to only one place
648 	 * for it, and so we don't have to lock any memory locations or
649 	 * pass a third parameter to every SoftFloat entry point.
650 	 *
651 	 * N.B. the rounding mode field of the FLOAT format instructions
652 	 * matches that of the FPCR *except* for the value 3, which means
653 	 * "dynamic" rounding mode (i.e. what is programmed into the FPCR).
654 	 */
655 	orig_fpcr = fpcr = alpha_read_fpcr();
656 	rm = inst.float_detail.rnd;
657 	if (__predict_false(rm != 3 /* dynamic */ &&
658 			    rm != __SHIFTOUT(fpcr, FPCR_DYN_RM))) {
659 		fpcr = (fpcr & ~FPCR_DYN_RM) | __SHIFTIN(rm, FPCR_DYN_RM);
660 		alpha_write_fpcr(fpcr);
661 	}
662 	orig_flags = FP_C_TO_NETBSD_FLAG(l->l_md.md_flags);
663 
664 	alpha_fp_interpret(trigger_pc, l, inst.bits);
665 
666 	md_flags = l->l_md.md_flags;
667 
668 	new_flags = FP_C_TO_NETBSD_FLAG(md_flags);
669 	changed_flags = orig_flags ^ new_flags;
670 	KASSERT((orig_flags | changed_flags) == new_flags); /* panic on 1->0 */
671 	alpha_write_fpcr(fp_c_to_fpcr_1(orig_fpcr, md_flags));
672 	needsig = changed_flags & FP_C_TO_NETBSD_MASK(md_flags);
673 	alpha_pal_wrfen(0);
674 	kpreempt_enable();
675 	if (__predict_false(needsig)) {
676 		*ucode = needsig;
677 		return SIGFPE;
678 	}
679 	return 0;
680 }
681 
682 int
alpha_fp_complete(u_long a0,u_long a1,struct lwp * l,uint64_t * ucode)683 alpha_fp_complete(u_long a0, u_long a1, struct lwp *l, uint64_t *ucode)
684 {
685 	int t;
686 	int sig;
687 	uint64_t op_class;
688 	alpha_instruction inst;
689 	/* "trigger_pc" is Compaq's term for the earliest faulting op */
690 	alpha_instruction *trigger_pc, *usertrap_pc;
691 	alpha_instruction *pc, *win_begin, tsw[TSWINSIZE];
692 
693 	if (alpha_fp_complete_debug) {
694 		printf("%s: [%s:%d] a0[AESR]=0x%lx a1[regmask]=0x%lx "
695 		       "FPCR=0x%lx FP_C=0x%lx\n",
696 		    __func__, l->l_proc->p_comm, l->l_proc->p_pid,
697 		    a0, a1, alpha_read_fpcr(),
698 		    l->l_md.md_flags & (MDLWP_FP_C|MDLWP_FPACTIVE));
699 	}
700 
701 	pc = (alpha_instruction *)l->l_md.md_tf->tf_regs[FRAME_PC];
702 	trigger_pc = pc - 1;	/* for ALPHA_AMASK_PAT case */
703 
704 	/*
705 	 * Start out with the code mirroring the exception flags
706 	 * (FP_X_*).  Shift right 1 bit to discard SWC to achieve
707 	 * this.
708 	 */
709 	*ucode = a0 >> 1;
710 
711 	if (cpu_amask & ALPHA_AMASK_PAT) {
712 		if ((a0 & (ALPHA_AESR_SWC | ALPHA_AESR_INV)) != 0 ||
713 		    alpha_fp_sync_complete) {
714 			sig = alpha_fp_complete_at((u_long)trigger_pc, l,
715 			    ucode);
716 			goto resolved;
717 		}
718 	}
719 	if ((a0 & (ALPHA_AESR_SWC | ALPHA_AESR_INV)) == 0)
720 		goto unresolved;
721 /*
722  * At this point we are somewhere in the trap shadow of one or more instruc-
723  * tions that have trapped with software completion specified.  We have a mask
724  * of the registers written by trapping instructions.
725  *
726  * Now step backwards through the trap shadow, clearing bits in the
727  * destination write mask until the trigger instruction is found, and
728  * interpret this one instruction in SW. If a SIGFPE is not required, back up
729  * the PC until just after this instruction and restart. This will execute all
730  * trap shadow instructions between the trigger pc and the trap pc twice.
731  */
732 	trigger_pc = 0;
733 	win_begin = pc;
734 	++alpha_shadow.scans;
735 	t = alpha_shadow.len;
736 	for (--pc; a1; --pc) {
737 		++alpha_shadow.len;
738 		if (pc < win_begin) {
739 			win_begin = pc - TSWINSIZE + 1;
740 			if (copyin(win_begin, tsw, sizeof tsw)) {
741 				/* sigh, try to get just one */
742 				win_begin = pc;
743 				if (copyin(win_begin, tsw, 4)) {
744 					/*
745 					 * We're off the rails here; don't
746 					 * bother updating the FP_C.
747 					 */
748 					return SIGSEGV;
749 				}
750 			}
751 		}
752 		assert(win_begin <= pc && !((long)pc  & 3));
753 		inst = tsw[pc - win_begin];
754 		op_class = 1UL << inst.generic_format.opcode;
755 		if (op_class & FPUREG_CLASS) {
756 			a1 &= ~(1UL << (inst.operate_generic_format.rc + 32));
757 			trigger_pc = pc;
758 		} else if (op_class & CPUREG_CLASS) {
759 			a1 &= ~(1UL << inst.operate_generic_format.rc);
760 			trigger_pc = pc;
761 		} else if (op_class & TRAPSHADOWBOUNDARY) {
762 			if (op_class & CHECKFUNCTIONCODE) {
763 				if (inst.mem_format.displacement == op_trapb ||
764 				    inst.mem_format.displacement == op_excb)
765 					break;	/* code breaks AARM rules */
766 			} else
767 				break; /* code breaks AARM rules */
768 		}
769 		/* Some shadow-safe op, probably load, store, or FPTI class */
770 	}
771 	t = alpha_shadow.len - t;
772 	if (t > alpha_shadow.max)
773 		alpha_shadow.max = t;
774 	if (__predict_true(trigger_pc != 0 && a1 == 0)) {
775 		++alpha_shadow.resolved;
776 		sig = alpha_fp_complete_at((u_long)trigger_pc, l, ucode);
777 		goto resolved;
778 	} else {
779 		++alpha_shadow.unresolved;
780 	}
781 
782  unresolved: /* obligatory statement */;
783 	/*
784 	 * *ucode contains the exception bits (FP_X_*).  We need to
785 	 * update the FP_C and FPCR, and send a signal for any new
786 	 * trap that is enabled.
787 	 */
788 	uint64_t orig_flags = FP_C_TO_NETBSD_FLAG(l->l_md.md_flags);
789 	uint64_t new_flags = orig_flags | *ucode;
790 	uint64_t changed_flags = orig_flags ^ new_flags;
791 	KASSERT((orig_flags | changed_flags) == new_flags); /* panic on 1->0 */
792 
793 	l->l_md.md_flags |= NETBSD_FLAG_TO_FP_C(new_flags);
794 
795 	kpreempt_disable();
796 	if ((curlwp->l_md.md_flags & MDLWP_FPACTIVE) == 0) {
797 		fpu_load();
798 	}
799 	alpha_pal_wrfen(1);
800 	uint64_t orig_fpcr = alpha_read_fpcr();
801 	alpha_write_fpcr(fp_c_to_fpcr_1(orig_fpcr, l->l_md.md_flags));
802 	uint64_t needsig =
803 	    changed_flags & FP_C_TO_NETBSD_MASK(l->l_md.md_flags);
804 	alpha_pal_wrfen(0);
805 	kpreempt_enable();
806 
807 	if (__predict_false(needsig)) {
808 		*ucode = needsig;
809 		return SIGFPE;
810 	}
811 	return 0;
812 
813  resolved:
814 	if (sig) {
815 		usertrap_pc = trigger_pc + 1;
816 		l->l_md.md_tf->tf_regs[FRAME_PC] = (unsigned long)usertrap_pc;
817 	}
818 	return sig;
819 }
820 
821 /*
822  * Load the float-point context for the current lwp.
823  */
824 void
fpu_state_load(struct lwp * l,u_int flags)825 fpu_state_load(struct lwp *l, u_int flags)
826 {
827 	struct pcb * const pcb = lwp_getpcb(l);
828 	KASSERT(l == curlwp);
829 
830 #ifdef MULTIPROCESSOR
831 	/*
832 	 * If the LWP got switched to another CPU, pcu_switchpoint would have
833 	 * called state_release to clear MDLWP_FPACTIVE.  Now that we are back
834 	 * on the CPU that has our FP context, set MDLWP_FPACTIVE again.
835 	 */
836 	if (flags & PCU_REENABLE) {
837 		KASSERT(flags & PCU_VALID);
838 		l->l_md.md_flags |= MDLWP_FPACTIVE;
839 		return;
840 	}
841 #else
842 	KASSERT((flags & PCU_REENABLE) == 0);
843 #endif
844 
845 	/*
846 	 * Instrument FP usage -- if a process had not previously
847 	 * used FP, mark it as having used FP for the first time,
848 	 * and count this event.
849 	 *
850 	 * If a process has used FP, count a "used FP, and took
851 	 * a trap to use it again" event.
852 	 */
853 	if ((flags & PCU_VALID) == 0) {
854 		atomic_inc_ulong(&fpevent_use.ev_count);
855 	} else {
856 		atomic_inc_ulong(&fpevent_reuse.ev_count);
857 	}
858 
859 	if (alpha_fp_complete_debug) {
860 		printf("%s: [%s:%d] loading FPCR=0x%lx\n",
861 		    __func__, l->l_proc->p_comm, l->l_proc->p_pid,
862 		    pcb->pcb_fp.fpr_cr);
863 	}
864 	alpha_pal_wrfen(1);
865 	restorefpstate(&pcb->pcb_fp);
866 	alpha_pal_wrfen(0);
867 
868 	l->l_md.md_flags |= MDLWP_FPACTIVE;
869 }
870 
871 /*
872  * Save the FPU state.
873  */
874 
875 void
fpu_state_save(struct lwp * l)876 fpu_state_save(struct lwp *l)
877 {
878 	struct pcb * const pcb = lwp_getpcb(l);
879 
880 	alpha_pal_wrfen(1);
881 	savefpstate(&pcb->pcb_fp);
882 	alpha_pal_wrfen(0);
883 	if (alpha_fp_complete_debug) {
884 		printf("%s: [%s:%d] saved FPCR=0x%lx\n",
885 		    __func__, l->l_proc->p_comm, l->l_proc->p_pid,
886 		    pcb->pcb_fp.fpr_cr);
887 	}
888 }
889 
890 /*
891  * Release the FPU.
892  */
893 void
fpu_state_release(struct lwp * l)894 fpu_state_release(struct lwp *l)
895 {
896 	l->l_md.md_flags &= ~MDLWP_FPACTIVE;
897 }
898