xref: /netbsd-src/external/gpl3/gdb/dist/sim/mips/mdmx.c (revision 82d56013d7b633d116a93943de88e08335357a7c)
1 /* Simulation code for the MIPS MDMX ASE.
2    Copyright (C) 2002-2020 Free Software Foundation, Inc.
3    Contributed by Ed Satterthwaite and Chris Demetriou, of Broadcom
4    Corporation (SiByte).
5 
6 This file is part of GDB, the GNU debugger.
7 
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
12 
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 GNU General Public License for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
20 
21 #include <stdio.h>
22 
23 #include "sim-main.h"
24 
/* Within mdmx.c we refer to the sim_cpu directly: CPU names the local
   `cpu' variable and SD is whatever CPU_STATE yields for it.  */
#define CPU cpu
#define SD  (CPU_STATE(CPU))

/* XXX FIXME: temporary hack while the impact of making unpredictable()
   a "normal" (non-igen) function is evaluated.  As redefined here,
   Unpredictable() expands to a call that names `cpu' and `cia', so both
   must be in scope wherever it is used.  */
#undef Unpredictable
#define Unpredictable() unpredictable_action (cpu, cia)
33 
34 /* MDMX Representations
35 
36    An 8-bit packed byte element (OB) is always unsigned.
37    The 24-bit accumulators are signed and are represented as 32-bit
38    signed values, which are reduced to 24-bit signed values prior to
39    Round and Clamp operations.
40 
41    A 16-bit packed halfword element (QH) is always signed.
42    The 48-bit accumulators are signed and are represented as 64-bit
43    signed values, which are reduced to 48-bit signed values prior to
44    Round and Clamp operations.
45 
46    The code below assumes a 2's-complement representation of signed
47    quantities.  Care is required to clear extended sign bits when
48    repacking fields.
49 
50    The code (and the code for arithmetic shifts in mips.igen) also makes
51    the (not guaranteed portable) assumption that right shifts of signed
52    quantities in C do sign extension.  */
53 
/* Unsigned view of a 48-bit accumulator, carried in a 64-bit integer.
   MASK48 selects the low 48 bits.  */
typedef unsigned64 unsigned48;
#define MASK48 (UNSIGNED64 (0xffffffffffff))

/* Unsigned view of a 24-bit accumulator, carried in a 32-bit integer.
   MASK24 selects the low 24 bits.  */
typedef unsigned32 unsigned24;
#define MASK24 (UNSIGNED32 (0xffffff))
59 
/* Element format, as decoded by MX_FMT.  */
typedef enum {
  mdmx_ob = 0,      /* OB: packed byte elements */
  mdmx_qh = 1       /* QH: packed halfword elements */
} MX_fmt;
64 
/* How the second operand is chosen, as decoded by MX_VT.  */
typedef enum {
  sel_elem = 0,     /* one element of a register, applied to every lane */
  sel_vect = 1,     /* a whole register, lane by lane */
  sel_imm  = 2      /* an immediate, applied to every lane */
} VT_select;
70 
/* Saturation limits for the two element formats.  */
#define OB_MAX  ((unsigned8)0xFF)
#define QH_MIN  ((signed16)0x8000)
#define QH_MAX  ((signed16)0x7FFF)

/* Saturate X to the element range.  X is evaluated more than once, so
   pass only side-effect-free expressions.  */
#define OB_CLAMP(x)  ((unsigned8)((x) <= OB_MAX ? (x) : OB_MAX))
#define QH_CLAMP(x)  ((signed16)((x) > QH_MAX ? QH_MAX : \
                                ((x) < QH_MIN ? QH_MIN : (x))))
78 
/* Decode the format (bit 0) and the operand selector (bits 4 and 3)
   from a format/select field.  */
#define MX_FMT(fmtsel) (((fmtsel) & 0x1) ? mdmx_qh : mdmx_ob)
#define MX_VT(fmtsel)  (((fmtsel) & 0x10) \
                        ? (((fmtsel) & 0x08) ? sel_imm : sel_vect) \
                        : sel_elem)

/* Extract the element of V chosen by the select bits of FMTSEL.  The
   masked select bits times four give the bit offset of the element.  */
#define QH_ELEM(v,fmtsel) \
        ((signed16)(((v) >> (((fmtsel) & 0xC) * 4)) & 0xFFFF))
#define OB_ELEM(v,fmtsel) \
        ((unsigned8)(((v) >> (((fmtsel) & 0xE) * 4)) & 0xFF))
87 
88 
89 typedef signed16 (*QH_FUNC)(signed16, signed16);
90 typedef unsigned8 (*OB_FUNC)(unsigned8, unsigned8);
91 
92 /* vectorized logical operators */
93 
94 static signed16
95 AndQH(signed16 ts, signed16 tt)
96 {
97   return (signed16)((unsigned16)ts & (unsigned16)tt);
98 }
99 
100 static unsigned8
101 AndOB(unsigned8 ts, unsigned8 tt)
102 {
103   return ts & tt;
104 }
105 
106 static signed16
107 NorQH(signed16 ts, signed16 tt)
108 {
109   return (signed16)(((unsigned16)ts | (unsigned16)tt) ^ 0xFFFF);
110 }
111 
112 static unsigned8
113 NorOB(unsigned8 ts, unsigned8 tt)
114 {
115   return (ts | tt) ^ 0xFF;
116 }
117 
118 static signed16
119 OrQH(signed16 ts, signed16 tt)
120 {
121   return (signed16)((unsigned16)ts | (unsigned16)tt);
122 }
123 
124 static unsigned8
125 OrOB(unsigned8 ts, unsigned8 tt)
126 {
127   return ts | tt;
128 }
129 
130 static signed16
131 XorQH(signed16 ts, signed16 tt)
132 {
133   return (signed16)((unsigned16)ts ^ (unsigned16)tt);
134 }
135 
136 static unsigned8
137 XorOB(unsigned8 ts, unsigned8 tt)
138 {
139   return ts ^ tt;
140 }
141 
142 static signed16
143 SLLQH(signed16 ts, signed16 tt)
144 {
145   unsigned32 s = (unsigned32)tt & 0xF;
146   return (signed16)(((unsigned32)ts << s) & 0xFFFF);
147 }
148 
149 static unsigned8
150 SLLOB(unsigned8 ts, unsigned8 tt)
151 {
152   unsigned32 s = tt & 0x7;
153   return (ts << s) & 0xFF;
154 }
155 
156 static signed16
157 SRLQH(signed16 ts, signed16 tt)
158 {
159   unsigned32 s = (unsigned32)tt & 0xF;
160   return (signed16)((unsigned16)ts >> s);
161 }
162 
163 static unsigned8
164 SRLOB(unsigned8 ts, unsigned8 tt)
165 {
166   unsigned32 s = tt & 0x7;
167   return ts >> s;
168 }
169 
170 
171 /* Vectorized arithmetic operators.  */
172 
173 static signed16
174 AddQH(signed16 ts, signed16 tt)
175 {
176   signed32 t = (signed32)ts + (signed32)tt;
177   return QH_CLAMP(t);
178 }
179 
180 static unsigned8
181 AddOB(unsigned8 ts, unsigned8 tt)
182 {
183   unsigned32 t = (unsigned32)ts + (unsigned32)tt;
184   return OB_CLAMP(t);
185 }
186 
187 static signed16
188 SubQH(signed16 ts, signed16 tt)
189 {
190   signed32 t = (signed32)ts - (signed32)tt;
191   return QH_CLAMP(t);
192 }
193 
194 static unsigned8
195 SubOB(unsigned8 ts, unsigned8 tt)
196 {
197   signed32 t;
198   t = (signed32)ts - (signed32)tt;
199   if (t < 0)
200     t = 0;
201   return (unsigned8)t;
202 }
203 
204 static signed16
205 MinQH(signed16 ts, signed16 tt)
206 {
207   return (ts < tt ? ts : tt);
208 }
209 
210 static unsigned8
211 MinOB(unsigned8 ts, unsigned8 tt)
212 {
213   return (ts < tt ? ts : tt);
214 }
215 
216 static signed16
217 MaxQH(signed16 ts, signed16 tt)
218 {
219   return (ts > tt ? ts : tt);
220 }
221 
222 static unsigned8
223 MaxOB(unsigned8 ts, unsigned8 tt)
224 {
225   return (ts > tt ? ts : tt);
226 }
227 
228 static signed16
229 MulQH(signed16 ts, signed16 tt)
230 {
231   signed32 t = (signed32)ts * (signed32)tt;
232   return QH_CLAMP(t);
233 }
234 
235 static unsigned8
236 MulOB(unsigned8 ts, unsigned8 tt)
237 {
238   unsigned32 t = (unsigned32)ts * (unsigned32)tt;
239   return OB_CLAMP(t);
240 }
241 
242 /* "msgn" and "sra" are defined only for QH format.  */
243 
244 static signed16
245 MsgnQH(signed16 ts, signed16 tt)
246 {
247   signed16 t;
248   if (ts < 0)
249     t = (tt == QH_MIN ? QH_MAX : -tt);
250   else if (ts == 0)
251     t = 0;
252   else
253     t = tt;
254   return t;
255 }
256 
257 static signed16
258 SRAQH(signed16 ts, signed16 tt)
259 {
260   unsigned32 s = (unsigned32)tt & 0xF;
261   return (signed16)((signed32)ts >> s);
262 }
263 
264 
265 /* "pabsdiff" and "pavg" are defined only for OB format.  */
266 
267 static unsigned8
268 AbsDiffOB(unsigned8 ts, unsigned8 tt)
269 {
270   return (ts >= tt ? ts - tt : tt - ts);
271 }
272 
273 static unsigned8
274 AvgOB(unsigned8 ts, unsigned8 tt)
275 {
276   return ((unsigned32)ts + (unsigned32)tt + 1) >> 1;
277 }
278 
279 
280 /* Dispatch tables for operations that update a CPR.  */
281 
282 static const QH_FUNC qh_func[] = {
283   AndQH,  NorQH,  OrQH,   XorQH, SLLQH, SRLQH,
284   AddQH,  SubQH,  MinQH,  MaxQH,
285   MulQH,  MsgnQH, SRAQH,  NULL,  NULL
286 };
287 
288 static const OB_FUNC ob_func[] = {
289   AndOB,  NorOB,  OrOB,   XorOB, SLLOB, SRLOB,
290   AddOB,  SubOB,  MinOB,  MaxOB,
291   MulOB,  NULL,   NULL,   AbsDiffOB, AvgOB
292 };
293 
294 /* Auxiliary functions for CPR updates.  */
295 
296 /* Vector mapping for QH format.  */
297 static unsigned64
298 qh_vector_op(unsigned64 v1, unsigned64 v2, QH_FUNC func)
299 {
300   unsigned64 result = 0;
301   int  i;
302   signed16 h, h1, h2;
303 
304   for (i = 0; i < 64; i += 16)
305     {
306       h1 = (signed16)(v1 & 0xFFFF);  v1 >>= 16;
307       h2 = (signed16)(v2 & 0xFFFF);  v2 >>= 16;
308       h = (*func)(h1, h2);
309       result |= ((unsigned64)((unsigned16)h) << i);
310     }
311   return result;
312 }
313 
314 static unsigned64
315 qh_map_op(unsigned64 v1, signed16 h2, QH_FUNC func)
316 {
317   unsigned64 result = 0;
318   int  i;
319   signed16 h, h1;
320 
321   for (i = 0; i < 64; i += 16)
322     {
323       h1 = (signed16)(v1 & 0xFFFF);  v1 >>= 16;
324       h = (*func)(h1, h2);
325       result |= ((unsigned64)((unsigned16)h) << i);
326     }
327   return result;
328 }
329 
330 
331 /* Vector operations for OB format.  */
332 
333 static unsigned64
334 ob_vector_op(unsigned64 v1, unsigned64 v2, OB_FUNC func)
335 {
336   unsigned64 result = 0;
337   int  i;
338   unsigned8 b, b1, b2;
339 
340   for (i = 0; i < 64; i += 8)
341     {
342       b1 = v1 & 0xFF;  v1 >>= 8;
343       b2 = v2 & 0xFF;  v2 >>= 8;
344       b = (*func)(b1, b2);
345       result |= ((unsigned64)b << i);
346     }
347   return result;
348 }
349 
350 static unsigned64
351 ob_map_op(unsigned64 v1, unsigned8 b2, OB_FUNC func)
352 {
353   unsigned64 result = 0;
354   int  i;
355   unsigned8 b, b1;
356 
357   for (i = 0; i < 64; i += 8)
358     {
359       b1 = v1 & 0xFF;  v1 >>= 8;
360       b = (*func)(b1, b2);
361       result |= ((unsigned64)b << i);
362     }
363   return result;
364 }
365 
366 
367 /* Primary entry for operations that update CPRs.  */
368 unsigned64
369 mdmx_cpr_op(sim_cpu *cpu,
370 	    address_word cia,
371 	    int op,
372 	    unsigned64 op1,
373 	    int vt,
374 	    MX_fmtsel fmtsel)
375 {
376   unsigned64 op2;
377   unsigned64 result = 0;
378 
379   switch (MX_FMT (fmtsel))
380     {
381     case mdmx_qh:
382       switch (MX_VT (fmtsel))
383 	{
384 	case sel_elem:
385 	  op2 = ValueFPR(vt, fmt_mdmx);
386 	  result = qh_map_op(op1, QH_ELEM(op2, fmtsel), qh_func[op]);
387 	  break;
388 	case sel_vect:
389 	  result = qh_vector_op(op1, ValueFPR(vt, fmt_mdmx), qh_func[op]);
390 	  break;
391 	case sel_imm:
392 	  result = qh_map_op(op1, vt, qh_func[op]);
393 	  break;
394 	}
395       break;
396     case mdmx_ob:
397       switch (MX_VT (fmtsel))
398 	{
399 	case sel_elem:
400 	  op2 = ValueFPR(vt, fmt_mdmx);
401 	  result = ob_map_op(op1, OB_ELEM(op2, fmtsel), ob_func[op]);
402 	  break;
403 	case sel_vect:
404 	  result = ob_vector_op(op1, ValueFPR(vt, fmt_mdmx), ob_func[op]);
405 	  break;
406 	case sel_imm:
407 	  result = ob_map_op(op1, vt, ob_func[op]);
408 	  break;
409 	}
410       break;
411     default:
412       Unpredictable ();
413     }
414 
415   return result;
416 }
417 
418 
419 /* Operations that update CCs */
420 
421 static void
422 qh_vector_test(sim_cpu *cpu, unsigned64 v1, unsigned64 v2, int cond)
423 {
424   int  i;
425   signed16 h1, h2;
426   int  boolean;
427 
428   for (i = 0; i < 4; i++)
429     {
430       h1 = (signed16)(v1 & 0xFFFF);  v1 >>= 16;
431       h2 = (signed16)(v2 & 0xFFFF);  v2 >>= 16;
432       boolean = ((cond & MX_C_EQ) && (h1 == h2)) ||
433 	((cond & MX_C_LT) && (h1 < h2));
434       SETFCC(i, boolean);
435     }
436 }
437 
438 static void
439 qh_map_test(sim_cpu *cpu, unsigned64 v1, signed16 h2, int cond)
440 {
441   int  i;
442   signed16 h1;
443   int  boolean;
444 
445   for (i = 0; i < 4; i++)
446     {
447       h1 = (signed16)(v1 & 0xFFFF);  v1 >>= 16;
448       boolean = ((cond & MX_C_EQ) && (h1 == h2)) ||
449 	((cond & MX_C_LT) && (h1 < h2));
450       SETFCC(i, boolean);
451     }
452 }
453 
454 static void
455 ob_vector_test(sim_cpu *cpu, unsigned64 v1, unsigned64 v2, int cond)
456 {
457   int  i;
458   unsigned8 b1, b2;
459   int  boolean;
460 
461   for (i = 0; i < 8; i++)
462     {
463       b1 = v1 & 0xFF;  v1 >>= 8;
464       b2 = v2 & 0xFF;  v2 >>= 8;
465       boolean = ((cond & MX_C_EQ) && (b1 == b2)) ||
466 	((cond & MX_C_LT) && (b1 < b2));
467       SETFCC(i, boolean);
468     }
469 }
470 
471 static void
472 ob_map_test(sim_cpu *cpu, unsigned64 v1, unsigned8 b2, int cond)
473 {
474   int  i;
475   unsigned8 b1;
476   int  boolean;
477 
478   for (i = 0; i < 8; i++)
479     {
480       b1 = (unsigned8)(v1 & 0xFF);  v1 >>= 8;
481       boolean = ((cond & MX_C_EQ) && (b1 == b2)) ||
482 	((cond & MX_C_LT) && (b1 < b2));
483       SETFCC(i, boolean);
484     }
485 }
486 
487 
488 void
489 mdmx_cc_op(sim_cpu *cpu,
490 	   address_word cia,
491 	   int cond,
492 	   unsigned64 v1,
493 	   int vt,
494 	   MX_fmtsel fmtsel)
495 {
496   unsigned64 op2;
497 
498   switch (MX_FMT (fmtsel))
499     {
500     case mdmx_qh:
501       switch (MX_VT (fmtsel))
502 	{
503 	case sel_elem:
504 	  op2 = ValueFPR(vt, fmt_mdmx);
505 	  qh_map_test(cpu, v1, QH_ELEM(op2, fmtsel), cond);
506 	  break;
507 	case sel_vect:
508 	  qh_vector_test(cpu, v1, ValueFPR(vt, fmt_mdmx), cond);
509 	  break;
510 	case sel_imm:
511 	  qh_map_test(cpu, v1, vt, cond);
512 	  break;
513 	}
514       break;
515     case mdmx_ob:
516       switch (MX_VT (fmtsel))
517 	{
518 	case sel_elem:
519 	  op2 = ValueFPR(vt, fmt_mdmx);
520 	  ob_map_test(cpu, v1, OB_ELEM(op2, fmtsel), cond);
521 	  break;
522 	case sel_vect:
523 	  ob_vector_test(cpu, v1, ValueFPR(vt, fmt_mdmx), cond);
524 	  break;
525 	case sel_imm:
526 	  ob_map_test(cpu, v1, vt, cond);
527 	  break;
528 	}
529       break;
530     default:
531       Unpredictable ();
532     }
533 }
534 
535 
536 /* Pick operations.  */
537 
538 static unsigned64
539 qh_vector_pick(sim_cpu *cpu, unsigned64 v1, unsigned64 v2, int tf)
540 {
541   unsigned64 result = 0;
542   int  i, s;
543   unsigned16 h;
544 
545   s = 0;
546   for (i = 0; i < 4; i++)
547     {
548       h = ((GETFCC(i) == tf) ? (v1 & 0xFFFF) : (v2 & 0xFFFF));
549       v1 >>= 16;  v2 >>= 16;
550       result |= ((unsigned64)h << s);
551       s += 16;
552     }
553   return result;
554 }
555 
556 static unsigned64
557 qh_map_pick(sim_cpu *cpu, unsigned64 v1, signed16 h2, int tf)
558 {
559   unsigned64 result = 0;
560   int  i, s;
561   unsigned16 h;
562 
563   s = 0;
564   for (i = 0; i < 4; i++)
565     {
566       h = (GETFCC(i) == tf) ? (v1 & 0xFFFF) : (unsigned16)h2;
567       v1 >>= 16;
568       result |= ((unsigned64)h << s);
569       s += 16;
570     }
571   return result;
572 }
573 
574 static unsigned64
575 ob_vector_pick(sim_cpu *cpu, unsigned64 v1, unsigned64 v2, int tf)
576 {
577   unsigned64 result = 0;
578   int  i, s;
579   unsigned8 b;
580 
581   s = 0;
582   for (i = 0; i < 8; i++)
583     {
584       b = (GETFCC(i) == tf) ? (v1 & 0xFF) : (v2 & 0xFF);
585       v1 >>= 8;  v2 >>= 8;
586       result |= ((unsigned64)b << s);
587       s += 8;
588     }
589   return result;
590 }
591 
592 static unsigned64
593 ob_map_pick(sim_cpu *cpu, unsigned64 v1, unsigned8 b2, int tf)
594 {
595   unsigned64 result = 0;
596   int  i, s;
597   unsigned8 b;
598 
599   s = 0;
600   for (i = 0; i < 8; i++)
601     {
602       b = (GETFCC(i) == tf) ? (v1 & 0xFF) : b2;
603       v1 >>= 8;
604       result |= ((unsigned64)b << s);
605       s += 8;
606     }
607   return result;
608 }
609 
610 
611 unsigned64
612 mdmx_pick_op(sim_cpu *cpu,
613 	     address_word cia,
614 	     int tf,
615 	     unsigned64 v1,
616 	     int vt,
617 	     MX_fmtsel fmtsel)
618 {
619   unsigned64 result = 0;
620   unsigned64 op2;
621 
622   switch (MX_FMT (fmtsel))
623     {
624     case mdmx_qh:
625       switch (MX_VT (fmtsel))
626 	{
627 	case sel_elem:
628 	  op2 = ValueFPR(vt, fmt_mdmx);
629 	  result = qh_map_pick(cpu, v1, QH_ELEM(op2, fmtsel), tf);
630 	  break;
631 	case sel_vect:
632 	  result = qh_vector_pick(cpu, v1, ValueFPR(vt, fmt_mdmx), tf);
633 	  break;
634 	case sel_imm:
635 	  result = qh_map_pick(cpu, v1, vt, tf);
636 	  break;
637 	}
638       break;
639     case mdmx_ob:
640       switch (MX_VT (fmtsel))
641 	{
642 	case sel_elem:
643 	  op2 = ValueFPR(vt, fmt_mdmx);
644 	  result = ob_map_pick(cpu, v1, OB_ELEM(op2, fmtsel), tf);
645 	  break;
646 	case sel_vect:
647 	  result = ob_vector_pick(cpu, v1, ValueFPR(vt, fmt_mdmx), tf);
648 	  break;
649 	case sel_imm:
650 	  result = ob_map_pick(cpu, v1, vt, tf);
651 	  break;
652 	}
653       break;
654     default:
655       Unpredictable ();
656     }
657   return result;
658 }
659 
660 
661 /* Accumulators.  */
662 
663 typedef void (*QH_ACC)(signed48 *a, signed16 ts, signed16 tt);
664 
665 static void
666 AccAddAQH(signed48 *a, signed16 ts, signed16 tt)
667 {
668   *a += (signed48)ts + (signed48)tt;
669 }
670 
671 static void
672 AccAddLQH(signed48 *a, signed16 ts, signed16 tt)
673 {
674   *a = (signed48)ts + (signed48)tt;
675 }
676 
677 static void
678 AccMulAQH(signed48 *a, signed16 ts, signed16 tt)
679 {
680   *a += (signed48)ts * (signed48)tt;
681 }
682 
683 static void
684 AccMulLQH(signed48 *a, signed16 ts, signed16 tt)
685 {
686   *a = (signed48)ts * (signed48)tt;
687 }
688 
689 static void
690 SubMulAQH(signed48 *a, signed16 ts, signed16 tt)
691 {
692   *a -= (signed48)ts * (signed48)tt;
693 }
694 
695 static void
696 SubMulLQH(signed48 *a, signed16 ts, signed16 tt)
697 {
698   *a = -((signed48)ts * (signed48)tt);
699 }
700 
701 static void
702 AccSubAQH(signed48 *a, signed16 ts, signed16 tt)
703 {
704   *a += (signed48)ts - (signed48)tt;
705 }
706 
707 static void
708 AccSubLQH(signed48 *a, signed16 ts, signed16 tt)
709 {
710   *a =  (signed48)ts - (signed48)tt;
711 }
712 
713 
714 typedef void (*OB_ACC)(signed24 *acc, unsigned8 ts, unsigned8 tt);
715 
716 static void
717 AccAddAOB(signed24 *a, unsigned8 ts, unsigned8 tt)
718 {
719   *a += (signed24)ts + (signed24)tt;
720 }
721 
722 static void
723 AccAddLOB(signed24 *a, unsigned8 ts, unsigned8 tt)
724 {
725   *a = (signed24)ts + (signed24)tt;
726 }
727 
728 static void
729 AccMulAOB(signed24 *a, unsigned8 ts, unsigned8 tt)
730 {
731   *a += (signed24)ts * (signed24)tt;
732 }
733 
734 static void
735 AccMulLOB(signed24 *a, unsigned8 ts, unsigned8 tt)
736 {
737   *a = (signed24)ts * (signed24)tt;
738 }
739 
740 static void
741 SubMulAOB(signed24 *a, unsigned8 ts, unsigned8 tt)
742 {
743   *a -= (signed24)ts * (signed24)tt;
744 }
745 
746 static void
747 SubMulLOB(signed24 *a, unsigned8 ts, unsigned8 tt)
748 {
749   *a = -((signed24)ts * (signed24)tt);
750 }
751 
752 static void
753 AccSubAOB(signed24 *a, unsigned8 ts, unsigned8 tt)
754 {
755   *a += (signed24)ts - (signed24)tt;
756 }
757 
758 static void
759 AccSubLOB(signed24 *a, unsigned8 ts, unsigned8 tt)
760 {
761   *a = (signed24)ts - (signed24)tt;
762 }
763 
764 static void
765 AccAbsDiffOB(signed24 *a, unsigned8 ts, unsigned8 tt)
766 {
767   unsigned8 t = (ts >= tt ? ts - tt : tt - ts);
768   *a += (signed24)t;
769 }
770 
771 
/* Dispatch tables for operations that update the accumulator.  */
773 
774 static const QH_ACC qh_acc[] = {
775   AccAddAQH, AccAddAQH, AccMulAQH, AccMulLQH,
776   SubMulAQH, SubMulLQH, AccSubAQH, AccSubLQH,
777   NULL
778 };
779 
780 static const OB_ACC ob_acc[] = {
781   AccAddAOB, AccAddLOB, AccMulAOB, AccMulLOB,
782   SubMulAOB, SubMulLOB, AccSubAOB, AccSubLOB,
783   AccAbsDiffOB
784 };
785 
786 
787 static void
788 qh_vector_acc(signed48 a[], unsigned64 v1, unsigned64 v2, QH_ACC acc)
789 {
790   int  i;
791   signed16 h1, h2;
792 
793   for (i = 0; i < 4; i++)
794     {
795       h1 = (signed16)(v1 & 0xFFFF);  v1 >>= 16;
796       h2 = (signed16)(v2 & 0xFFFF);  v2 >>= 16;
797       (*acc)(&a[i], h1, h2);
798     }
799 }
800 
801 static void
802 qh_map_acc(signed48 a[], unsigned64 v1, signed16 h2, QH_ACC acc)
803 {
804   int  i;
805   signed16 h1;
806 
807   for (i = 0; i < 4; i++)
808     {
809       h1 = (signed16)(v1 & 0xFFFF);  v1 >>= 16;
810       (*acc)(&a[i], h1, h2);
811     }
812 }
813 
814 static void
815 ob_vector_acc(signed24 a[], unsigned64 v1, unsigned64 v2, OB_ACC acc)
816 {
817   int  i;
818   unsigned8  b1, b2;
819 
820   for (i = 0; i < 8; i++)
821     {
822       b1 = v1 & 0xFF;  v1 >>= 8;
823       b2 = v2 & 0xFF;  v2 >>= 8;
824       (*acc)(&a[i], b1, b2);
825     }
826 }
827 
828 static void
829 ob_map_acc(signed24 a[], unsigned64 v1, unsigned8 b2, OB_ACC acc)
830 {
831   int  i;
832   unsigned8 b1;
833 
834   for (i = 0; i < 8; i++)
835     {
836       b1 = v1 & 0xFF;  v1 >>= 8;
837       (*acc)(&a[i], b1, b2);
838     }
839 }
840 
841 
842 /* Primary entry for operations that accumulate */
843 void
844 mdmx_acc_op(sim_cpu *cpu,
845 	    address_word cia,
846 	    int op,
847 	    unsigned64 op1,
848 	    int vt,
849 	    MX_fmtsel fmtsel)
850 {
851   unsigned64 op2;
852 
853   switch (MX_FMT (fmtsel))
854     {
855     case mdmx_qh:
856       switch (MX_VT (fmtsel))
857 	{
858 	case sel_elem:
859 	  op2 = ValueFPR(vt, fmt_mdmx);
860 	  qh_map_acc(ACC.qh, op1, QH_ELEM(op2, fmtsel), qh_acc[op]);
861 	  break;
862 	case sel_vect:
863 	  qh_vector_acc(ACC.qh, op1, ValueFPR(vt, fmt_mdmx), qh_acc[op]);
864 	  break;
865 	case sel_imm:
866 	  qh_map_acc(ACC.qh, op1, vt, qh_acc[op]);
867 	  break;
868 	}
869       break;
870     case mdmx_ob:
871       switch (MX_VT (fmtsel))
872 	{
873 	case sel_elem:
874 	  op2 = ValueFPR(vt, fmt_mdmx);
875 	  ob_map_acc(ACC.ob, op1, OB_ELEM(op2, fmtsel), ob_acc[op]);
876 	  break;
877 	case sel_vect:
878 	  ob_vector_acc(ACC.ob, op1, ValueFPR(vt, fmt_mdmx), ob_acc[op]);
879 	  break;
880 	case sel_imm:
881 	  ob_map_acc(ACC.ob, op1, vt, ob_acc[op]);
882 	  break;
883 	}
884       break;
885     default:
886       Unpredictable ();
887     }
888 }
889 
890 
891 /* Reading and writing accumulator (no conversion).  */
892 
893 unsigned64
894 mdmx_rac_op(sim_cpu *cpu,
895 	    address_word cia,
896 	    int op,
897 	    int fmt)
898 {
899   unsigned64    result;
900   unsigned int  shift;
901   int           i;
902 
903   shift = op;          /* L = 00, M = 01, H = 10.  */
904   result = 0;
905 
906   switch (fmt)
907     {
908     case MX_FMT_QH:
909       shift <<= 4;              /* 16 bits per element.  */
910       for (i = 3; i >= 0; --i)
911 	{
912 	  result <<= 16;
913 	  result |= ((ACC.qh[i] >> shift) & 0xFFFF);
914 	}
915       break;
916     case MX_FMT_OB:
917       shift <<= 3;              /*  8 bits per element.  */
918       for (i = 7; i >= 0; --i)
919 	{
920 	  result <<= 8;
921 	  result |= ((ACC.ob[i] >> shift) & 0xFF);
922 	}
923       break;
924     default:
925       Unpredictable ();
926     }
927   return result;
928 }
929 
930 void
931 mdmx_wacl(sim_cpu *cpu,
932 	  address_word cia,
933 	  int fmt,
934 	  unsigned64 vs,
935 	  unsigned64 vt)
936 {
937   int           i;
938 
939   switch (fmt)
940     {
941     case MX_FMT_QH:
942       for (i = 0; i < 4; i++)
943 	{
944 	  signed32  s = (signed16)(vs & 0xFFFF);
945 	  ACC.qh[i] = ((signed48)s << 16) | (vt & 0xFFFF);
946 	  vs >>= 16;  vt >>= 16;
947 	}
948       break;
949     case MX_FMT_OB:
950       for (i = 0; i < 8; i++)
951 	{
952 	  signed16  s = (signed8)(vs & 0xFF);
953 	  ACC.ob[i] = ((signed24)s << 8) | (vt & 0xFF);
954 	  vs >>= 8;   vt >>= 8;
955 	}
956       break;
957     default:
958       Unpredictable ();
959     }
960 }
961 
962 void
963 mdmx_wach(sim_cpu *cpu,
964 	  address_word cia,
965 	  int fmt,
966 	  unsigned64 vs)
967 {
968   int           i;
969 
970   switch (fmt)
971     {
972     case MX_FMT_QH:
973       for (i = 0; i < 4; i++)
974 	{
975 	  signed32  s = (signed16)(vs & 0xFFFF);
976 	  ACC.qh[i] &= ~((signed48)0xFFFF << 32);
977 	  ACC.qh[i] |=  ((signed48)s << 32);
978 	  vs >>= 16;
979 	}
980       break;
981     case MX_FMT_OB:
982       for (i = 0; i < 8; i++)
983 	{
984 	  ACC.ob[i] &= ~((signed24)0xFF << 16);
985 	  ACC.ob[i] |=  ((signed24)(vs & 0xFF) << 16);
986 	  vs >>= 8;
987 	}
988       break;
989     default:
990       Unpredictable ();
991     }
992 }
993 
994 
995 /* Reading and writing accumulator (rounding conversions).
996    Enumerating function guarantees s >= 0 for QH ops.  */
997 
998 typedef signed16 (*QH_ROUND)(signed48 a, signed16 s);
999 
1000 #define QH_BIT(n)  ((unsigned48)1 << (n))
1001 #define QH_ONES(n) (((unsigned48)1 << (n))-1)
1002 
1003 static signed16
1004 RNASQH(signed48 a, signed16 s)
1005 {
1006   signed48 t;
1007   signed16 result = 0;
1008 
1009   if (s > 48)
1010     result = 0;
1011   else
1012     {
1013       t = (a >> s);
1014       if ((a & QH_BIT(47)) == 0)
1015 	{
1016 	  if (s > 0 && ((a >> (s-1)) & 1) == 1)
1017 	    t++;
1018 	  if (t > QH_MAX)
1019 	    t = QH_MAX;
1020 	}
1021       else
1022 	{
1023 	  if (s > 0 && ((a >> (s-1)) & 1) == 1)
1024 	    {
1025 	      if (s > 1 && ((unsigned48)a & QH_ONES(s-1)) != 0)
1026 		t++;
1027 	    }
1028 	  if (t < QH_MIN)
1029 	    t = QH_MIN;
1030 	}
1031       result = (signed16)t;
1032     }
1033   return result;
1034 }
1035 
1036 static signed16
1037 RNAUQH(signed48 a, signed16 s)
1038 {
1039   unsigned48 t;
1040   signed16 result;
1041 
1042   if (s > 48)
1043     result = 0;
1044   else if (s == 48)
1045     result = ((unsigned48)a & MASK48) >> 47;
1046   else
1047     {
1048       t = ((unsigned48)a & MASK48) >> s;
1049       if (s > 0 && ((a >> (s-1)) & 1) == 1)
1050 	t++;
1051       if (t > 0xFFFF)
1052 	t = 0xFFFF;
1053       result = (signed16)t;
1054     }
1055   return result;
1056 }
1057 
1058 static signed16
1059 RNESQH(signed48 a, signed16 s)
1060 {
1061   signed48 t;
1062   signed16 result = 0;
1063 
1064   if (s > 47)
1065     result = 0;
1066   else
1067     {
1068       t = (a >> s);
1069       if (s > 0 && ((a >> (s-1)) & 1) == 1)
1070 	{
1071 	  if (s == 1 || (a & QH_ONES(s-1)) == 0)
1072 	    t += t & 1;
1073 	  else
1074 	    t += 1;
1075 	}
1076       if ((a & QH_BIT(47)) == 0)
1077 	{
1078 	  if (t > QH_MAX)
1079 	    t = QH_MAX;
1080 	}
1081       else
1082 	{
1083 	  if (t < QH_MIN)
1084 	    t = QH_MIN;
1085 	}
1086       result = (signed16)t;
1087     }
1088   return result;
1089 }
1090 
1091 static signed16
1092 RNEUQH(signed48 a, signed16 s)
1093 {
1094   unsigned48 t;
1095   signed16 result;
1096 
1097   if (s > 48)
1098     result = 0;
1099   else if (s == 48)
1100     result = ((unsigned48)a > QH_BIT(47) ? 1 : 0);
1101   else
1102     {
1103       t = ((unsigned48)a & MASK48) >> s;
1104       if (s > 0 && ((a >> (s-1)) & 1) == 1)
1105 	{
1106 	  if (s > 1 && (a & QH_ONES(s-1)) != 0)
1107 	    t++;
1108 	  else
1109 	    t += t & 1;
1110 	}
1111       if (t > 0xFFFF)
1112 	t = 0xFFFF;
1113       result = (signed16)t;
1114     }
1115   return result;
1116 }
1117 
1118 static signed16
1119 RZSQH(signed48 a, signed16 s)
1120 {
1121   signed48 t;
1122   signed16 result = 0;
1123 
1124   if (s > 47)
1125     result = 0;
1126   else
1127     {
1128       t = (a >> s);
1129       if ((a & QH_BIT(47)) == 0)
1130 	{
1131 	  if (t > QH_MAX)
1132 	    t = QH_MAX;
1133 	}
1134       else
1135 	{
1136 	  if (t < QH_MIN)
1137 	    t = QH_MIN;
1138 	}
1139       result = (signed16)t;
1140     }
1141   return result;
1142 }
1143 
1144 static signed16
1145 RZUQH(signed48 a, signed16 s)
1146 {
1147   unsigned48 t;
1148   signed16 result = 0;
1149 
1150   if (s > 48)
1151     result = 0;
1152   else if (s == 48)
1153     result = ((unsigned48)a > QH_BIT(47) ? 1 : 0);
1154   else
1155     {
1156       t = ((unsigned48)a & MASK48) >> s;
1157       if (t > 0xFFFF)
1158 	t = 0xFFFF;
1159       result = (signed16)t;
1160     }
1161   return result;
1162 }
1163 
1164 
1165 typedef unsigned8 (*OB_ROUND)(signed24 a, unsigned8 s);
1166 
1167 #define OB_BIT(n)  ((unsigned24)1 << (n))
1168 #define OB_ONES(n) (((unsigned24)1 << (n))-1)
1169 
1170 static unsigned8
1171 RNAUOB(signed24 a, unsigned8 s)
1172 {
1173   unsigned8 result;
1174   unsigned24 t;
1175 
1176   if (s > 24)
1177     result = 0;
1178   else if (s == 24)
1179     result = ((unsigned24)a & MASK24) >> 23;
1180   else
1181     {
1182       t = ((unsigned24)a & MASK24) >> s;
1183       if (s > 0 && ((a >> (s-1)) & 1) == 1)
1184 	t ++;
1185       result = OB_CLAMP(t);
1186     }
1187   return result;
1188 }
1189 
1190 static unsigned8
1191 RNEUOB(signed24 a, unsigned8 s)
1192 {
1193   unsigned8 result;
1194   unsigned24 t;
1195 
1196   if (s > 24)
1197     result = 0;
1198   else if (s == 24)
1199     result = (((unsigned24)a & MASK24) > OB_BIT(23) ? 1 : 0);
1200   else
1201     {
1202       t = ((unsigned24)a & MASK24) >> s;
1203       if (s > 0 && ((a >> (s-1)) & 1) == 1)
1204 	{
1205 	  if (s > 1 && (a & OB_ONES(s-1)) != 0)
1206 	    t++;
1207 	  else
1208 	    t += t & 1;
1209 	}
1210       result = OB_CLAMP(t);
1211     }
1212   return result;
1213 }
1214 
1215 static unsigned8
1216 RZUOB(signed24 a, unsigned8 s)
1217 {
1218   unsigned8 result;
1219   unsigned24 t;
1220 
1221   if (s >= 24)
1222     result = 0;
1223   else
1224     {
1225       t = ((unsigned24)a & MASK24) >> s;
1226       result = OB_CLAMP(t);
1227     }
1228   return result;
1229 }
1230 
1231 
1232 static const QH_ROUND qh_round[] = {
1233   RNASQH, RNAUQH, RNESQH, RNEUQH, RZSQH,  RZUQH
1234 };
1235 
1236 static const OB_ROUND ob_round[] = {
1237   NULL,   RNAUOB, NULL,   RNEUOB, NULL,   RZUOB
1238 };
1239 
1240 
1241 static unsigned64
1242 qh_vector_round(sim_cpu *cpu, address_word cia, unsigned64 v2, QH_ROUND round)
1243 {
1244   unsigned64 result = 0;
1245   int  i, s;
1246   signed16 h, h2;
1247 
1248   s = 0;
1249   for (i = 0; i < 4; i++)
1250     {
1251       h2 = (signed16)(v2 & 0xFFFF);
1252       if (h2 >= 0)
1253 	h = (*round)(ACC.qh[i], h2);
1254       else
1255 	{
1256 	  UnpredictableResult ();
1257 	  h = 0xdead;
1258 	}
1259       v2 >>= 16;
1260       result |= ((unsigned64)((unsigned16)h) << s);
1261       s += 16;
1262     }
1263   return result;
1264 }
1265 
1266 static unsigned64
1267 qh_map_round(sim_cpu *cpu, address_word cia, signed16 h2, QH_ROUND round)
1268 {
1269   unsigned64 result = 0;
1270   int  i, s;
1271   signed16  h;
1272 
1273   s = 0;
1274   for (i = 0; i < 4; i++)
1275     {
1276       if (h2 >= 0)
1277 	h = (*round)(ACC.qh[i], h2);
1278       else
1279 	{
1280 	  UnpredictableResult ();
1281 	  h = 0xdead;
1282 	}
1283       result |= ((unsigned64)((unsigned16)h) << s);
1284       s += 16;
1285     }
1286   return result;
1287 }
1288 
1289 static unsigned64
1290 ob_vector_round(sim_cpu *cpu, address_word cia, unsigned64 v2, OB_ROUND round)
1291 {
1292   unsigned64 result = 0;
1293   int  i, s;
1294   unsigned8 b, b2;
1295 
1296   s = 0;
1297   for (i = 0; i < 8; i++)
1298     {
1299       b2 = v2 & 0xFF;  v2 >>= 8;
1300       b = (*round)(ACC.ob[i], b2);
1301       result |= ((unsigned64)b << s);
1302       s += 8;
1303     }
1304   return result;
1305 }
1306 
1307 static unsigned64
1308 ob_map_round(sim_cpu *cpu, address_word cia, unsigned8 b2, OB_ROUND round)
1309 {
1310   unsigned64 result = 0;
1311   int  i, s;
1312   unsigned8 b;
1313 
1314   s = 0;
1315   for (i = 0; i < 8; i++)
1316     {
1317       b = (*round)(ACC.ob[i], b2);
1318       result |= ((unsigned64)b << s);
1319       s += 8;
1320     }
1321   return result;
1322 }
1323 
1324 
1325 unsigned64
1326 mdmx_round_op(sim_cpu *cpu,
1327 	      address_word cia,
1328 	      int rm,
1329 	      int vt,
1330 	      MX_fmtsel fmtsel)
1331 {
1332   unsigned64 op2;
1333   unsigned64 result = 0;
1334 
1335   switch (MX_FMT (fmtsel))
1336     {
1337     case mdmx_qh:
1338       switch (MX_VT (fmtsel))
1339 	{
1340 	case sel_elem:
1341 	  op2 = ValueFPR(vt, fmt_mdmx);
1342 	  result = qh_map_round(cpu, cia, QH_ELEM(op2, fmtsel), qh_round[rm]);
1343 	  break;
1344 	case sel_vect:
1345 	  op2 = ValueFPR(vt, fmt_mdmx);
1346 	  result = qh_vector_round(cpu, cia, op2, qh_round[rm]);
1347 	  break;
1348 	case sel_imm:
1349 	  result = qh_map_round(cpu, cia, vt, qh_round[rm]);
1350 	  break;
1351 	}
1352       break;
1353     case mdmx_ob:
1354       switch (MX_VT (fmtsel))
1355 	{
1356 	case sel_elem:
1357 	  op2 = ValueFPR(vt, fmt_mdmx);
1358 	  result = ob_map_round(cpu, cia, OB_ELEM(op2, fmtsel), ob_round[rm]);
1359 	  break;
1360 	case sel_vect:
1361 	  op2 = ValueFPR(vt, fmt_mdmx);
1362 	  result = ob_vector_round(cpu, cia, op2, ob_round[rm]);
1363 	  break;
1364 	case sel_imm:
1365 	  result = ob_map_round(cpu, cia, vt, ob_round[rm]);
1366 	  break;
1367 	}
1368       break;
1369     default:
1370       Unpredictable ();
1371     }
1372 
1373   return result;
1374 }
1375 
1376 
1377 /* Shuffle operation.  */
1378 
/* One destination element of a shuffle: which input supplies it and
   which element of that input.  `vs' must stay 0, because the empty
   (reserved) table rows rely on zero initialisation.  */
typedef struct {
  enum {
    vs = 0,         /* element of the first operand */
    ss = 1,         /* sign fill derived from the first operand */
    vt = 2          /* element of the second operand */
  }                 source;
  unsigned int      index;
} sh_map;
1383 
1384 static const sh_map ob_shuffle[][8] = {
1385   /* MDMX 2.0 encodings (3-4, 6-7).  */
1386   /* vr5400   encoding  (5), otherwise.  */
1387   {                                                              }, /* RSVD */
1388   {{vt,4}, {vs,4}, {vt,5}, {vs,5}, {vt,6}, {vs,6}, {vt,7}, {vs,7}}, /* RSVD */
1389   {{vt,0}, {vs,0}, {vt,1}, {vs,1}, {vt,2}, {vs,2}, {vt,3}, {vs,3}}, /* RSVD */
1390   {{vs,0}, {ss,0}, {vs,1}, {ss,1}, {vs,2}, {ss,2}, {vs,3}, {ss,3}}, /* upsl */
1391   {{vt,1}, {vt,3}, {vt,5}, {vt,7}, {vs,1}, {vs,3}, {vs,5}, {vs,7}}, /* pach */
1392   {{vt,0}, {vt,2}, {vt,4}, {vt,6}, {vs,0}, {vs,2}, {vs,4}, {vs,6}}, /* pacl */
1393   {{vt,4}, {vs,4}, {vt,5}, {vs,5}, {vt,6}, {vs,6}, {vt,7}, {vs,7}}, /* mixh */
1394   {{vt,0}, {vs,0}, {vt,1}, {vs,1}, {vt,2}, {vs,2}, {vt,3}, {vs,3}}  /* mixl */
1395 };
1396 
1397 static const sh_map qh_shuffle[][4] = {
1398   {{vt,2}, {vs,2}, {vt,3}, {vs,3}},  /* mixh */
1399   {{vt,0}, {vs,0}, {vt,1}, {vs,1}},  /* mixl */
1400   {{vt,1}, {vt,3}, {vs,1}, {vs,3}},  /* pach */
1401   {                              },  /* RSVD */
1402   {{vt,1}, {vs,0}, {vt,3}, {vs,2}},  /* bfla */
1403   {                              },  /* RSVD */
1404   {{vt,2}, {vt,3}, {vs,2}, {vs,3}},  /* repa */
1405   {{vt,0}, {vt,1}, {vs,0}, {vs,1}}   /* repb */
1406 };
1407 
1408 
1409 unsigned64
1410 mdmx_shuffle(sim_cpu *cpu,
1411 	     address_word cia,
1412 	     int shop,
1413 	     unsigned64 op1,
1414 	     unsigned64 op2)
1415 {
1416   unsigned64 result = 0;
1417   int  i, s;
1418   int  op;
1419 
1420   if ((shop & 0x3) == 0x1)       /* QH format.  */
1421     {
1422       op = shop >> 2;
1423       s = 0;
1424       for (i = 0; i < 4; i++)
1425 	{
1426 	  unsigned64 v;
1427 
1428 	  switch (qh_shuffle[op][i].source)
1429 	    {
1430 	    case vs:
1431 	      v = op1;
1432 	      break;
1433 	    case vt:
1434 	      v = op2;
1435 	      break;
1436 	    default:
1437 	      Unpredictable ();
1438 	      v = 0;
1439 	    }
1440 	  result |= (((v >> 16*qh_shuffle[op][i].index) & 0xFFFF) << s);
1441 	  s += 16;
1442 	}
1443     }
1444   else if ((shop & 0x1) == 0x0)  /* OB format.  */
1445     {
1446       op = shop >> 1;
1447       s = 0;
1448       for (i = 0; i < 8; i++)
1449 	{
1450 	  unsigned8 b;
1451 	  unsigned int ishift = 8*ob_shuffle[op][i].index;
1452 
1453 	  switch (ob_shuffle[op][i].source)
1454 	    {
1455 	    case vs:
1456 	      b = (op1 >> ishift) & 0xFF;
1457 	      break;
1458 	    case ss:
1459 	      b = ((op1 >> ishift) & 0x80) ? 0xFF : 0;
1460 	      break;
1461 	    case vt:
1462 	      b = (op2 >> ishift) & 0xFF;
1463 	      break;
1464 	    default:
1465 	      Unpredictable ();
1466 	      b = 0;
1467 	    }
1468 	  result |= ((unsigned64)b << s);
1469 	  s += 8;
1470 	}
1471     }
1472   else
1473     Unpredictable ();
1474 
1475   return result;
1476 }
1477