xref: /netbsd-src/external/gpl3/gdb/dist/sim/mips/mdmx.c (revision a5847cc334d9a7029f6352b847e9e8d71a0f9e0c)
1 /* Simulation code for the MIPS MDMX ASE.
2    Copyright (C) 2002, 2007, 2008, 2009, 2010, 2011
3    Free Software Foundation, Inc.
4    Contributed by Ed Satterthwaite and Chris Demetriou, of Broadcom
5    Corporation (SiByte).
6 
7 This file is part of GDB, the GNU debugger.
8 
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3 of the License, or
12 (at your option) any later version.
13 
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 GNU General Public License for more details.
18 
19 You should have received a copy of the GNU General Public License
20 along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
21 
22 #include <stdio.h>
23 
24 #include "sim-main.h"
25 
/* Within mdmx.c we refer to the sim_cpu directly.  CPU expands to the
   plain identifier `cpu', so any code in this file that uses CPU or SD
   needs a variable or parameter of that name in scope.  */
#define CPU cpu
#define SD  (CPU_STATE(CPU))
29 
/* XXX FIXME: temporary hack while the impact of making unpredictable()
   a "normal" (non-igen) function is evaluated.  The replacement macro
   expands to a call that names `cpu' and `cia', so both identifiers
   must be in scope wherever Unpredictable () is used.  */
#undef Unpredictable
#define Unpredictable() unpredictable_action (cpu, cia)
34 
35 /* MDMX Representations
36 
37    An 8-bit packed byte element (OB) is always unsigned.
38    The 24-bit accumulators are signed and are represented as 32-bit
39    signed values, which are reduced to 24-bit signed values prior to
40    Round and Clamp operations.
41 
42    A 16-bit packed halfword element (QH) is always signed.
43    The 48-bit accumulators are signed and are represented as 64-bit
44    signed values, which are reduced to 48-bit signed values prior to
45    Round and Clamp operations.
46 
47    The code below assumes a 2's-complement representation of signed
48    quantities.  Care is required to clear extended sign bits when
49    repacking fields.
50 
51    The code (and the code for arithmetic shifts in mips.igen) also makes
52    the (not guaranteed portable) assumption that right shifts of signed
53    quantities in C do sign extension.  */
54 
/* Unsigned view of a 48-bit QH accumulator (held in a 64-bit word) and
   the mask that keeps only its low 48 bits.  */
typedef unsigned64 unsigned48;
#define MASK48 (UNSIGNED64 (0xffffffffffff))

/* Unsigned view of a 24-bit OB accumulator (held in a 32-bit word) and
   the mask that keeps only its low 24 bits.  */
typedef unsigned32 unsigned24;
#define MASK24 (UNSIGNED32 (0xffffff))
60 
/* Element format of an MDMX operand, as decoded by MX_FMT.  */
typedef enum {
  mdmx_ob,          /* OB (octal byte): eight unsigned 8-bit elements */
  mdmx_qh           /* QH (quad half-word): four signed 16-bit elements */
} MX_fmt;
65 
/* How the second (vt) operand is obtained, as decoded by MX_VT.  */
typedef enum {
  sel_elem,         /* element select: one element of vt is used for every lane */
  sel_vect,         /* vector select: vt is used lane by lane */
  sel_imm           /* immediate select: the vt field itself is the value */
} VT_select;
71 
/* Limits of the packed element types.  */
#define OB_MAX  ((unsigned8)0xFF)
#define QH_MIN  ((signed16)0x8000)
#define QH_MAX  ((signed16)0x7FFF)

/* Saturate X to the element range.  OB_CLAMP only checks the upper
   bound, so X is expected to be non-negative.  Both macros evaluate
   their argument more than once; pass side-effect-free expressions.  */
#define OB_CLAMP(x)  ((unsigned8)((x) > OB_MAX ? OB_MAX : (x)))
#define QH_CLAMP(x)  ((signed16)((x) < QH_MIN ? QH_MIN : \
                                ((x) > QH_MAX ? QH_MAX : (x))))
79 
/* Decode the format/select field: bit 0 picks OB (0) or QH (1).  */
#define MX_FMT(fmtsel) (((fmtsel) & 0x1) == 0 ? mdmx_ob : mdmx_qh)
/* Bit 4 clear means element select; otherwise bits 4:3 == 0b10 means
   vector select and anything else means immediate select.  */
#define MX_VT(fmtsel)  (((fmtsel) & 0x10) == 0 ?    sel_elem : \
                       (((fmtsel) & 0x18) == 0x10 ? sel_vect : sel_imm))

/* Extract the selected element of V.  For QH, bits 3:2 of FMTSEL give
   the element index (shift of index * 16); for OB, bits 3:1 give the
   element index (shift of index * 8).  */
#define QH_ELEM(v,fmtsel) \
        ((signed16)(((v) >> (((fmtsel) & 0xC) << 2)) & 0xFFFF))
#define OB_ELEM(v,fmtsel) \
        ((unsigned8)(((v) >> (((fmtsel) & 0xE) << 2)) & 0xFF))
88 
89 
/* Per-element binary operators: take one element from each source and
   return the result element.  Used by the *_vector_op / *_map_op
   helpers through the qh_func[] and ob_func[] tables.  */
typedef signed16 (*QH_FUNC)(signed16, signed16);
typedef unsigned8 (*OB_FUNC)(unsigned8, unsigned8);
92 
93 /* vectorized logical operators */
94 
95 static signed16
96 AndQH(signed16 ts, signed16 tt)
97 {
98   return (signed16)((unsigned16)ts & (unsigned16)tt);
99 }
100 
101 static unsigned8
102 AndOB(unsigned8 ts, unsigned8 tt)
103 {
104   return ts & tt;
105 }
106 
107 static signed16
108 NorQH(signed16 ts, signed16 tt)
109 {
110   return (signed16)(((unsigned16)ts | (unsigned16)tt) ^ 0xFFFF);
111 }
112 
113 static unsigned8
114 NorOB(unsigned8 ts, unsigned8 tt)
115 {
116   return (ts | tt) ^ 0xFF;
117 }
118 
119 static signed16
120 OrQH(signed16 ts, signed16 tt)
121 {
122   return (signed16)((unsigned16)ts | (unsigned16)tt);
123 }
124 
125 static unsigned8
126 OrOB(unsigned8 ts, unsigned8 tt)
127 {
128   return ts | tt;
129 }
130 
131 static signed16
132 XorQH(signed16 ts, signed16 tt)
133 {
134   return (signed16)((unsigned16)ts ^ (unsigned16)tt);
135 }
136 
137 static unsigned8
138 XorOB(unsigned8 ts, unsigned8 tt)
139 {
140   return ts ^ tt;
141 }
142 
143 static signed16
144 SLLQH(signed16 ts, signed16 tt)
145 {
146   unsigned32 s = (unsigned32)tt & 0xF;
147   return (signed16)(((unsigned32)ts << s) & 0xFFFF);
148 }
149 
150 static unsigned8
151 SLLOB(unsigned8 ts, unsigned8 tt)
152 {
153   unsigned32 s = tt & 0x7;
154   return (ts << s) & 0xFF;
155 }
156 
157 static signed16
158 SRLQH(signed16 ts, signed16 tt)
159 {
160   unsigned32 s = (unsigned32)tt & 0xF;
161   return (signed16)((unsigned16)ts >> s);
162 }
163 
164 static unsigned8
165 SRLOB(unsigned8 ts, unsigned8 tt)
166 {
167   unsigned32 s = tt & 0x7;
168   return ts >> s;
169 }
170 
171 
172 /* Vectorized arithmetic operators.  */
173 
174 static signed16
175 AddQH(signed16 ts, signed16 tt)
176 {
177   signed32 t = (signed32)ts + (signed32)tt;
178   return QH_CLAMP(t);
179 }
180 
181 static unsigned8
182 AddOB(unsigned8 ts, unsigned8 tt)
183 {
184   unsigned32 t = (unsigned32)ts + (unsigned32)tt;
185   return OB_CLAMP(t);
186 }
187 
188 static signed16
189 SubQH(signed16 ts, signed16 tt)
190 {
191   signed32 t = (signed32)ts - (signed32)tt;
192   return QH_CLAMP(t);
193 }
194 
195 static unsigned8
196 SubOB(unsigned8 ts, unsigned8 tt)
197 {
198   signed32 t;
199   t = (signed32)ts - (signed32)tt;
200   if (t < 0)
201     t = 0;
202   return (unsigned8)t;
203 }
204 
205 static signed16
206 MinQH(signed16 ts, signed16 tt)
207 {
208   return (ts < tt ? ts : tt);
209 }
210 
211 static unsigned8
212 MinOB(unsigned8 ts, unsigned8 tt)
213 {
214   return (ts < tt ? ts : tt);
215 }
216 
217 static signed16
218 MaxQH(signed16 ts, signed16 tt)
219 {
220   return (ts > tt ? ts : tt);
221 }
222 
223 static unsigned8
224 MaxOB(unsigned8 ts, unsigned8 tt)
225 {
226   return (ts > tt ? ts : tt);
227 }
228 
229 static signed16
230 MulQH(signed16 ts, signed16 tt)
231 {
232   signed32 t = (signed32)ts * (signed32)tt;
233   return QH_CLAMP(t);
234 }
235 
236 static unsigned8
237 MulOB(unsigned8 ts, unsigned8 tt)
238 {
239   unsigned32 t = (unsigned32)ts * (unsigned32)tt;
240   return OB_CLAMP(t);
241 }
242 
243 /* "msgn" and "sra" are defined only for QH format.  */
244 
245 static signed16
246 MsgnQH(signed16 ts, signed16 tt)
247 {
248   signed16 t;
249   if (ts < 0)
250     t = (tt == QH_MIN ? QH_MAX : -tt);
251   else if (ts == 0)
252     t = 0;
253   else
254     t = tt;
255   return t;
256 }
257 
258 static signed16
259 SRAQH(signed16 ts, signed16 tt)
260 {
261   unsigned32 s = (unsigned32)tt & 0xF;
262   return (signed16)((signed32)ts >> s);
263 }
264 
265 
266 /* "pabsdiff" and "pavg" are defined only for OB format.  */
267 
268 static unsigned8
269 AbsDiffOB(unsigned8 ts, unsigned8 tt)
270 {
271   return (ts >= tt ? ts - tt : tt - ts);
272 }
273 
274 static unsigned8
275 AvgOB(unsigned8 ts, unsigned8 tt)
276 {
277   return ((unsigned32)ts + (unsigned32)tt + 1) >> 1;
278 }
279 
280 
281 /* Dispatch tables for operations that update a CPR.  */
282 
/* QH per-element operators, indexed by the `op' argument of
   mdmx_cpr_op.  The slot order parallels ob_func[]; the two trailing
   NULL slots are the positions where ob_func[] holds its OB-only
   operators, and must not be dispatched for QH.  */
static const QH_FUNC qh_func[] = {
  AndQH,  NorQH,  OrQH,   XorQH, SLLQH, SRLQH,
  AddQH,  SubQH,  MinQH,  MaxQH,
  MulQH,  MsgnQH, SRAQH,  NULL,  NULL
};
288 
/* OB per-element operators, indexed by the `op' argument of
   mdmx_cpr_op.  The slot order parallels qh_func[]; the two NULL slots
   are the positions where qh_func[] holds its QH-only operators
   (MsgnQH, SRAQH), and must not be dispatched for OB.  */
static const OB_FUNC ob_func[] = {
  AndOB,  NorOB,  OrOB,   XorOB, SLLOB, SRLOB,
  AddOB,  SubOB,  MinOB,  MaxOB,
  MulOB,  NULL,   NULL,   AbsDiffOB, AvgOB
};
294 
295 /* Auxiliary functions for CPR updates.  */
296 
297 /* Vector mapping for QH format.  */
298 static unsigned64
299 qh_vector_op(unsigned64 v1, unsigned64 v2, QH_FUNC func)
300 {
301   unsigned64 result = 0;
302   int  i;
303   signed16 h, h1, h2;
304 
305   for (i = 0; i < 64; i += 16)
306     {
307       h1 = (signed16)(v1 & 0xFFFF);  v1 >>= 16;
308       h2 = (signed16)(v2 & 0xFFFF);  v2 >>= 16;
309       h = (*func)(h1, h2);
310       result |= ((unsigned64)((unsigned16)h) << i);
311     }
312   return result;
313 }
314 
315 static unsigned64
316 qh_map_op(unsigned64 v1, signed16 h2, QH_FUNC func)
317 {
318   unsigned64 result = 0;
319   int  i;
320   signed16 h, h1;
321 
322   for (i = 0; i < 64; i += 16)
323     {
324       h1 = (signed16)(v1 & 0xFFFF);  v1 >>= 16;
325       h = (*func)(h1, h2);
326       result |= ((unsigned64)((unsigned16)h) << i);
327     }
328   return result;
329 }
330 
331 
332 /* Vector operations for OB format.  */
333 
334 static unsigned64
335 ob_vector_op(unsigned64 v1, unsigned64 v2, OB_FUNC func)
336 {
337   unsigned64 result = 0;
338   int  i;
339   unsigned8 b, b1, b2;
340 
341   for (i = 0; i < 64; i += 8)
342     {
343       b1 = v1 & 0xFF;  v1 >>= 8;
344       b2 = v2 & 0xFF;  v2 >>= 8;
345       b = (*func)(b1, b2);
346       result |= ((unsigned64)b << i);
347     }
348   return result;
349 }
350 
351 static unsigned64
352 ob_map_op(unsigned64 v1, unsigned8 b2, OB_FUNC func)
353 {
354   unsigned64 result = 0;
355   int  i;
356   unsigned8 b, b1;
357 
358   for (i = 0; i < 64; i += 8)
359     {
360       b1 = v1 & 0xFF;  v1 >>= 8;
361       b = (*func)(b1, b2);
362       result |= ((unsigned64)b << i);
363     }
364   return result;
365 }
366 
367 
368 /* Primary entry for operations that update CPRs.  */
369 unsigned64
370 mdmx_cpr_op(sim_cpu *cpu,
371 	    address_word cia,
372 	    int op,
373 	    unsigned64 op1,
374 	    int vt,
375 	    MX_fmtsel fmtsel)
376 {
377   unsigned64 op2;
378   unsigned64 result = 0;
379 
380   switch (MX_FMT (fmtsel))
381     {
382     case mdmx_qh:
383       switch (MX_VT (fmtsel))
384 	{
385 	case sel_elem:
386 	  op2 = ValueFPR(vt, fmt_mdmx);
387 	  result = qh_map_op(op1, QH_ELEM(op2, fmtsel), qh_func[op]);
388 	  break;
389 	case sel_vect:
390 	  result = qh_vector_op(op1, ValueFPR(vt, fmt_mdmx), qh_func[op]);
391 	  break;
392 	case sel_imm:
393 	  result = qh_map_op(op1, vt, qh_func[op]);
394 	  break;
395 	}
396       break;
397     case mdmx_ob:
398       switch (MX_VT (fmtsel))
399 	{
400 	case sel_elem:
401 	  op2 = ValueFPR(vt, fmt_mdmx);
402 	  result = ob_map_op(op1, OB_ELEM(op2, fmtsel), ob_func[op]);
403 	  break;
404 	case sel_vect:
405 	  result = ob_vector_op(op1, ValueFPR(vt, fmt_mdmx), ob_func[op]);
406 	  break;
407 	case sel_imm:
408 	  result = ob_map_op(op1, vt, ob_func[op]);
409 	  break;
410 	}
411       break;
412     default:
413       Unpredictable ();
414     }
415 
416   return result;
417 }
418 
419 
420 /* Operations that update CCs */
421 
422 static void
423 qh_vector_test(sim_cpu *cpu, unsigned64 v1, unsigned64 v2, int cond)
424 {
425   int  i;
426   signed16 h1, h2;
427   int  boolean;
428 
429   for (i = 0; i < 4; i++)
430     {
431       h1 = (signed16)(v1 & 0xFFFF);  v1 >>= 16;
432       h2 = (signed16)(v2 & 0xFFFF);  v2 >>= 16;
433       boolean = ((cond & MX_C_EQ) && (h1 == h2)) ||
434 	((cond & MX_C_LT) && (h1 < h2));
435       SETFCC(i, boolean);
436     }
437 }
438 
439 static void
440 qh_map_test(sim_cpu *cpu, unsigned64 v1, signed16 h2, int cond)
441 {
442   int  i;
443   signed16 h1;
444   int  boolean;
445 
446   for (i = 0; i < 4; i++)
447     {
448       h1 = (signed16)(v1 & 0xFFFF);  v1 >>= 16;
449       boolean = ((cond & MX_C_EQ) && (h1 == h2)) ||
450 	((cond & MX_C_LT) && (h1 < h2));
451       SETFCC(i, boolean);
452     }
453 }
454 
455 static void
456 ob_vector_test(sim_cpu *cpu, unsigned64 v1, unsigned64 v2, int cond)
457 {
458   int  i;
459   unsigned8 b1, b2;
460   int  boolean;
461 
462   for (i = 0; i < 8; i++)
463     {
464       b1 = v1 & 0xFF;  v1 >>= 8;
465       b2 = v2 & 0xFF;  v2 >>= 8;
466       boolean = ((cond & MX_C_EQ) && (b1 == b2)) ||
467 	((cond & MX_C_LT) && (b1 < b2));
468       SETFCC(i, boolean);
469     }
470 }
471 
472 static void
473 ob_map_test(sim_cpu *cpu, unsigned64 v1, unsigned8 b2, int cond)
474 {
475   int  i;
476   unsigned8 b1;
477   int  boolean;
478 
479   for (i = 0; i < 8; i++)
480     {
481       b1 = (unsigned8)(v1 & 0xFF);  v1 >>= 8;
482       boolean = ((cond & MX_C_EQ) && (b1 == b2)) ||
483 	((cond & MX_C_LT) && (b1 < b2));
484       SETFCC(i, boolean);
485     }
486 }
487 
488 
489 void
490 mdmx_cc_op(sim_cpu *cpu,
491 	   address_word cia,
492 	   int cond,
493 	   unsigned64 v1,
494 	   int vt,
495 	   MX_fmtsel fmtsel)
496 {
497   unsigned64 op2;
498 
499   switch (MX_FMT (fmtsel))
500     {
501     case mdmx_qh:
502       switch (MX_VT (fmtsel))
503 	{
504 	case sel_elem:
505 	  op2 = ValueFPR(vt, fmt_mdmx);
506 	  qh_map_test(cpu, v1, QH_ELEM(op2, fmtsel), cond);
507 	  break;
508 	case sel_vect:
509 	  qh_vector_test(cpu, v1, ValueFPR(vt, fmt_mdmx), cond);
510 	  break;
511 	case sel_imm:
512 	  qh_map_test(cpu, v1, vt, cond);
513 	  break;
514 	}
515       break;
516     case mdmx_ob:
517       switch (MX_VT (fmtsel))
518 	{
519 	case sel_elem:
520 	  op2 = ValueFPR(vt, fmt_mdmx);
521 	  ob_map_test(cpu, v1, OB_ELEM(op2, fmtsel), cond);
522 	  break;
523 	case sel_vect:
524 	  ob_vector_test(cpu, v1, ValueFPR(vt, fmt_mdmx), cond);
525 	  break;
526 	case sel_imm:
527 	  ob_map_test(cpu, v1, vt, cond);
528 	  break;
529 	}
530       break;
531     default:
532       Unpredictable ();
533     }
534 }
535 
536 
537 /* Pick operations.  */
538 
539 static unsigned64
540 qh_vector_pick(sim_cpu *cpu, unsigned64 v1, unsigned64 v2, int tf)
541 {
542   unsigned64 result = 0;
543   int  i, s;
544   unsigned16 h;
545 
546   s = 0;
547   for (i = 0; i < 4; i++)
548     {
549       h = ((GETFCC(i) == tf) ? (v1 & 0xFFFF) : (v2 & 0xFFFF));
550       v1 >>= 16;  v2 >>= 16;
551       result |= ((unsigned64)h << s);
552       s += 16;
553     }
554   return result;
555 }
556 
557 static unsigned64
558 qh_map_pick(sim_cpu *cpu, unsigned64 v1, signed16 h2, int tf)
559 {
560   unsigned64 result = 0;
561   int  i, s;
562   unsigned16 h;
563 
564   s = 0;
565   for (i = 0; i < 4; i++)
566     {
567       h = (GETFCC(i) == tf) ? (v1 & 0xFFFF) : (unsigned16)h2;
568       v1 >>= 16;
569       result |= ((unsigned64)h << s);
570       s += 16;
571     }
572   return result;
573 }
574 
575 static unsigned64
576 ob_vector_pick(sim_cpu *cpu, unsigned64 v1, unsigned64 v2, int tf)
577 {
578   unsigned64 result = 0;
579   int  i, s;
580   unsigned8 b;
581 
582   s = 0;
583   for (i = 0; i < 8; i++)
584     {
585       b = (GETFCC(i) == tf) ? (v1 & 0xFF) : (v2 & 0xFF);
586       v1 >>= 8;  v2 >>= 8;
587       result |= ((unsigned64)b << s);
588       s += 8;
589     }
590   return result;
591 }
592 
593 static unsigned64
594 ob_map_pick(sim_cpu *cpu, unsigned64 v1, unsigned8 b2, int tf)
595 {
596   unsigned64 result = 0;
597   int  i, s;
598   unsigned8 b;
599 
600   s = 0;
601   for (i = 0; i < 8; i++)
602     {
603       b = (GETFCC(i) == tf) ? (v1 & 0xFF) : b2;
604       v1 >>= 8;
605       result |= ((unsigned64)b << s);
606       s += 8;
607     }
608   return result;
609 }
610 
611 
612 unsigned64
613 mdmx_pick_op(sim_cpu *cpu,
614 	     address_word cia,
615 	     int tf,
616 	     unsigned64 v1,
617 	     int vt,
618 	     MX_fmtsel fmtsel)
619 {
620   unsigned64 result = 0;
621   unsigned64 op2;
622 
623   switch (MX_FMT (fmtsel))
624     {
625     case mdmx_qh:
626       switch (MX_VT (fmtsel))
627 	{
628 	case sel_elem:
629 	  op2 = ValueFPR(vt, fmt_mdmx);
630 	  result = qh_map_pick(cpu, v1, QH_ELEM(op2, fmtsel), tf);
631 	  break;
632 	case sel_vect:
633 	  result = qh_vector_pick(cpu, v1, ValueFPR(vt, fmt_mdmx), tf);
634 	  break;
635 	case sel_imm:
636 	  result = qh_map_pick(cpu, v1, vt, tf);
637 	  break;
638 	}
639       break;
640     case mdmx_ob:
641       switch (MX_VT (fmtsel))
642 	{
643 	case sel_elem:
644 	  op2 = ValueFPR(vt, fmt_mdmx);
645 	  result = ob_map_pick(cpu, v1, OB_ELEM(op2, fmtsel), tf);
646 	  break;
647 	case sel_vect:
648 	  result = ob_vector_pick(cpu, v1, ValueFPR(vt, fmt_mdmx), tf);
649 	  break;
650 	case sel_imm:
651 	  result = ob_map_pick(cpu, v1, vt, tf);
652 	  break;
653 	}
654       break;
655     default:
656       Unpredictable ();
657     }
658   return result;
659 }
660 
661 
662 /* Accumulators.  */
663 
/* Per-lane QH accumulator update: modifies the accumulator lane *A in
   place from the two source elements TS and TT.  */
typedef void (*QH_ACC)(signed48 *a, signed16 ts, signed16 tt);
665 
666 static void
667 AccAddAQH(signed48 *a, signed16 ts, signed16 tt)
668 {
669   *a += (signed48)ts + (signed48)tt;
670 }
671 
672 static void
673 AccAddLQH(signed48 *a, signed16 ts, signed16 tt)
674 {
675   *a = (signed48)ts + (signed48)tt;
676 }
677 
678 static void
679 AccMulAQH(signed48 *a, signed16 ts, signed16 tt)
680 {
681   *a += (signed48)ts * (signed48)tt;
682 }
683 
684 static void
685 AccMulLQH(signed48 *a, signed16 ts, signed16 tt)
686 {
687   *a = (signed48)ts * (signed48)tt;
688 }
689 
690 static void
691 SubMulAQH(signed48 *a, signed16 ts, signed16 tt)
692 {
693   *a -= (signed48)ts * (signed48)tt;
694 }
695 
696 static void
697 SubMulLQH(signed48 *a, signed16 ts, signed16 tt)
698 {
699   *a = -((signed48)ts * (signed48)tt);
700 }
701 
702 static void
703 AccSubAQH(signed48 *a, signed16 ts, signed16 tt)
704 {
705   *a += (signed48)ts - (signed48)tt;
706 }
707 
708 static void
709 AccSubLQH(signed48 *a, signed16 ts, signed16 tt)
710 {
711   *a =  (signed48)ts - (signed48)tt;
712 }
713 
714 
/* Per-lane OB accumulator update: modifies the accumulator lane *ACC in
   place from the two source elements TS and TT.  */
typedef void (*OB_ACC)(signed24 *acc, unsigned8 ts, unsigned8 tt);
716 
717 static void
718 AccAddAOB(signed24 *a, unsigned8 ts, unsigned8 tt)
719 {
720   *a += (signed24)ts + (signed24)tt;
721 }
722 
723 static void
724 AccAddLOB(signed24 *a, unsigned8 ts, unsigned8 tt)
725 {
726   *a = (signed24)ts + (signed24)tt;
727 }
728 
729 static void
730 AccMulAOB(signed24 *a, unsigned8 ts, unsigned8 tt)
731 {
732   *a += (signed24)ts * (signed24)tt;
733 }
734 
735 static void
736 AccMulLOB(signed24 *a, unsigned8 ts, unsigned8 tt)
737 {
738   *a = (signed24)ts * (signed24)tt;
739 }
740 
741 static void
742 SubMulAOB(signed24 *a, unsigned8 ts, unsigned8 tt)
743 {
744   *a -= (signed24)ts * (signed24)tt;
745 }
746 
747 static void
748 SubMulLOB(signed24 *a, unsigned8 ts, unsigned8 tt)
749 {
750   *a = -((signed24)ts * (signed24)tt);
751 }
752 
753 static void
754 AccSubAOB(signed24 *a, unsigned8 ts, unsigned8 tt)
755 {
756   *a += (signed24)ts - (signed24)tt;
757 }
758 
759 static void
760 AccSubLOB(signed24 *a, unsigned8 ts, unsigned8 tt)
761 {
762   *a = (signed24)ts - (signed24)tt;
763 }
764 
765 static void
766 AccAbsDiffOB(signed24 *a, unsigned8 ts, unsigned8 tt)
767 {
768   unsigned8 t = (ts >= tt ? ts - tt : tt - ts);
769   *a += (signed24)t;
770 }
771 
772 
/* Dispatch tables for operations that update the accumulators.  */
774 
775 static const QH_ACC qh_acc[] = {
776   AccAddAQH, AccAddAQH, AccMulAQH, AccMulLQH,
777   SubMulAQH, SubMulLQH, AccSubAQH, AccSubLQH,
778   NULL
779 };
780 
/* OB accumulator operators, indexed by the `op' argument of
   mdmx_acc_op.  Entries alternate between the accumulating ("A") and
   loading ("L") form of each operation; the last slot is the OB-only
   absolute-difference accumulate.  */
static const OB_ACC ob_acc[] = {
  AccAddAOB, AccAddLOB, AccMulAOB, AccMulLOB,
  SubMulAOB, SubMulLOB, AccSubAOB, AccSubLOB,
  AccAbsDiffOB
};
786 
787 
788 static void
789 qh_vector_acc(signed48 a[], unsigned64 v1, unsigned64 v2, QH_ACC acc)
790 {
791   int  i;
792   signed16 h1, h2;
793 
794   for (i = 0; i < 4; i++)
795     {
796       h1 = (signed16)(v1 & 0xFFFF);  v1 >>= 16;
797       h2 = (signed16)(v2 & 0xFFFF);  v2 >>= 16;
798       (*acc)(&a[i], h1, h2);
799     }
800 }
801 
802 static void
803 qh_map_acc(signed48 a[], unsigned64 v1, signed16 h2, QH_ACC acc)
804 {
805   int  i;
806   signed16 h1;
807 
808   for (i = 0; i < 4; i++)
809     {
810       h1 = (signed16)(v1 & 0xFFFF);  v1 >>= 16;
811       (*acc)(&a[i], h1, h2);
812     }
813 }
814 
815 static void
816 ob_vector_acc(signed24 a[], unsigned64 v1, unsigned64 v2, OB_ACC acc)
817 {
818   int  i;
819   unsigned8  b1, b2;
820 
821   for (i = 0; i < 8; i++)
822     {
823       b1 = v1 & 0xFF;  v1 >>= 8;
824       b2 = v2 & 0xFF;  v2 >>= 8;
825       (*acc)(&a[i], b1, b2);
826     }
827 }
828 
829 static void
830 ob_map_acc(signed24 a[], unsigned64 v1, unsigned8 b2, OB_ACC acc)
831 {
832   int  i;
833   unsigned8 b1;
834 
835   for (i = 0; i < 8; i++)
836     {
837       b1 = v1 & 0xFF;  v1 >>= 8;
838       (*acc)(&a[i], b1, b2);
839     }
840 }
841 
842 
843 /* Primary entry for operations that accumulate */
844 void
845 mdmx_acc_op(sim_cpu *cpu,
846 	    address_word cia,
847 	    int op,
848 	    unsigned64 op1,
849 	    int vt,
850 	    MX_fmtsel fmtsel)
851 {
852   unsigned64 op2;
853 
854   switch (MX_FMT (fmtsel))
855     {
856     case mdmx_qh:
857       switch (MX_VT (fmtsel))
858 	{
859 	case sel_elem:
860 	  op2 = ValueFPR(vt, fmt_mdmx);
861 	  qh_map_acc(ACC.qh, op1, QH_ELEM(op2, fmtsel), qh_acc[op]);
862 	  break;
863 	case sel_vect:
864 	  qh_vector_acc(ACC.qh, op1, ValueFPR(vt, fmt_mdmx), qh_acc[op]);
865 	  break;
866 	case sel_imm:
867 	  qh_map_acc(ACC.qh, op1, vt, qh_acc[op]);
868 	  break;
869 	}
870       break;
871     case mdmx_ob:
872       switch (MX_VT (fmtsel))
873 	{
874 	case sel_elem:
875 	  op2 = ValueFPR(vt, fmt_mdmx);
876 	  ob_map_acc(ACC.ob, op1, OB_ELEM(op2, fmtsel), ob_acc[op]);
877 	  break;
878 	case sel_vect:
879 	  ob_vector_acc(ACC.ob, op1, ValueFPR(vt, fmt_mdmx), ob_acc[op]);
880 	  break;
881 	case sel_imm:
882 	  ob_map_acc(ACC.ob, op1, vt, ob_acc[op]);
883 	  break;
884 	}
885       break;
886     default:
887       Unpredictable ();
888     }
889 }
890 
891 
892 /* Reading and writing accumulator (no conversion).  */
893 
894 unsigned64
895 mdmx_rac_op(sim_cpu *cpu,
896 	    address_word cia,
897 	    int op,
898 	    int fmt)
899 {
900   unsigned64    result;
901   unsigned int  shift;
902   int           i;
903 
904   shift = op;          /* L = 00, M = 01, H = 10.  */
905   result = 0;
906 
907   switch (fmt)
908     {
909     case MX_FMT_QH:
910       shift <<= 4;              /* 16 bits per element.  */
911       for (i = 3; i >= 0; --i)
912 	{
913 	  result <<= 16;
914 	  result |= ((ACC.qh[i] >> shift) & 0xFFFF);
915 	}
916       break;
917     case MX_FMT_OB:
918       shift <<= 3;              /*  8 bits per element.  */
919       for (i = 7; i >= 0; --i)
920 	{
921 	  result <<= 8;
922 	  result |= ((ACC.ob[i] >> shift) & 0xFF);
923 	}
924       break;
925     default:
926       Unpredictable ();
927     }
928   return result;
929 }
930 
931 void
932 mdmx_wacl(sim_cpu *cpu,
933 	  address_word cia,
934 	  int fmt,
935 	  unsigned64 vs,
936 	  unsigned64 vt)
937 {
938   int           i;
939 
940   switch (fmt)
941     {
942     case MX_FMT_QH:
943       for (i = 0; i < 4; i++)
944 	{
945 	  signed32  s = (signed16)(vs & 0xFFFF);
946 	  ACC.qh[i] = ((signed48)s << 16) | (vt & 0xFFFF);
947 	  vs >>= 16;  vt >>= 16;
948 	}
949       break;
950     case MX_FMT_OB:
951       for (i = 0; i < 8; i++)
952 	{
953 	  signed16  s = (signed8)(vs & 0xFF);
954 	  ACC.ob[i] = ((signed24)s << 8) | (vt & 0xFF);
955 	  vs >>= 8;   vt >>= 8;
956 	}
957       break;
958     default:
959       Unpredictable ();
960     }
961 }
962 
963 void
964 mdmx_wach(sim_cpu *cpu,
965 	  address_word cia,
966 	  int fmt,
967 	  unsigned64 vs)
968 {
969   int           i;
970 
971   switch (fmt)
972     {
973     case MX_FMT_QH:
974       for (i = 0; i < 4; i++)
975 	{
976 	  signed32  s = (signed16)(vs & 0xFFFF);
977 	  ACC.qh[i] &= ~((signed48)0xFFFF << 32);
978 	  ACC.qh[i] |=  ((signed48)s << 32);
979 	  vs >>= 16;
980 	}
981       break;
982     case MX_FMT_OB:
983       for (i = 0; i < 8; i++)
984 	{
985 	  ACC.ob[i] &= ~((signed24)0xFF << 16);
986 	  ACC.ob[i] |=  ((signed24)(vs & 0xFF) << 16);
987 	  vs >>= 8;
988 	}
989       break;
990     default:
991       Unpredictable ();
992     }
993 }
994 
995 
996 /* Reading and writing accumulator (rounding conversions).
997    Enumerating function guarantees s >= 0 for QH ops.  */
998 
/* QH rounding function: reduce accumulator value A, shifted right by S
   bits, to one halfword element.  */
typedef signed16 (*QH_ROUND)(signed48 a, signed16 s);

/* QH_BIT(n) is a word with only bit N set; QH_ONES(n) is a word with
   the N low-order bits set.  */
#define QH_BIT(n)  ((unsigned48)1 << (n))
#define QH_ONES(n) (((unsigned48)1 << (n))-1)
1003 
1004 static signed16
1005 RNASQH(signed48 a, signed16 s)
1006 {
1007   signed48 t;
1008   signed16 result = 0;
1009 
1010   if (s > 48)
1011     result = 0;
1012   else
1013     {
1014       t = (a >> s);
1015       if ((a & QH_BIT(47)) == 0)
1016 	{
1017 	  if (s > 0 && ((a >> (s-1)) & 1) == 1)
1018 	    t++;
1019 	  if (t > QH_MAX)
1020 	    t = QH_MAX;
1021 	}
1022       else
1023 	{
1024 	  if (s > 0 && ((a >> (s-1)) & 1) == 1)
1025 	    {
1026 	      if (s > 1 && ((unsigned48)a & QH_ONES(s-1)) != 0)
1027 		t++;
1028 	    }
1029 	  if (t < QH_MIN)
1030 	    t = QH_MIN;
1031 	}
1032       result = (signed16)t;
1033     }
1034   return result;
1035 }
1036 
1037 static signed16
1038 RNAUQH(signed48 a, signed16 s)
1039 {
1040   unsigned48 t;
1041   signed16 result;
1042 
1043   if (s > 48)
1044     result = 0;
1045   else if (s == 48)
1046     result = ((unsigned48)a & MASK48) >> 47;
1047   else
1048     {
1049       t = ((unsigned48)a & MASK48) >> s;
1050       if (s > 0 && ((a >> (s-1)) & 1) == 1)
1051 	t++;
1052       if (t > 0xFFFF)
1053 	t = 0xFFFF;
1054       result = (signed16)t;
1055     }
1056   return result;
1057 }
1058 
1059 static signed16
1060 RNESQH(signed48 a, signed16 s)
1061 {
1062   signed48 t;
1063   signed16 result = 0;
1064 
1065   if (s > 47)
1066     result = 0;
1067   else
1068     {
1069       t = (a >> s);
1070       if (s > 0 && ((a >> (s-1)) & 1) == 1)
1071 	{
1072 	  if (s == 1 || (a & QH_ONES(s-1)) == 0)
1073 	    t += t & 1;
1074 	  else
1075 	    t += 1;
1076 	}
1077       if ((a & QH_BIT(47)) == 0)
1078 	{
1079 	  if (t > QH_MAX)
1080 	    t = QH_MAX;
1081 	}
1082       else
1083 	{
1084 	  if (t < QH_MIN)
1085 	    t = QH_MIN;
1086 	}
1087       result = (signed16)t;
1088     }
1089   return result;
1090 }
1091 
1092 static signed16
1093 RNEUQH(signed48 a, signed16 s)
1094 {
1095   unsigned48 t;
1096   signed16 result;
1097 
1098   if (s > 48)
1099     result = 0;
1100   else if (s == 48)
1101     result = ((unsigned48)a > QH_BIT(47) ? 1 : 0);
1102   else
1103     {
1104       t = ((unsigned48)a & MASK48) >> s;
1105       if (s > 0 && ((a >> (s-1)) & 1) == 1)
1106 	{
1107 	  if (s > 1 && (a & QH_ONES(s-1)) != 0)
1108 	    t++;
1109 	  else
1110 	    t += t & 1;
1111 	}
1112       if (t > 0xFFFF)
1113 	t = 0xFFFF;
1114       result = (signed16)t;
1115     }
1116   return result;
1117 }
1118 
1119 static signed16
1120 RZSQH(signed48 a, signed16 s)
1121 {
1122   signed48 t;
1123   signed16 result = 0;
1124 
1125   if (s > 47)
1126     result = 0;
1127   else
1128     {
1129       t = (a >> s);
1130       if ((a & QH_BIT(47)) == 0)
1131 	{
1132 	  if (t > QH_MAX)
1133 	    t = QH_MAX;
1134 	}
1135       else
1136 	{
1137 	  if (t < QH_MIN)
1138 	    t = QH_MIN;
1139 	}
1140       result = (signed16)t;
1141     }
1142   return result;
1143 }
1144 
1145 static signed16
1146 RZUQH(signed48 a, signed16 s)
1147 {
1148   unsigned48 t;
1149   signed16 result = 0;
1150 
1151   if (s > 48)
1152     result = 0;
1153   else if (s == 48)
1154     result = ((unsigned48)a > QH_BIT(47) ? 1 : 0);
1155   else
1156     {
1157       t = ((unsigned48)a & MASK48) >> s;
1158       if (t > 0xFFFF)
1159 	t = 0xFFFF;
1160       result = (signed16)t;
1161     }
1162   return result;
1163 }
1164 
1165 
/* OB rounding function: reduce accumulator value A, shifted right by S
   bits, to one byte element.  */
typedef unsigned8 (*OB_ROUND)(signed24 a, unsigned8 s);

/* OB_BIT(n) is a word with only bit N set; OB_ONES(n) is a word with
   the N low-order bits set.  */
#define OB_BIT(n)  ((unsigned24)1 << (n))
#define OB_ONES(n) (((unsigned24)1 << (n))-1)
1170 
1171 static unsigned8
1172 RNAUOB(signed24 a, unsigned8 s)
1173 {
1174   unsigned8 result;
1175   unsigned24 t;
1176 
1177   if (s > 24)
1178     result = 0;
1179   else if (s == 24)
1180     result = ((unsigned24)a & MASK24) >> 23;
1181   else
1182     {
1183       t = ((unsigned24)a & MASK24) >> s;
1184       if (s > 0 && ((a >> (s-1)) & 1) == 1)
1185 	t ++;
1186       result = OB_CLAMP(t);
1187     }
1188   return result;
1189 }
1190 
1191 static unsigned8
1192 RNEUOB(signed24 a, unsigned8 s)
1193 {
1194   unsigned8 result;
1195   unsigned24 t;
1196 
1197   if (s > 24)
1198     result = 0;
1199   else if (s == 24)
1200     result = (((unsigned24)a & MASK24) > OB_BIT(23) ? 1 : 0);
1201   else
1202     {
1203       t = ((unsigned24)a & MASK24) >> s;
1204       if (s > 0 && ((a >> (s-1)) & 1) == 1)
1205 	{
1206 	  if (s > 1 && (a & OB_ONES(s-1)) != 0)
1207 	    t++;
1208 	  else
1209 	    t += t & 1;
1210 	}
1211       result = OB_CLAMP(t);
1212     }
1213   return result;
1214 }
1215 
1216 static unsigned8
1217 RZUOB(signed24 a, unsigned8 s)
1218 {
1219   unsigned8 result;
1220   unsigned24 t;
1221 
1222   if (s >= 24)
1223     result = 0;
1224   else
1225     {
1226       t = ((unsigned24)a & MASK24) >> s;
1227       result = OB_CLAMP(t);
1228     }
1229   return result;
1230 }
1231 
1232 
/* QH rounding functions, indexed by the `rm' argument of
   mdmx_round_op.  */
static const QH_ROUND qh_round[] = {
  RNASQH, RNAUQH, RNESQH, RNEUQH, RZSQH,  RZUQH
};
1236 
/* OB rounding functions, indexed by the `rm' argument of
   mdmx_round_op.  Slots parallel qh_round[]; the NULL slots are the
   positions of the signed QH variants, for which no OB function is
   defined here, and must not be dispatched.  */
static const OB_ROUND ob_round[] = {
  NULL,   RNAUOB, NULL,   RNEUOB, NULL,   RZUOB
};
1240 
1241 
1242 static unsigned64
1243 qh_vector_round(sim_cpu *cpu, address_word cia, unsigned64 v2, QH_ROUND round)
1244 {
1245   unsigned64 result = 0;
1246   int  i, s;
1247   signed16 h, h2;
1248 
1249   s = 0;
1250   for (i = 0; i < 4; i++)
1251     {
1252       h2 = (signed16)(v2 & 0xFFFF);
1253       if (h2 >= 0)
1254 	h = (*round)(ACC.qh[i], h2);
1255       else
1256 	{
1257 	  UnpredictableResult ();
1258 	  h = 0xdead;
1259 	}
1260       v2 >>= 16;
1261       result |= ((unsigned64)((unsigned16)h) << s);
1262       s += 16;
1263     }
1264   return result;
1265 }
1266 
1267 static unsigned64
1268 qh_map_round(sim_cpu *cpu, address_word cia, signed16 h2, QH_ROUND round)
1269 {
1270   unsigned64 result = 0;
1271   int  i, s;
1272   signed16  h;
1273 
1274   s = 0;
1275   for (i = 0; i < 4; i++)
1276     {
1277       if (h2 >= 0)
1278 	h = (*round)(ACC.qh[i], h2);
1279       else
1280 	{
1281 	  UnpredictableResult ();
1282 	  h = 0xdead;
1283 	}
1284       result |= ((unsigned64)((unsigned16)h) << s);
1285       s += 16;
1286     }
1287   return result;
1288 }
1289 
1290 static unsigned64
1291 ob_vector_round(sim_cpu *cpu, address_word cia, unsigned64 v2, OB_ROUND round)
1292 {
1293   unsigned64 result = 0;
1294   int  i, s;
1295   unsigned8 b, b2;
1296 
1297   s = 0;
1298   for (i = 0; i < 8; i++)
1299     {
1300       b2 = v2 & 0xFF;  v2 >>= 8;
1301       b = (*round)(ACC.ob[i], b2);
1302       result |= ((unsigned64)b << s);
1303       s += 8;
1304     }
1305   return result;
1306 }
1307 
1308 static unsigned64
1309 ob_map_round(sim_cpu *cpu, address_word cia, unsigned8 b2, OB_ROUND round)
1310 {
1311   unsigned64 result = 0;
1312   int  i, s;
1313   unsigned8 b;
1314 
1315   s = 0;
1316   for (i = 0; i < 8; i++)
1317     {
1318       b = (*round)(ACC.ob[i], b2);
1319       result |= ((unsigned64)b << s);
1320       s += 8;
1321     }
1322   return result;
1323 }
1324 
1325 
1326 unsigned64
1327 mdmx_round_op(sim_cpu *cpu,
1328 	      address_word cia,
1329 	      int rm,
1330 	      int vt,
1331 	      MX_fmtsel fmtsel)
1332 {
1333   unsigned64 op2;
1334   unsigned64 result = 0;
1335 
1336   switch (MX_FMT (fmtsel))
1337     {
1338     case mdmx_qh:
1339       switch (MX_VT (fmtsel))
1340 	{
1341 	case sel_elem:
1342 	  op2 = ValueFPR(vt, fmt_mdmx);
1343 	  result = qh_map_round(cpu, cia, QH_ELEM(op2, fmtsel), qh_round[rm]);
1344 	  break;
1345 	case sel_vect:
1346 	  op2 = ValueFPR(vt, fmt_mdmx);
1347 	  result = qh_vector_round(cpu, cia, op2, qh_round[rm]);
1348 	  break;
1349 	case sel_imm:
1350 	  result = qh_map_round(cpu, cia, vt, qh_round[rm]);
1351 	  break;
1352 	}
1353       break;
1354     case mdmx_ob:
1355       switch (MX_VT (fmtsel))
1356 	{
1357 	case sel_elem:
1358 	  op2 = ValueFPR(vt, fmt_mdmx);
1359 	  result = ob_map_round(cpu, cia, OB_ELEM(op2, fmtsel), ob_round[rm]);
1360 	  break;
1361 	case sel_vect:
1362 	  op2 = ValueFPR(vt, fmt_mdmx);
1363 	  result = ob_vector_round(cpu, cia, op2, ob_round[rm]);
1364 	  break;
1365 	case sel_imm:
1366 	  result = ob_map_round(cpu, cia, vt, ob_round[rm]);
1367 	  break;
1368 	}
1369       break;
1370     default:
1371       Unpredictable ();
1372     }
1373 
1374   return result;
1375 }
1376 
1377 
1378 /* Shuffle operation.  */
1379 
/* One destination element of a shuffle: which input it comes from and
   which element of that input.  In mdmx_shuffle, `vs' reads op1, `vt'
   reads op2, and `ss' (OB only) yields 0xFF or 0 according to the top
   bit of the selected op1 byte.  */
typedef struct {
  enum {vs, ss, vt} source;
  unsigned int      index;
} sh_map;
1384 
/* OB shuffle table: row = (shop >> 1) in mdmx_shuffle, column =
   destination byte index (0 is least significant).  The empty row is
   zero-initialized, i.e. every entry reads as {vs,0}.  */
static const sh_map ob_shuffle[][8] = {
  /* MDMX 2.0 encodings (3-4, 6-7).  */
  /* vr5400   encoding  (5), otherwise.  */
  {                                                              }, /* RSVD */
  {{vt,4}, {vs,4}, {vt,5}, {vs,5}, {vt,6}, {vs,6}, {vt,7}, {vs,7}}, /* RSVD */
  {{vt,0}, {vs,0}, {vt,1}, {vs,1}, {vt,2}, {vs,2}, {vt,3}, {vs,3}}, /* RSVD */
  {{vs,0}, {ss,0}, {vs,1}, {ss,1}, {vs,2}, {ss,2}, {vs,3}, {ss,3}}, /* upsl */
  {{vt,1}, {vt,3}, {vt,5}, {vt,7}, {vs,1}, {vs,3}, {vs,5}, {vs,7}}, /* pach */
  {{vt,0}, {vt,2}, {vt,4}, {vt,6}, {vs,0}, {vs,2}, {vs,4}, {vs,6}}, /* pacl */
  {{vt,4}, {vs,4}, {vt,5}, {vs,5}, {vt,6}, {vs,6}, {vt,7}, {vs,7}}, /* mixh */
  {{vt,0}, {vs,0}, {vt,1}, {vs,1}, {vt,2}, {vs,2}, {vt,3}, {vs,3}}  /* mixl */
};
1397 
/* QH shuffle table: row = (shop >> 2) in mdmx_shuffle, column =
   destination halfword index (0 is least significant).  The empty
   (RSVD) rows are zero-initialized, i.e. every entry reads as
   {vs,0}.  */
static const sh_map qh_shuffle[][4] = {
  {{vt,2}, {vs,2}, {vt,3}, {vs,3}},  /* mixh */
  {{vt,0}, {vs,0}, {vt,1}, {vs,1}},  /* mixl */
  {{vt,1}, {vt,3}, {vs,1}, {vs,3}},  /* pach */
  {                              },  /* RSVD */
  {{vt,1}, {vs,0}, {vt,3}, {vs,2}},  /* bfla */
  {                              },  /* RSVD */
  {{vt,2}, {vt,3}, {vs,2}, {vs,3}},  /* repa */
  {{vt,0}, {vt,1}, {vs,0}, {vs,1}}   /* repb */
};
1408 
1409 
1410 unsigned64
1411 mdmx_shuffle(sim_cpu *cpu,
1412 	     address_word cia,
1413 	     int shop,
1414 	     unsigned64 op1,
1415 	     unsigned64 op2)
1416 {
1417   unsigned64 result = 0;
1418   int  i, s;
1419   int  op;
1420 
1421   if ((shop & 0x3) == 0x1)       /* QH format.  */
1422     {
1423       op = shop >> 2;
1424       s = 0;
1425       for (i = 0; i < 4; i++)
1426 	{
1427 	  unsigned64 v;
1428 
1429 	  switch (qh_shuffle[op][i].source)
1430 	    {
1431 	    case vs:
1432 	      v = op1;
1433 	      break;
1434 	    case vt:
1435 	      v = op2;
1436 	      break;
1437 	    default:
1438 	      Unpredictable ();
1439 	      v = 0;
1440 	    }
1441 	  result |= (((v >> 16*qh_shuffle[op][i].index) & 0xFFFF) << s);
1442 	  s += 16;
1443 	}
1444     }
1445   else if ((shop & 0x1) == 0x0)  /* OB format.  */
1446     {
1447       op = shop >> 1;
1448       s = 0;
1449       for (i = 0; i < 8; i++)
1450 	{
1451 	  unsigned8 b;
1452 	  unsigned int ishift = 8*ob_shuffle[op][i].index;
1453 
1454 	  switch (ob_shuffle[op][i].source)
1455 	    {
1456 	    case vs:
1457 	      b = (op1 >> ishift) & 0xFF;
1458 	      break;
1459 	    case ss:
1460 	      b = ((op1 >> ishift) & 0x80) ? 0xFF : 0;
1461 	      break;
1462 	    case vt:
1463 	      b = (op2 >> ishift) & 0xFF;
1464 	      break;
1465 	    default:
1466 	      Unpredictable ();
1467 	      b = 0;
1468 	    }
1469 	  result |= ((unsigned64)b << s);
1470 	  s += 8;
1471 	}
1472     }
1473   else
1474     Unpredictable ();
1475 
1476   return result;
1477 }
1478