xref: /netbsd-src/external/gpl3/gdb.old/dist/sim/mips/mdmx.c (revision 8b657b0747480f8989760d71343d6dd33f8d4cf9)
1 /* Simulation code for the MIPS MDMX ASE.
2    Copyright (C) 2002-2023 Free Software Foundation, Inc.
3    Contributed by Ed Satterthwaite and Chris Demetriou, of Broadcom
4    Corporation (SiByte).
5 
6 This file is part of GDB, the GNU debugger.
7 
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
12 
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 GNU General Public License for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
20 
21 /* This must come before any other includes.  */
22 #include "defs.h"
23 
24 #include <stdio.h>
25 
26 #include "sim-main.h"
27 
/* Within mdmx.c we refer to the sim_cpu directly.  CPU expands to the
   identifier `cpu', so any code using CPU or SD needs a variable of that
   name in scope.  */
#define CPU cpu
#define SD  (CPU_STATE(CPU))

/* XXX FIXME: temporary hack while the impact of making unpredictable()
   a "normal" (non-igen) function is evaluated.  The replacement expands
   to a call that names `cpu' and `cia', so both must be in scope wherever
   Unpredictable () is used.  */
#undef Unpredictable
#define Unpredictable() unpredictable_action (cpu, cia)
36 
37 /* MDMX Representations
38 
39    An 8-bit packed byte element (OB) is always unsigned.
40    The 24-bit accumulators are signed and are represented as 32-bit
41    signed values, which are reduced to 24-bit signed values prior to
42    Round and Clamp operations.
43 
44    A 16-bit packed halfword element (QH) is always signed.
45    The 48-bit accumulators are signed and are represented as 64-bit
46    signed values, which are reduced to 48-bit signed values prior to
47    Round and Clamp operations.
48 
49    The code below assumes a 2's-complement representation of signed
50    quantities.  Care is required to clear extended sign bits when
51    repacking fields.
52 
53    The code (and the code for arithmetic shifts in mips.igen) also makes
54    the (not guaranteed portable) assumption that right shifts of signed
55    quantities in C do sign extension.  */
56 
/* Unsigned 64-bit container used when treating a QH accumulator as a
   bit pattern; MASK48 selects its low 48 bits.  */
typedef uint64_t unsigned48;
#define MASK48 (UNSIGNED64 (0xffffffffffff))

/* Unsigned 32-bit container used when treating an OB accumulator as a
   bit pattern; MASK24 selects its low 24 bits.  */
typedef uint32_t unsigned24;
#define MASK24 (UNSIGNED32 (0xffffff))
62 
/* Element format of an MDMX vector, as decoded by MX_FMT.  */
typedef enum
{
  /* OB (octal byte) */
  mdmx_ob,
  /* QH (quad half-word) */
  mdmx_qh
} MX_fmt;
67 
/* How the second (vt) operand is selected, as decoded by MX_VT.  */
typedef enum
{
  /* element select */
  sel_elem,
  /* vector select */
  sel_vect,
  /* immediate select */
  sel_imm
} VT_select;
73 
/* Representable limits of the packed element types.  */
#define OB_MAX  ((uint8_t)0xFF)
#define QH_MIN  ((int16_t)-0x8000)
#define QH_MAX  ((int16_t)0x7FFF)

/* Saturate X to the element range.  OB elements are unsigned, so only
   the upper bound is checked; QH elements are clamped on both sides.
   X is evaluated more than once, so it must have no side effects.  */
#define OB_CLAMP(x)  ((uint8_t)((x) > OB_MAX ? OB_MAX : (x)))
#define QH_CLAMP(x)  ((int16_t)((x) < QH_MIN ? QH_MIN         \
                                : (x) > QH_MAX ? QH_MAX : (x)))
81 
/* Decode a format/select field.  Bit 0 chooses the element format.
   Bit 4 clear means element select; with bit 4 set, bit 3 clear means
   vector select and bit 3 set means immediate select.  */
#define MX_FMT(fmtsel) (((fmtsel) & 0x1) != 0 ? mdmx_qh : mdmx_ob)
#define MX_VT(fmtsel)  (((fmtsel) & 0x10) != 0                              \
                        ? (((fmtsel) & 0x18) != 0x10 ? sel_imm : sel_vect)  \
                        : sel_elem)
85 
/* Extract the element of V chosen by FMTSEL.  For QH the element index
   is FMTSEL bits 3:2 (16 bits per element); for OB it is bits 3:1
   (8 bits per element).  */
#define QH_ELEM(v,fmtsel) \
        ((int16_t)(((v) >> ((((fmtsel) >> 2) & 0x3) * 16)) & 0xFFFF))
#define OB_ELEM(v,fmtsel) \
        ((uint8_t)(((v) >> ((((fmtsel) >> 1) & 0x7) * 8)) & 0xFF))
90 
91 
/* Per-element operation on two QH elements, returning one QH element.  */
typedef int16_t (*QH_FUNC)(int16_t, int16_t);
/* Per-element operation on two OB elements, returning one OB element.  */
typedef uint8_t (*OB_FUNC)(uint8_t, uint8_t);
94 
95 /* vectorized logical operators */
96 
/* Bitwise AND of two QH elements, computed on their unsigned 16-bit
   patterns.  */
static int16_t
AndQH(int16_t ts, int16_t tt)
{
  const uint16_t lhs = (uint16_t)ts;
  const uint16_t rhs = (uint16_t)tt;

  return (int16_t)(lhs & rhs);
}
102 
/* Bitwise AND of two OB elements.  */
static uint8_t
AndOB(uint8_t ts, uint8_t tt)
{
  return (uint8_t)(tt & ts);
}
108 
/* Bitwise NOR of two QH elements: OR the 16-bit patterns, then invert
   all 16 bits.  */
static int16_t
NorQH(int16_t ts, int16_t tt)
{
  const uint16_t either = (uint16_t)((uint16_t)ts | (uint16_t)tt);

  return (int16_t)(uint16_t)~either;
}
114 
/* Bitwise NOR of two OB elements.  */
static uint8_t
NorOB(uint8_t ts, uint8_t tt)
{
  const uint8_t either = (uint8_t)(ts | tt);

  return (uint8_t)~either;
}
120 
/* Bitwise OR of two QH elements, computed on their unsigned 16-bit
   patterns.  */
static int16_t
OrQH(int16_t ts, int16_t tt)
{
  const uint16_t lhs = (uint16_t)ts;
  const uint16_t rhs = (uint16_t)tt;

  return (int16_t)(lhs | rhs);
}
126 
/* Bitwise OR of two OB elements.  */
static uint8_t
OrOB(uint8_t ts, uint8_t tt)
{
  return (uint8_t)(tt | ts);
}
132 
/* Bitwise exclusive-OR of two QH elements, computed on their unsigned
   16-bit patterns.  */
static int16_t
XorQH(int16_t ts, int16_t tt)
{
  const uint16_t lhs = (uint16_t)ts;
  const uint16_t rhs = (uint16_t)tt;

  return (int16_t)(lhs ^ rhs);
}
138 
/* Bitwise exclusive-OR of two OB elements.  */
static uint8_t
XorOB(uint8_t ts, uint8_t tt)
{
  return (uint8_t)(tt ^ ts);
}
144 
/* Logical left shift of a QH element.  Only the low four bits of TT are
   used as the shift count; bits shifted past bit 15 are discarded.  */
static int16_t
SLLQH(int16_t ts, int16_t tt)
{
  const unsigned int count = (unsigned int)tt & 0xFu;
  const uint32_t wide = (uint32_t)(uint16_t)ts << count;

  return (int16_t)(uint16_t)wide;
}
151 
/* Logical left shift of an OB element.  Only the low three bits of TT
   are used as the shift count; bits shifted past bit 7 are discarded.  */
static uint8_t
SLLOB(uint8_t ts, uint8_t tt)
{
  const unsigned int count = tt & 0x7u;

  return (uint8_t)((unsigned int)ts << count);
}
158 
/* Logical (zero-filling) right shift of a QH element.  Only the low four
   bits of TT are used as the shift count.  */
static int16_t
SRLQH(int16_t ts, int16_t tt)
{
  const unsigned int count = (unsigned int)tt & 0xFu;
  const uint16_t pattern = (uint16_t)ts;

  return (int16_t)(pattern >> count);
}
165 
/* Logical right shift of an OB element.  Only the low three bits of TT
   are used as the shift count.  */
static uint8_t
SRLOB(uint8_t ts, uint8_t tt)
{
  const unsigned int count = tt & 0x7u;

  return (uint8_t)(ts >> count);
}
172 
173 
174 /* Vectorized arithmetic operators.  */
175 
/* Saturating signed add of two QH elements.  The sum is formed in 32
   bits and then clamped to the QH range.  */
static int16_t
AddQH(int16_t ts, int16_t tt)
{
  int32_t sum = ts;

  sum += tt;
  return QH_CLAMP(sum);
}
182 
/* Saturating unsigned add of two OB elements.  The sum is formed in 32
   bits and then clamped to OB_MAX.  */
static uint8_t
AddOB(uint8_t ts, uint8_t tt)
{
  uint32_t sum = ts;

  sum += tt;
  return OB_CLAMP(sum);
}
189 
/* Saturating signed subtract (TS - TT) of two QH elements.  The
   difference is formed in 32 bits and then clamped to the QH range.  */
static int16_t
SubQH(int16_t ts, int16_t tt)
{
  int32_t diff = ts;

  diff -= tt;
  return QH_CLAMP(diff);
}
196 
/* Saturating unsigned subtract (TS - TT) of two OB elements; a negative
   difference is clamped to zero.  */
static uint8_t
SubOB(uint8_t ts, uint8_t tt)
{
  const int32_t diff = (int32_t)ts - (int32_t)tt;

  return (uint8_t)(diff < 0 ? 0 : diff);
}
206 
/* Signed minimum of two QH elements.  */
static int16_t
MinQH(int16_t ts, int16_t tt)
{
  if (ts < tt)
    return ts;
  return tt;
}
212 
/* Unsigned minimum of two OB elements.  */
static uint8_t
MinOB(uint8_t ts, uint8_t tt)
{
  if (ts < tt)
    return ts;
  return tt;
}
218 
/* Signed maximum of two QH elements.  */
static int16_t
MaxQH(int16_t ts, int16_t tt)
{
  if (ts > tt)
    return ts;
  return tt;
}
224 
/* Unsigned maximum of two OB elements.  */
static uint8_t
MaxOB(uint8_t ts, uint8_t tt)
{
  if (ts > tt)
    return ts;
  return tt;
}
230 
/* Saturating signed multiply of two QH elements.  The product is formed
   in 32 bits and then clamped to the QH range.  */
static int16_t
MulQH(int16_t ts, int16_t tt)
{
  int32_t product = ts;

  product *= tt;
  return QH_CLAMP(product);
}
237 
/* Saturating unsigned multiply of two OB elements.  The product is
   formed in 32 bits and then clamped to OB_MAX.  */
static uint8_t
MulOB(uint8_t ts, uint8_t tt)
{
  uint32_t product = ts;

  product *= tt;
  return OB_CLAMP(product);
}
244 
245 /* "msgn" and "sra" are defined only for QH format.  */
246 
247 static int16_t
248 MsgnQH(int16_t ts, int16_t tt)
249 {
250   int16_t t;
251   if (ts < 0)
252     t = (tt == QH_MIN ? QH_MAX : -tt);
253   else if (ts == 0)
254     t = 0;
255   else
256     t = tt;
257   return t;
258 }
259 
/* Arithmetic right shift of a QH element.  Only the low four bits of TT
   are used as the shift count.  Relies on signed right shift performing
   sign extension, as the rest of this file does.  */
static int16_t
SRAQH(int16_t ts, int16_t tt)
{
  const unsigned int count = (unsigned int)tt & 0xFu;
  int32_t wide = ts;

  wide >>= count;
  return (int16_t)wide;
}
266 
267 
268 /* "pabsdiff" and "pavg" are defined only for OB format.  */
269 
/* Absolute difference |TS - TT| of two OB elements.  */
static uint8_t
AbsDiffOB(uint8_t ts, uint8_t tt)
{
  uint8_t larger = ts;
  uint8_t smaller = tt;

  if (larger < smaller)
    {
      larger = tt;
      smaller = ts;
    }
  return (uint8_t)(larger - smaller);
}
275 
/* Average of two OB elements, rounding halves upward:
   (TS + TT + 1) / 2 computed in 32 bits.  */
static uint8_t
AvgOB(uint8_t ts, uint8_t tt)
{
  uint32_t sum = 1u;

  sum += ts;
  sum += tt;
  return (uint8_t)(sum / 2u);
}
281 
282 
283 /* Dispatch tables for operations that update a CPR.  */
284 
285 static const QH_FUNC qh_func[] = {
286   AndQH,  NorQH,  OrQH,   XorQH, SLLQH, SRLQH,
287   AddQH,  SubQH,  MinQH,  MaxQH,
288   MulQH,  MsgnQH, SRAQH,  NULL,  NULL
289 };
290 
291 static const OB_FUNC ob_func[] = {
292   AndOB,  NorOB,  OrOB,   XorOB, SLLOB, SRLOB,
293   AddOB,  SubOB,  MinOB,  MaxOB,
294   MulOB,  NULL,   NULL,   AbsDiffOB, AvgOB
295 };
296 
297 /* Auxiliary functions for CPR updates.  */
298 
299 /* Vector mapping for QH format.  */
300 static uint64_t
301 qh_vector_op(uint64_t v1, uint64_t v2, QH_FUNC func)
302 {
303   uint64_t result = 0;
304   int  i;
305   int16_t h, h1, h2;
306 
307   for (i = 0; i < 64; i += 16)
308     {
309       h1 = (int16_t)(v1 & 0xFFFF);  v1 >>= 16;
310       h2 = (int16_t)(v2 & 0xFFFF);  v2 >>= 16;
311       h = (*func)(h1, h2);
312       result |= ((uint64_t)((uint16_t)h) << i);
313     }
314   return result;
315 }
316 
317 static uint64_t
318 qh_map_op(uint64_t v1, int16_t h2, QH_FUNC func)
319 {
320   uint64_t result = 0;
321   int  i;
322   int16_t h, h1;
323 
324   for (i = 0; i < 64; i += 16)
325     {
326       h1 = (int16_t)(v1 & 0xFFFF);  v1 >>= 16;
327       h = (*func)(h1, h2);
328       result |= ((uint64_t)((uint16_t)h) << i);
329     }
330   return result;
331 }
332 
333 
334 /* Vector operations for OB format.  */
335 
336 static uint64_t
337 ob_vector_op(uint64_t v1, uint64_t v2, OB_FUNC func)
338 {
339   uint64_t result = 0;
340   int  i;
341   uint8_t b, b1, b2;
342 
343   for (i = 0; i < 64; i += 8)
344     {
345       b1 = v1 & 0xFF;  v1 >>= 8;
346       b2 = v2 & 0xFF;  v2 >>= 8;
347       b = (*func)(b1, b2);
348       result |= ((uint64_t)b << i);
349     }
350   return result;
351 }
352 
353 static uint64_t
354 ob_map_op(uint64_t v1, uint8_t b2, OB_FUNC func)
355 {
356   uint64_t result = 0;
357   int  i;
358   uint8_t b, b1;
359 
360   for (i = 0; i < 64; i += 8)
361     {
362       b1 = v1 & 0xFF;  v1 >>= 8;
363       b = (*func)(b1, b2);
364       result |= ((uint64_t)b << i);
365     }
366   return result;
367 }
368 
369 
370 /* Primary entry for operations that update CPRs.  */
371 uint64_t
372 mdmx_cpr_op(sim_cpu *cpu,
373 	    address_word cia,
374 	    int op,
375 	    uint64_t op1,
376 	    int vt,
377 	    MX_fmtsel fmtsel)
378 {
379   uint64_t op2;
380   uint64_t result = 0;
381 
382   switch (MX_FMT (fmtsel))
383     {
384     case mdmx_qh:
385       switch (MX_VT (fmtsel))
386 	{
387 	case sel_elem:
388 	  op2 = ValueFPR(vt, fmt_mdmx);
389 	  result = qh_map_op(op1, QH_ELEM(op2, fmtsel), qh_func[op]);
390 	  break;
391 	case sel_vect:
392 	  result = qh_vector_op(op1, ValueFPR(vt, fmt_mdmx), qh_func[op]);
393 	  break;
394 	case sel_imm:
395 	  result = qh_map_op(op1, vt, qh_func[op]);
396 	  break;
397 	}
398       break;
399     case mdmx_ob:
400       switch (MX_VT (fmtsel))
401 	{
402 	case sel_elem:
403 	  op2 = ValueFPR(vt, fmt_mdmx);
404 	  result = ob_map_op(op1, OB_ELEM(op2, fmtsel), ob_func[op]);
405 	  break;
406 	case sel_vect:
407 	  result = ob_vector_op(op1, ValueFPR(vt, fmt_mdmx), ob_func[op]);
408 	  break;
409 	case sel_imm:
410 	  result = ob_map_op(op1, vt, ob_func[op]);
411 	  break;
412 	}
413       break;
414     default:
415       Unpredictable ();
416     }
417 
418   return result;
419 }
420 
421 
422 /* Operations that update CCs */
423 
424 static void
425 qh_vector_test(sim_cpu *cpu, uint64_t v1, uint64_t v2, int cond)
426 {
427   int  i;
428   int16_t h1, h2;
429   int  boolean;
430 
431   for (i = 0; i < 4; i++)
432     {
433       h1 = (int16_t)(v1 & 0xFFFF);  v1 >>= 16;
434       h2 = (int16_t)(v2 & 0xFFFF);  v2 >>= 16;
435       boolean = ((cond & MX_C_EQ) && (h1 == h2)) ||
436 	((cond & MX_C_LT) && (h1 < h2));
437       SETFCC(i, boolean);
438     }
439 }
440 
441 static void
442 qh_map_test(sim_cpu *cpu, uint64_t v1, int16_t h2, int cond)
443 {
444   int  i;
445   int16_t h1;
446   int  boolean;
447 
448   for (i = 0; i < 4; i++)
449     {
450       h1 = (int16_t)(v1 & 0xFFFF);  v1 >>= 16;
451       boolean = ((cond & MX_C_EQ) && (h1 == h2)) ||
452 	((cond & MX_C_LT) && (h1 < h2));
453       SETFCC(i, boolean);
454     }
455 }
456 
457 static void
458 ob_vector_test(sim_cpu *cpu, uint64_t v1, uint64_t v2, int cond)
459 {
460   int  i;
461   uint8_t b1, b2;
462   int  boolean;
463 
464   for (i = 0; i < 8; i++)
465     {
466       b1 = v1 & 0xFF;  v1 >>= 8;
467       b2 = v2 & 0xFF;  v2 >>= 8;
468       boolean = ((cond & MX_C_EQ) && (b1 == b2)) ||
469 	((cond & MX_C_LT) && (b1 < b2));
470       SETFCC(i, boolean);
471     }
472 }
473 
474 static void
475 ob_map_test(sim_cpu *cpu, uint64_t v1, uint8_t b2, int cond)
476 {
477   int  i;
478   uint8_t b1;
479   int  boolean;
480 
481   for (i = 0; i < 8; i++)
482     {
483       b1 = (uint8_t)(v1 & 0xFF);  v1 >>= 8;
484       boolean = ((cond & MX_C_EQ) && (b1 == b2)) ||
485 	((cond & MX_C_LT) && (b1 < b2));
486       SETFCC(i, boolean);
487     }
488 }
489 
490 
491 void
492 mdmx_cc_op(sim_cpu *cpu,
493 	   address_word cia,
494 	   int cond,
495 	   uint64_t v1,
496 	   int vt,
497 	   MX_fmtsel fmtsel)
498 {
499   uint64_t op2;
500 
501   switch (MX_FMT (fmtsel))
502     {
503     case mdmx_qh:
504       switch (MX_VT (fmtsel))
505 	{
506 	case sel_elem:
507 	  op2 = ValueFPR(vt, fmt_mdmx);
508 	  qh_map_test(cpu, v1, QH_ELEM(op2, fmtsel), cond);
509 	  break;
510 	case sel_vect:
511 	  qh_vector_test(cpu, v1, ValueFPR(vt, fmt_mdmx), cond);
512 	  break;
513 	case sel_imm:
514 	  qh_map_test(cpu, v1, vt, cond);
515 	  break;
516 	}
517       break;
518     case mdmx_ob:
519       switch (MX_VT (fmtsel))
520 	{
521 	case sel_elem:
522 	  op2 = ValueFPR(vt, fmt_mdmx);
523 	  ob_map_test(cpu, v1, OB_ELEM(op2, fmtsel), cond);
524 	  break;
525 	case sel_vect:
526 	  ob_vector_test(cpu, v1, ValueFPR(vt, fmt_mdmx), cond);
527 	  break;
528 	case sel_imm:
529 	  ob_map_test(cpu, v1, vt, cond);
530 	  break;
531 	}
532       break;
533     default:
534       Unpredictable ();
535     }
536 }
537 
538 
539 /* Pick operations.  */
540 
541 static uint64_t
542 qh_vector_pick(sim_cpu *cpu, uint64_t v1, uint64_t v2, int tf)
543 {
544   uint64_t result = 0;
545   int  i, s;
546   uint16_t h;
547 
548   s = 0;
549   for (i = 0; i < 4; i++)
550     {
551       h = ((GETFCC(i) == tf) ? (v1 & 0xFFFF) : (v2 & 0xFFFF));
552       v1 >>= 16;  v2 >>= 16;
553       result |= ((uint64_t)h << s);
554       s += 16;
555     }
556   return result;
557 }
558 
559 static uint64_t
560 qh_map_pick(sim_cpu *cpu, uint64_t v1, int16_t h2, int tf)
561 {
562   uint64_t result = 0;
563   int  i, s;
564   uint16_t h;
565 
566   s = 0;
567   for (i = 0; i < 4; i++)
568     {
569       h = (GETFCC(i) == tf) ? (v1 & 0xFFFF) : (uint16_t)h2;
570       v1 >>= 16;
571       result |= ((uint64_t)h << s);
572       s += 16;
573     }
574   return result;
575 }
576 
577 static uint64_t
578 ob_vector_pick(sim_cpu *cpu, uint64_t v1, uint64_t v2, int tf)
579 {
580   uint64_t result = 0;
581   int  i, s;
582   uint8_t b;
583 
584   s = 0;
585   for (i = 0; i < 8; i++)
586     {
587       b = (GETFCC(i) == tf) ? (v1 & 0xFF) : (v2 & 0xFF);
588       v1 >>= 8;  v2 >>= 8;
589       result |= ((uint64_t)b << s);
590       s += 8;
591     }
592   return result;
593 }
594 
595 static uint64_t
596 ob_map_pick(sim_cpu *cpu, uint64_t v1, uint8_t b2, int tf)
597 {
598   uint64_t result = 0;
599   int  i, s;
600   uint8_t b;
601 
602   s = 0;
603   for (i = 0; i < 8; i++)
604     {
605       b = (GETFCC(i) == tf) ? (v1 & 0xFF) : b2;
606       v1 >>= 8;
607       result |= ((uint64_t)b << s);
608       s += 8;
609     }
610   return result;
611 }
612 
613 
614 uint64_t
615 mdmx_pick_op(sim_cpu *cpu,
616 	     address_word cia,
617 	     int tf,
618 	     uint64_t v1,
619 	     int vt,
620 	     MX_fmtsel fmtsel)
621 {
622   uint64_t result = 0;
623   uint64_t op2;
624 
625   switch (MX_FMT (fmtsel))
626     {
627     case mdmx_qh:
628       switch (MX_VT (fmtsel))
629 	{
630 	case sel_elem:
631 	  op2 = ValueFPR(vt, fmt_mdmx);
632 	  result = qh_map_pick(cpu, v1, QH_ELEM(op2, fmtsel), tf);
633 	  break;
634 	case sel_vect:
635 	  result = qh_vector_pick(cpu, v1, ValueFPR(vt, fmt_mdmx), tf);
636 	  break;
637 	case sel_imm:
638 	  result = qh_map_pick(cpu, v1, vt, tf);
639 	  break;
640 	}
641       break;
642     case mdmx_ob:
643       switch (MX_VT (fmtsel))
644 	{
645 	case sel_elem:
646 	  op2 = ValueFPR(vt, fmt_mdmx);
647 	  result = ob_map_pick(cpu, v1, OB_ELEM(op2, fmtsel), tf);
648 	  break;
649 	case sel_vect:
650 	  result = ob_vector_pick(cpu, v1, ValueFPR(vt, fmt_mdmx), tf);
651 	  break;
652 	case sel_imm:
653 	  result = ob_map_pick(cpu, v1, vt, tf);
654 	  break;
655 	}
656       break;
657     default:
658       Unpredictable ();
659     }
660   return result;
661 }
662 
663 
664 /* Accumulators.  */
665 
/* Per-element QH accumulate operation: updates the accumulator element
   *A in place from the element pair TS, TT.  */
typedef void (*QH_ACC)(signed48 *a, int16_t ts, int16_t tt);
667 
668 static void
669 AccAddAQH(signed48 *a, int16_t ts, int16_t tt)
670 {
671   *a += (signed48)ts + (signed48)tt;
672 }
673 
674 static void
675 AccAddLQH(signed48 *a, int16_t ts, int16_t tt)
676 {
677   *a = (signed48)ts + (signed48)tt;
678 }
679 
680 static void
681 AccMulAQH(signed48 *a, int16_t ts, int16_t tt)
682 {
683   *a += (signed48)ts * (signed48)tt;
684 }
685 
686 static void
687 AccMulLQH(signed48 *a, int16_t ts, int16_t tt)
688 {
689   *a = (signed48)ts * (signed48)tt;
690 }
691 
692 static void
693 SubMulAQH(signed48 *a, int16_t ts, int16_t tt)
694 {
695   *a -= (signed48)ts * (signed48)tt;
696 }
697 
698 static void
699 SubMulLQH(signed48 *a, int16_t ts, int16_t tt)
700 {
701   *a = -((signed48)ts * (signed48)tt);
702 }
703 
704 static void
705 AccSubAQH(signed48 *a, int16_t ts, int16_t tt)
706 {
707   *a += (signed48)ts - (signed48)tt;
708 }
709 
710 static void
711 AccSubLQH(signed48 *a, int16_t ts, int16_t tt)
712 {
713   *a =  (signed48)ts - (signed48)tt;
714 }
715 
716 
/* Per-element OB accumulate operation: updates the accumulator element
   *ACC in place from the element pair TS, TT.  */
typedef void (*OB_ACC)(signed24 *acc, uint8_t ts, uint8_t tt);
718 
719 static void
720 AccAddAOB(signed24 *a, uint8_t ts, uint8_t tt)
721 {
722   *a += (signed24)ts + (signed24)tt;
723 }
724 
725 static void
726 AccAddLOB(signed24 *a, uint8_t ts, uint8_t tt)
727 {
728   *a = (signed24)ts + (signed24)tt;
729 }
730 
731 static void
732 AccMulAOB(signed24 *a, uint8_t ts, uint8_t tt)
733 {
734   *a += (signed24)ts * (signed24)tt;
735 }
736 
737 static void
738 AccMulLOB(signed24 *a, uint8_t ts, uint8_t tt)
739 {
740   *a = (signed24)ts * (signed24)tt;
741 }
742 
743 static void
744 SubMulAOB(signed24 *a, uint8_t ts, uint8_t tt)
745 {
746   *a -= (signed24)ts * (signed24)tt;
747 }
748 
749 static void
750 SubMulLOB(signed24 *a, uint8_t ts, uint8_t tt)
751 {
752   *a = -((signed24)ts * (signed24)tt);
753 }
754 
755 static void
756 AccSubAOB(signed24 *a, uint8_t ts, uint8_t tt)
757 {
758   *a += (signed24)ts - (signed24)tt;
759 }
760 
761 static void
762 AccSubLOB(signed24 *a, uint8_t ts, uint8_t tt)
763 {
764   *a = (signed24)ts - (signed24)tt;
765 }
766 
767 static void
768 AccAbsDiffOB(signed24 *a, uint8_t ts, uint8_t tt)
769 {
770   uint8_t t = (ts >= tt ? ts - tt : tt - ts);
771   *a += (signed24)t;
772 }
773 
774 
/* Dispatch tables for operations that update the accumulators.  */
776 
777 static const QH_ACC qh_acc[] = {
778   AccAddAQH, AccAddLQH, AccMulAQH, AccMulLQH,
779   SubMulAQH, SubMulLQH, AccSubAQH, AccSubLQH,
780   NULL
781 };
782 
783 static const OB_ACC ob_acc[] = {
784   AccAddAOB, AccAddLOB, AccMulAOB, AccMulLOB,
785   SubMulAOB, SubMulLOB, AccSubAOB, AccSubLOB,
786   AccAbsDiffOB
787 };
788 
789 
790 static void
791 qh_vector_acc(signed48 a[], uint64_t v1, uint64_t v2, QH_ACC acc)
792 {
793   int  i;
794   int16_t h1, h2;
795 
796   for (i = 0; i < 4; i++)
797     {
798       h1 = (int16_t)(v1 & 0xFFFF);  v1 >>= 16;
799       h2 = (int16_t)(v2 & 0xFFFF);  v2 >>= 16;
800       (*acc)(&a[i], h1, h2);
801     }
802 }
803 
804 static void
805 qh_map_acc(signed48 a[], uint64_t v1, int16_t h2, QH_ACC acc)
806 {
807   int  i;
808   int16_t h1;
809 
810   for (i = 0; i < 4; i++)
811     {
812       h1 = (int16_t)(v1 & 0xFFFF);  v1 >>= 16;
813       (*acc)(&a[i], h1, h2);
814     }
815 }
816 
817 static void
818 ob_vector_acc(signed24 a[], uint64_t v1, uint64_t v2, OB_ACC acc)
819 {
820   int  i;
821   uint8_t  b1, b2;
822 
823   for (i = 0; i < 8; i++)
824     {
825       b1 = v1 & 0xFF;  v1 >>= 8;
826       b2 = v2 & 0xFF;  v2 >>= 8;
827       (*acc)(&a[i], b1, b2);
828     }
829 }
830 
831 static void
832 ob_map_acc(signed24 a[], uint64_t v1, uint8_t b2, OB_ACC acc)
833 {
834   int  i;
835   uint8_t b1;
836 
837   for (i = 0; i < 8; i++)
838     {
839       b1 = v1 & 0xFF;  v1 >>= 8;
840       (*acc)(&a[i], b1, b2);
841     }
842 }
843 
844 
845 /* Primary entry for operations that accumulate */
846 void
847 mdmx_acc_op(sim_cpu *cpu,
848 	    address_word cia,
849 	    int op,
850 	    uint64_t op1,
851 	    int vt,
852 	    MX_fmtsel fmtsel)
853 {
854   uint64_t op2;
855 
856   switch (MX_FMT (fmtsel))
857     {
858     case mdmx_qh:
859       switch (MX_VT (fmtsel))
860 	{
861 	case sel_elem:
862 	  op2 = ValueFPR(vt, fmt_mdmx);
863 	  qh_map_acc(ACC.qh, op1, QH_ELEM(op2, fmtsel), qh_acc[op]);
864 	  break;
865 	case sel_vect:
866 	  qh_vector_acc(ACC.qh, op1, ValueFPR(vt, fmt_mdmx), qh_acc[op]);
867 	  break;
868 	case sel_imm:
869 	  qh_map_acc(ACC.qh, op1, vt, qh_acc[op]);
870 	  break;
871 	}
872       break;
873     case mdmx_ob:
874       switch (MX_VT (fmtsel))
875 	{
876 	case sel_elem:
877 	  op2 = ValueFPR(vt, fmt_mdmx);
878 	  ob_map_acc(ACC.ob, op1, OB_ELEM(op2, fmtsel), ob_acc[op]);
879 	  break;
880 	case sel_vect:
881 	  ob_vector_acc(ACC.ob, op1, ValueFPR(vt, fmt_mdmx), ob_acc[op]);
882 	  break;
883 	case sel_imm:
884 	  ob_map_acc(ACC.ob, op1, vt, ob_acc[op]);
885 	  break;
886 	}
887       break;
888     default:
889       Unpredictable ();
890     }
891 }
892 
893 
894 /* Reading and writing accumulator (no conversion).  */
895 
896 uint64_t
897 mdmx_rac_op(sim_cpu *cpu,
898 	    address_word cia,
899 	    int op,
900 	    int fmt)
901 {
902   uint64_t    result;
903   unsigned int  shift;
904   int           i;
905 
906   shift = op;          /* L = 00, M = 01, H = 10.  */
907   result = 0;
908 
909   switch (fmt)
910     {
911     case MX_FMT_QH:
912       shift <<= 4;              /* 16 bits per element.  */
913       for (i = 3; i >= 0; --i)
914 	{
915 	  result <<= 16;
916 	  result |= ((ACC.qh[i] >> shift) & 0xFFFF);
917 	}
918       break;
919     case MX_FMT_OB:
920       shift <<= 3;              /*  8 bits per element.  */
921       for (i = 7; i >= 0; --i)
922 	{
923 	  result <<= 8;
924 	  result |= ((ACC.ob[i] >> shift) & 0xFF);
925 	}
926       break;
927     default:
928       Unpredictable ();
929     }
930   return result;
931 }
932 
933 void
934 mdmx_wacl(sim_cpu *cpu,
935 	  address_word cia,
936 	  int fmt,
937 	  uint64_t vs,
938 	  uint64_t vt)
939 {
940   int           i;
941 
942   switch (fmt)
943     {
944     case MX_FMT_QH:
945       for (i = 0; i < 4; i++)
946 	{
947 	  int32_t  s = (int16_t)(vs & 0xFFFF);
948 	  ACC.qh[i] = ((signed48)s << 16) | (vt & 0xFFFF);
949 	  vs >>= 16;  vt >>= 16;
950 	}
951       break;
952     case MX_FMT_OB:
953       for (i = 0; i < 8; i++)
954 	{
955 	  int16_t  s = (int8_t)(vs & 0xFF);
956 	  ACC.ob[i] = ((signed24)s << 8) | (vt & 0xFF);
957 	  vs >>= 8;   vt >>= 8;
958 	}
959       break;
960     default:
961       Unpredictable ();
962     }
963 }
964 
965 void
966 mdmx_wach(sim_cpu *cpu,
967 	  address_word cia,
968 	  int fmt,
969 	  uint64_t vs)
970 {
971   int           i;
972 
973   switch (fmt)
974     {
975     case MX_FMT_QH:
976       for (i = 0; i < 4; i++)
977 	{
978 	  int32_t  s = (int16_t)(vs & 0xFFFF);
979 	  ACC.qh[i] &= ~((signed48)0xFFFF << 32);
980 	  ACC.qh[i] |=  ((signed48)s << 32);
981 	  vs >>= 16;
982 	}
983       break;
984     case MX_FMT_OB:
985       for (i = 0; i < 8; i++)
986 	{
987 	  ACC.ob[i] &= ~((signed24)0xFF << 16);
988 	  ACC.ob[i] |=  ((signed24)(vs & 0xFF) << 16);
989 	  vs >>= 8;
990 	}
991       break;
992     default:
993       Unpredictable ();
994     }
995 }
996 
997 
998 /* Reading and writing accumulator (rounding conversions).
999    Enumerating function guarantees s >= 0 for QH ops.  */
1000 
1001 typedef int16_t (*QH_ROUND)(signed48 a, int16_t s);
1002 
1003 #define QH_BIT(n)  ((unsigned48)1 << (n))
1004 #define QH_ONES(n) (((unsigned48)1 << (n))-1)
1005 
1006 static int16_t
1007 RNASQH(signed48 a, int16_t s)
1008 {
1009   signed48 t;
1010   int16_t result = 0;
1011 
1012   if (s > 48)
1013     result = 0;
1014   else
1015     {
1016       t = (a >> s);
1017       if ((a & QH_BIT(47)) == 0)
1018 	{
1019 	  if (s > 0 && ((a >> (s-1)) & 1) == 1)
1020 	    t++;
1021 	  if (t > QH_MAX)
1022 	    t = QH_MAX;
1023 	}
1024       else
1025 	{
1026 	  if (s > 0 && ((a >> (s-1)) & 1) == 1)
1027 	    {
1028 	      if (s > 1 && ((unsigned48)a & QH_ONES(s-1)) != 0)
1029 		t++;
1030 	    }
1031 	  if (t < QH_MIN)
1032 	    t = QH_MIN;
1033 	}
1034       result = (int16_t)t;
1035     }
1036   return result;
1037 }
1038 
1039 static int16_t
1040 RNAUQH(signed48 a, int16_t s)
1041 {
1042   unsigned48 t;
1043   int16_t result;
1044 
1045   if (s > 48)
1046     result = 0;
1047   else if (s == 48)
1048     result = ((unsigned48)a & MASK48) >> 47;
1049   else
1050     {
1051       t = ((unsigned48)a & MASK48) >> s;
1052       if (s > 0 && ((a >> (s-1)) & 1) == 1)
1053 	t++;
1054       if (t > 0xFFFF)
1055 	t = 0xFFFF;
1056       result = (int16_t)t;
1057     }
1058   return result;
1059 }
1060 
1061 static int16_t
1062 RNESQH(signed48 a, int16_t s)
1063 {
1064   signed48 t;
1065   int16_t result = 0;
1066 
1067   if (s > 47)
1068     result = 0;
1069   else
1070     {
1071       t = (a >> s);
1072       if (s > 0 && ((a >> (s-1)) & 1) == 1)
1073 	{
1074 	  if (s == 1 || (a & QH_ONES(s-1)) == 0)
1075 	    t += t & 1;
1076 	  else
1077 	    t += 1;
1078 	}
1079       if ((a & QH_BIT(47)) == 0)
1080 	{
1081 	  if (t > QH_MAX)
1082 	    t = QH_MAX;
1083 	}
1084       else
1085 	{
1086 	  if (t < QH_MIN)
1087 	    t = QH_MIN;
1088 	}
1089       result = (int16_t)t;
1090     }
1091   return result;
1092 }
1093 
1094 static int16_t
1095 RNEUQH(signed48 a, int16_t s)
1096 {
1097   unsigned48 t;
1098   int16_t result;
1099 
1100   if (s > 48)
1101     result = 0;
1102   else if (s == 48)
1103     result = ((unsigned48)a > QH_BIT(47) ? 1 : 0);
1104   else
1105     {
1106       t = ((unsigned48)a & MASK48) >> s;
1107       if (s > 0 && ((a >> (s-1)) & 1) == 1)
1108 	{
1109 	  if (s > 1 && (a & QH_ONES(s-1)) != 0)
1110 	    t++;
1111 	  else
1112 	    t += t & 1;
1113 	}
1114       if (t > 0xFFFF)
1115 	t = 0xFFFF;
1116       result = (int16_t)t;
1117     }
1118   return result;
1119 }
1120 
1121 static int16_t
1122 RZSQH(signed48 a, int16_t s)
1123 {
1124   signed48 t;
1125   int16_t result = 0;
1126 
1127   if (s > 47)
1128     result = 0;
1129   else
1130     {
1131       t = (a >> s);
1132       if ((a & QH_BIT(47)) == 0)
1133 	{
1134 	  if (t > QH_MAX)
1135 	    t = QH_MAX;
1136 	}
1137       else
1138 	{
1139 	  if (t < QH_MIN)
1140 	    t = QH_MIN;
1141 	}
1142       result = (int16_t)t;
1143     }
1144   return result;
1145 }
1146 
1147 static int16_t
1148 RZUQH(signed48 a, int16_t s)
1149 {
1150   unsigned48 t;
1151   int16_t result = 0;
1152 
1153   if (s > 48)
1154     result = 0;
1155   else if (s == 48)
1156     result = ((unsigned48)a > QH_BIT(47) ? 1 : 0);
1157   else
1158     {
1159       t = ((unsigned48)a & MASK48) >> s;
1160       if (t > 0xFFFF)
1161 	t = 0xFFFF;
1162       result = (int16_t)t;
1163     }
1164   return result;
1165 }
1166 
1167 
1168 typedef uint8_t (*OB_ROUND)(signed24 a, uint8_t s);
1169 
1170 #define OB_BIT(n)  ((unsigned24)1 << (n))
1171 #define OB_ONES(n) (((unsigned24)1 << (n))-1)
1172 
1173 static uint8_t
1174 RNAUOB(signed24 a, uint8_t s)
1175 {
1176   uint8_t result;
1177   unsigned24 t;
1178 
1179   if (s > 24)
1180     result = 0;
1181   else if (s == 24)
1182     result = ((unsigned24)a & MASK24) >> 23;
1183   else
1184     {
1185       t = ((unsigned24)a & MASK24) >> s;
1186       if (s > 0 && ((a >> (s-1)) & 1) == 1)
1187 	t ++;
1188       result = OB_CLAMP(t);
1189     }
1190   return result;
1191 }
1192 
1193 static uint8_t
1194 RNEUOB(signed24 a, uint8_t s)
1195 {
1196   uint8_t result;
1197   unsigned24 t;
1198 
1199   if (s > 24)
1200     result = 0;
1201   else if (s == 24)
1202     result = (((unsigned24)a & MASK24) > OB_BIT(23) ? 1 : 0);
1203   else
1204     {
1205       t = ((unsigned24)a & MASK24) >> s;
1206       if (s > 0 && ((a >> (s-1)) & 1) == 1)
1207 	{
1208 	  if (s > 1 && (a & OB_ONES(s-1)) != 0)
1209 	    t++;
1210 	  else
1211 	    t += t & 1;
1212 	}
1213       result = OB_CLAMP(t);
1214     }
1215   return result;
1216 }
1217 
1218 static uint8_t
1219 RZUOB(signed24 a, uint8_t s)
1220 {
1221   uint8_t result;
1222   unsigned24 t;
1223 
1224   if (s >= 24)
1225     result = 0;
1226   else
1227     {
1228       t = ((unsigned24)a & MASK24) >> s;
1229       result = OB_CLAMP(t);
1230     }
1231   return result;
1232 }
1233 
1234 
1235 static const QH_ROUND qh_round[] = {
1236   RNASQH, RNAUQH, RNESQH, RNEUQH, RZSQH,  RZUQH
1237 };
1238 
1239 static const OB_ROUND ob_round[] = {
1240   NULL,   RNAUOB, NULL,   RNEUOB, NULL,   RZUOB
1241 };
1242 
1243 
1244 static uint64_t
1245 qh_vector_round(sim_cpu *cpu, address_word cia, uint64_t v2, QH_ROUND round)
1246 {
1247   uint64_t result = 0;
1248   int  i, s;
1249   int16_t h, h2;
1250 
1251   s = 0;
1252   for (i = 0; i < 4; i++)
1253     {
1254       h2 = (int16_t)(v2 & 0xFFFF);
1255       if (h2 >= 0)
1256 	h = (*round)(ACC.qh[i], h2);
1257       else
1258 	{
1259 	  UnpredictableResult ();
1260 	  h = 0xdead;
1261 	}
1262       v2 >>= 16;
1263       result |= ((uint64_t)((uint16_t)h) << s);
1264       s += 16;
1265     }
1266   return result;
1267 }
1268 
1269 static uint64_t
1270 qh_map_round(sim_cpu *cpu, address_word cia, int16_t h2, QH_ROUND round)
1271 {
1272   uint64_t result = 0;
1273   int  i, s;
1274   int16_t  h;
1275 
1276   s = 0;
1277   for (i = 0; i < 4; i++)
1278     {
1279       if (h2 >= 0)
1280 	h = (*round)(ACC.qh[i], h2);
1281       else
1282 	{
1283 	  UnpredictableResult ();
1284 	  h = 0xdead;
1285 	}
1286       result |= ((uint64_t)((uint16_t)h) << s);
1287       s += 16;
1288     }
1289   return result;
1290 }
1291 
1292 static uint64_t
1293 ob_vector_round(sim_cpu *cpu, address_word cia, uint64_t v2, OB_ROUND round)
1294 {
1295   uint64_t result = 0;
1296   int  i, s;
1297   uint8_t b, b2;
1298 
1299   s = 0;
1300   for (i = 0; i < 8; i++)
1301     {
1302       b2 = v2 & 0xFF;  v2 >>= 8;
1303       b = (*round)(ACC.ob[i], b2);
1304       result |= ((uint64_t)b << s);
1305       s += 8;
1306     }
1307   return result;
1308 }
1309 
1310 static uint64_t
1311 ob_map_round(sim_cpu *cpu, address_word cia, uint8_t b2, OB_ROUND round)
1312 {
1313   uint64_t result = 0;
1314   int  i, s;
1315   uint8_t b;
1316 
1317   s = 0;
1318   for (i = 0; i < 8; i++)
1319     {
1320       b = (*round)(ACC.ob[i], b2);
1321       result |= ((uint64_t)b << s);
1322       s += 8;
1323     }
1324   return result;
1325 }
1326 
1327 
1328 uint64_t
1329 mdmx_round_op(sim_cpu *cpu,
1330 	      address_word cia,
1331 	      int rm,
1332 	      int vt,
1333 	      MX_fmtsel fmtsel)
1334 {
1335   uint64_t op2;
1336   uint64_t result = 0;
1337 
1338   switch (MX_FMT (fmtsel))
1339     {
1340     case mdmx_qh:
1341       switch (MX_VT (fmtsel))
1342 	{
1343 	case sel_elem:
1344 	  op2 = ValueFPR(vt, fmt_mdmx);
1345 	  result = qh_map_round(cpu, cia, QH_ELEM(op2, fmtsel), qh_round[rm]);
1346 	  break;
1347 	case sel_vect:
1348 	  op2 = ValueFPR(vt, fmt_mdmx);
1349 	  result = qh_vector_round(cpu, cia, op2, qh_round[rm]);
1350 	  break;
1351 	case sel_imm:
1352 	  result = qh_map_round(cpu, cia, vt, qh_round[rm]);
1353 	  break;
1354 	}
1355       break;
1356     case mdmx_ob:
1357       switch (MX_VT (fmtsel))
1358 	{
1359 	case sel_elem:
1360 	  op2 = ValueFPR(vt, fmt_mdmx);
1361 	  result = ob_map_round(cpu, cia, OB_ELEM(op2, fmtsel), ob_round[rm]);
1362 	  break;
1363 	case sel_vect:
1364 	  op2 = ValueFPR(vt, fmt_mdmx);
1365 	  result = ob_vector_round(cpu, cia, op2, ob_round[rm]);
1366 	  break;
1367 	case sel_imm:
1368 	  result = ob_map_round(cpu, cia, vt, ob_round[rm]);
1369 	  break;
1370 	}
1371       break;
1372     default:
1373       Unpredictable ();
1374     }
1375 
1376   return result;
1377 }
1378 
1379 
1380 /* Shuffle operation.  */
1381 
/* One destination element of a shuffle: which source it comes from and
   which element of that source.  In mdmx_shuffle, `vs' reads op1, `vt'
   reads op2, and `ss' yields 0xFF or 0 from bit 7 of the op1 element.  */
typedef struct
{
  enum
  {
    vs,
    ss,
    vt
  } source;
  unsigned int index;
} sh_map;
1386 
1387 static const sh_map ob_shuffle[][8] = {
1388   /* MDMX 2.0 encodings (3-4, 6-7).  */
1389   /* vr5400   encoding  (5), otherwise.  */
1390   {                                                              }, /* RSVD */
1391   {{vt,4}, {vs,4}, {vt,5}, {vs,5}, {vt,6}, {vs,6}, {vt,7}, {vs,7}}, /* RSVD */
1392   {{vt,0}, {vs,0}, {vt,1}, {vs,1}, {vt,2}, {vs,2}, {vt,3}, {vs,3}}, /* RSVD */
1393   {{vs,0}, {ss,0}, {vs,1}, {ss,1}, {vs,2}, {ss,2}, {vs,3}, {ss,3}}, /* upsl */
1394   {{vt,1}, {vt,3}, {vt,5}, {vt,7}, {vs,1}, {vs,3}, {vs,5}, {vs,7}}, /* pach */
1395   {{vt,0}, {vt,2}, {vt,4}, {vt,6}, {vs,0}, {vs,2}, {vs,4}, {vs,6}}, /* pacl */
1396   {{vt,4}, {vs,4}, {vt,5}, {vs,5}, {vt,6}, {vs,6}, {vt,7}, {vs,7}}, /* mixh */
1397   {{vt,0}, {vs,0}, {vt,1}, {vs,1}, {vt,2}, {vs,2}, {vt,3}, {vs,3}}  /* mixl */
1398 };
1399 
1400 static const sh_map qh_shuffle[][4] = {
1401   {{vt,2}, {vs,2}, {vt,3}, {vs,3}},  /* mixh */
1402   {{vt,0}, {vs,0}, {vt,1}, {vs,1}},  /* mixl */
1403   {{vt,1}, {vt,3}, {vs,1}, {vs,3}},  /* pach */
1404   {                              },  /* RSVD */
1405   {{vt,1}, {vs,0}, {vt,3}, {vs,2}},  /* bfla */
1406   {                              },  /* RSVD */
1407   {{vt,2}, {vt,3}, {vs,2}, {vs,3}},  /* repa */
1408   {{vt,0}, {vt,1}, {vs,0}, {vs,1}}   /* repb */
1409 };
1410 
1411 
1412 uint64_t
1413 mdmx_shuffle(sim_cpu *cpu,
1414 	     address_word cia,
1415 	     int shop,
1416 	     uint64_t op1,
1417 	     uint64_t op2)
1418 {
1419   uint64_t result = 0;
1420   int  i, s;
1421   int  op;
1422 
1423   if ((shop & 0x3) == 0x1)       /* QH format.  */
1424     {
1425       op = shop >> 2;
1426       s = 0;
1427       for (i = 0; i < 4; i++)
1428 	{
1429 	  uint64_t v;
1430 
1431 	  switch (qh_shuffle[op][i].source)
1432 	    {
1433 	    case vs:
1434 	      v = op1;
1435 	      break;
1436 	    case vt:
1437 	      v = op2;
1438 	      break;
1439 	    default:
1440 	      Unpredictable ();
1441 	      v = 0;
1442 	    }
1443 	  result |= (((v >> 16*qh_shuffle[op][i].index) & 0xFFFF) << s);
1444 	  s += 16;
1445 	}
1446     }
1447   else if ((shop & 0x1) == 0x0)  /* OB format.  */
1448     {
1449       op = shop >> 1;
1450       s = 0;
1451       for (i = 0; i < 8; i++)
1452 	{
1453 	  uint8_t b;
1454 	  unsigned int ishift = 8*ob_shuffle[op][i].index;
1455 
1456 	  switch (ob_shuffle[op][i].source)
1457 	    {
1458 	    case vs:
1459 	      b = (op1 >> ishift) & 0xFF;
1460 	      break;
1461 	    case ss:
1462 	      b = ((op1 >> ishift) & 0x80) ? 0xFF : 0;
1463 	      break;
1464 	    case vt:
1465 	      b = (op2 >> ishift) & 0xFF;
1466 	      break;
1467 	    default:
1468 	      Unpredictable ();
1469 	      b = 0;
1470 	    }
1471 	  result |= ((uint64_t)b << s);
1472 	  s += 8;
1473 	}
1474     }
1475   else
1476     Unpredictable ();
1477 
1478   return result;
1479 }
1480