xref: /netbsd-src/external/gpl3/gdb/dist/sim/rx/rx.c (revision 1f4e7eb9e5e045e008f1894823a8e4e6c9f46890)
1 /* rx.c --- opcode semantics for stand-alone RX simulator.
2 
3 Copyright (C) 2008-2024 Free Software Foundation, Inc.
4 Contributed by Red Hat, Inc.
5 
6 This file is part of the GNU simulators.
7 
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
12 
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 GNU General Public License for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
20 
21 /* This must come before any other includes.  */
22 #include "defs.h"
23 
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <signal.h>
28 #include "libiberty.h"
29 
30 #include "opcode/rx.h"
31 #include "cpu.h"
32 #include "mem.h"
33 #include "syscalls.h"
34 #include "fpu.h"
35 #include "err.h"
36 #include "misc.h"
37 
38 #ifdef WITH_PROFILE
/* Printable names for the RXO_* opcode ids, used by the profiling
   code.  NOTE(review): presumably indexed by the opcode id, so the
   order has to mirror the RXO_* enumeration in opcode/rx.h --
   confirm before reordering.  */
static const char * const id_names[] = {
  "RXO_unknown",
  "RXO_mov",		/* d = s (signed) */
  "RXO_movbi",		/* d = [s,s2] (signed) */
  "RXO_movbir",		/* [s,s2] = d (signed) */
  "RXO_pushm",		/* s..s2 */
  "RXO_popm",		/* s..s2 */
  "RXO_xchg",		/* s <-> d */
  "RXO_stcc",		/* d = s if cond(s2) */
  "RXO_rtsd",		/* rtsd, 1=imm, 2-0 = reg if reg type */

  /* Two-or-three operand arithmetic: d OP= s, or d = s OP s2 when s2
     is set.  d may be "None".  */
  "RXO_and",
  "RXO_or",
  "RXO_xor",
  "RXO_add",
  "RXO_sub",
  "RXO_mul",
  "RXO_div",
  "RXO_divu",
  "RXO_shll",
  "RXO_shar",
  "RXO_shlr",

  "RXO_adc",		/* d = d + s + carry */
  "RXO_sbb",		/* d = d - s - ~carry */
  "RXO_abs",		/* d = |s| */
  "RXO_max",		/* d = max(d,s) */
  "RXO_min",		/* d = min(d,s) */
  "RXO_emul",		/* d:64 = d:32 * s */
  "RXO_emulu",		/* d:64 = d:32 * s (unsigned) */

  "RXO_rolc",		/* d <<= 1 through carry */
  "RXO_rorc",		/* d >>= 1 through carry */
  "RXO_rotl",		/* d <<= #s without carry */
  "RXO_rotr",		/* d >>= #s without carry */
  "RXO_revw",		/* d = revw(s) */
  "RXO_revl",		/* d = revl(s) */
  "RXO_branch",		/* pc = d if cond(s) */
  "RXO_branchrel",	/* pc += d if cond(s) */
  "RXO_jsr",		/* pc = d */
  "RXO_jsrrel",		/* pc += d */
  "RXO_rts",
  "RXO_nop",
  "RXO_nop2",
  "RXO_nop3",
  "RXO_nop4",
  "RXO_nop5",
  "RXO_nop6",
  "RXO_nop7",

  /* String operations.  */
  "RXO_scmpu",
  "RXO_smovu",
  "RXO_smovb",
  "RXO_suntil",
  "RXO_swhile",
  "RXO_smovf",
  "RXO_sstr",

  /* Multiply-accumulate and accumulator moves.  */
  "RXO_rmpa",
  "RXO_mulhi",
  "RXO_mullo",
  "RXO_machi",
  "RXO_maclo",
  "RXO_mvtachi",
  "RXO_mvtaclo",
  "RXO_mvfachi",
  "RXO_mvfacmi",
  "RXO_mvfaclo",
  "RXO_racw",

  "RXO_sat",		/* sat(d) */
  "RXO_satr",

  /* Floating point.  */
  "RXO_fadd",		/* d op= s */
  "RXO_fcmp",
  "RXO_fsub",
  "RXO_ftoi",
  "RXO_fmul",
  "RXO_fdiv",
  "RXO_round",
  "RXO_itof",

  /* Bit manipulation.  */
  "RXO_bset",		/* d |= (1<<s) */
  "RXO_bclr",		/* d &= ~(1<<s) */
  "RXO_btst",		/* s & (1<<s2) */
  "RXO_bnot",		/* d ^= (1<<s) */
  "RXO_bmcc",		/* d<s> = cond(s2) */

  "RXO_clrpsw",		/* flag index in d */
  "RXO_setpsw",		/* flag index in d */
  "RXO_mvtipl",		/* new IPL in s */

  "RXO_rtfi",
  "RXO_rte",
  "RXO_rtd",		/* undocumented */
  "RXO_brk",
  "RXO_dbt",		/* undocumented */
  "RXO_int",		/* vector id in s */
  "RXO_stop",
  "RXO_wait",

  "RXO_sccnd",		/* d = cond(s) ? 1 : 0 */
};
144 
/* Four-character labels for operand kinds, looked up by
   op_cache_string.  NOTE(review): presumably indexed by the
   RX_Operand_* type value -- confirm against opcode/rx.h.  */
static const char * const optype_names[] = {
  " -  ",		/* no operand */
  "#Imm",		/* #addend */
  " Rn ",		/* Rn */
  "[Rn]",		/* [Rn + addend] */
  "Ps++",		/* [Rn+] */
  "--Pr",		/* [-Rn] */
  " cc ",		/* eq, gtu, etc */
  "Flag",		/* [UIOSZC] */
  "RbRi"		/* [Rb + scale * Ri] */
};
156 
157 #define N_RXO ARRAY_SIZE (id_names)
158 #define N_RXT ARRAY_SIZE (optype_names)
159 #define N_MAP 90
160 
161 static unsigned long long benchmark_start_cycle;
162 static unsigned long long benchmark_end_cycle;
163 
164 static int op_cache[N_RXT][N_RXT][N_RXT];
165 static int op_cache_rev[N_MAP];
166 static int op_cache_idx = 0;
167 
168 static int
169 op_lookup (int a, int b, int c)
170 {
171   if (op_cache[a][b][c])
172     return op_cache[a][b][c];
173   op_cache_idx ++;
174   if (op_cache_idx >= N_MAP)
175     {
176       printf("op_cache_idx exceeds %d\n", N_MAP);
177       exit(1);
178     }
179   op_cache[a][b][c] = op_cache_idx;
180   op_cache_rev[op_cache_idx] = (a<<8) | (b<<4) | c;
181   return op_cache_idx;
182 }
183 
184 static char *
185 op_cache_string (int map)
186 {
187   static int ci;
188   static char cb[5][20];
189   int a, b, c;
190 
191   map = op_cache_rev[map];
192   a = (map >> 8) & 15;
193   b = (map >> 4) & 15;
194   c = (map >> 0) & 15;
195   ci = (ci + 1) % 5;
196   sprintf(cb[ci], "%s %s %s", optype_names[a], optype_names[b], optype_names[c]);
197   return cb[ci];
198 }
199 
200 static unsigned long long cycles_per_id[N_RXO][N_MAP];
201 static unsigned long long times_per_id[N_RXO][N_MAP];
202 static unsigned long long memory_stalls;
203 static unsigned long long register_stalls;
204 static unsigned long long branch_stalls;
205 static unsigned long long branch_alignment_stalls;
206 static unsigned long long fast_returns;
207 
208 static unsigned long times_per_pair[N_RXO][N_MAP][N_RXO][N_MAP];
209 static int prev_opcode_id = RXO_unknown;
210 static int po0;
211 
212 #define STATS(x) x
213 
214 #else
#define STATS(x)	/* Profiling disabled: the argument is discarded.  */
216 #endif /* WITH_PROFILE */
217 
218 
219 #ifdef CYCLE_ACCURATE
220 
/* Register number recorded by RLD when a register is written by an
   instruction that read memory; -1 when there is none.  */
static int new_rt = -1;

/* Extra cycles charged to the next instruction if it straddles an
   8-byte boundary; zero when no penalty is pending.  */
static int branch_alignment_penalty;
225 
226 #endif
227 
/* Tracing is emitted only while this is nonzero.  */
static int running_benchmark = 1;

/* printf that is active only when tracing is on.  Note this expands
   to a bare "if", so it must not be used directly before an "else".  */
#define tprintf if (trace && running_benchmark) printf

/* Target for longjmp out of opcode execution (see DO_RETURN).  */
jmp_buf decode_jmp_buf;

/* Incremented once per decode_opcode call.  */
unsigned int rx_cycles;
234 
235 #ifdef CYCLE_ACCURATE
/* Set once the current instruction has read an operand from memory,
   in which case load latency may apply.  */
static int memory_source;

/* Set once the current instruction has written an operand to memory,
   in which case extra cycles may be needed.  */
static int memory_dest;
242 
243 static void
244 cycles (int throughput)
245 {
246   tprintf("%d cycles\n", throughput);
247   regs.cycle_count += throughput;
248 }
249 
/* Number of execution (E) cycles the op uses.  For memory sources, we
   include the load micro-op stall as two extra E cycles.  The macro
   arguments are parenthesized so that any expression may be passed
   without precedence surprises.  */
#define E(c) cycles (memory_source ? (c) + 2 : (c))
#define E1 cycles (1)
#define E2 cycles (2)
/* Bit operations cost one cycle, or two with a memory source.  */
#define EBIT cycles (memory_source ? 2 : 1)

/* Check to see if a read latency must be applied for register R: if R
   is the register currently recorded in regs.rt, charge one stall
   cycle and clear regs.rt.  R is evaluated more than once.  */
#define RL(r)								\
  if (regs.rt == (r))							\
    {									\
      tprintf("register %d load stall\n", (r));				\
      regs.cycle_count ++;						\
      STATS(register_stalls ++);					\
      regs.rt = -1;							\
    }

/* If the current instruction read memory, record R in new_rt.  R is
   evaluated more than once.  */
#define RLD(r)								\
  if (memory_source)							\
    {									\
      tprintf ("Rt now %d\n", (r));					\
      new_rt = (r);							\
    }
273 
/* Return a coarse size measure of V for the divide-latency estimate.

   Only bits 0..31 of V are examined.  The result is 0 when none of
   them is set; otherwise, with I the index of the highest set bit, it
   is (I + 2) / 2, i.e. 1 for bits 0-1, 2 for bits 2-3, ... 16 for
   bits 30-31.

   When IS_SIGNED is nonzero and bit 31 of V is set, V is treated as
   negative and negated first, so that its magnitude is measured.  */
static int
lsb_count (unsigned long v, int is_signed)
{
  int i;

  /* Unsigned negation wraps, which yields the magnitude of a
     sign-extended negative value.  */
  if (is_signed && (v & 0x80000000UL))
    v = 0UL - v;

  for (i = 31; i >= 0; i--)
    {
      /* Shift an unsigned long: "1 << 31" on an int overflows, and the
	 resulting mask would sign-extend where long is 64 bits.  */
      if (v & (1UL << i))
	{
	  /* i is 0..31, we want 1=0-1, 2=2-3, 3=4-5, etc.  */
	  return (i + 2) / 2;
	}
    }
  return 0;
}
289 
/* Charge the execution cycles of an unsigned divide of NUM by DEN and
   return the base cycle count: 2 when the dividend measures smaller
   than the divisor (per lsb_count), else 3 plus the difference.  E()
   adds the memory-source surcharge on top when it applies.  */
static int
divu_cycles(unsigned long num, unsigned long den)
{
  const int num_bits = lsb_count (num, 0);
  const int den_bits = lsb_count (den, 0);
  const int base = (num_bits < den_bits) ? 2 : 3 + num_bits - den_bits;

  E (base);
  return base;
}
304 
/* Charge the execution cycles of a signed divide of NUM by DEN and
   return the base cycle count: 3 when the dividend's magnitude
   measures smaller than the divisor's (per lsb_count), else 5 plus
   the difference.  E() adds the memory-source surcharge on top when
   it applies.  */
static int
div_cycles(long num, long den)
{
  const int num_bits = lsb_count ((unsigned long)num, 1);
  const int den_bits = lsb_count ((unsigned long)den, 1);
  const int base = (num_bits < den_bits) ? 3 : 5 + num_bits - den_bits;

  E (base);
  return base;
}
319 
320 #else /* !CYCLE_ACCURATE */
321 
/* Without cycle accounting every timing hook expands to nothing.  */

/* Cycle charging.  */
#define cycles(t)
#define E(c)
#define E1
#define E2
#define EBIT

/* Register load-latency tracking.  */
#define RL(r)
#define RLD(r)

/* Divide latency.  */
#define divu_cycles(n,d)
#define div_cycles(n,d)
332 
333 #endif /* else CYCLE_ACCURATE */
334 
/* Operand width in bytes, indexed by the operand's size code.
   NOTE(review): the per-entry labels assume the size codes are
   numbered in the order the RX_* sizes appear in the switches below
   -- confirm against opcode/rx.h.  */
static const int size2bytes[] = {
  4,	/* RX_AnySize */
  1,	/* RX_Byte */
  1,	/* RX_UByte */
  1,	/* RX_SByte */
  2,	/* RX_Word */
  2,	/* RX_UWord */
  2,	/* RX_SWord */
  3,	/* RX_3Byte */
  4	/* RX_Long */
};
338 
/* Cursor handed to rx_get_byte while decoding an instruction.  */
typedef struct {
  unsigned long dpc;	/* address of the next byte to fetch */
} RX_Data;
342 
343 #define rx_abort() _rx_abort(__FILE__, __LINE__)
344 static void ATTRIBUTE_NORETURN
345 _rx_abort (const char *file, int line)
346 {
347   if (strrchr (file, '/'))
348     file = strrchr (file, '/') + 1;
349   fprintf(stderr, "abort at %s:%d\n", file, line);
350   abort();
351 }
352 
353 static unsigned char *get_byte_base;
354 static RX_Opcode_Decoded **decode_cache_base;
355 static SI get_byte_page;
356 
357 void
358 reset_decoder (void)
359 {
360   get_byte_base = 0;
361   decode_cache_base = 0;
362   get_byte_page = 0;
363 }
364 
365 static inline void
366 maybe_get_mem_page (SI tpc)
367 {
368   if (((tpc ^ get_byte_page) & NONPAGE_MASK) || enable_counting)
369     {
370       get_byte_page = tpc & NONPAGE_MASK;
371       get_byte_base = rx_mem_ptr (get_byte_page, MPA_READING) - get_byte_page;
372       decode_cache_base = rx_mem_decode_cache (get_byte_page) - get_byte_page;
373     }
374 }
375 
376 /* This gets called a *lot* so optimize it.  */
377 static int
378 rx_get_byte (void *vdata)
379 {
380   RX_Data *rx_data = (RX_Data *)vdata;
381   SI tpc = rx_data->dpc;
382 
383   /* See load.c for an explanation of this.  */
384   if (rx_big_endian)
385     tpc ^= 3;
386 
387   maybe_get_mem_page (tpc);
388 
389   rx_data->dpc ++;
390   return get_byte_base [tpc];
391 }
392 
393 static int
394 get_op (const RX_Opcode_Decoded *rd, int i)
395 {
396   const RX_Opcode_Operand *o = rd->op + i;
397   int addr, rv = 0;
398 
399   switch (o->type)
400     {
401     case RX_Operand_None:
402       rx_abort ();
403 
404     case RX_Operand_Immediate:	/* #addend */
405       return o->addend;
406 
407     case RX_Operand_Register:	/* Rn */
408       RL (o->reg);
409       rv = get_reg (o->reg);
410       break;
411 
412     case RX_Operand_Predec:	/* [-Rn] */
413       put_reg (o->reg, get_reg (o->reg) - size2bytes[o->size]);
414       ATTRIBUTE_FALLTHROUGH;
415     case RX_Operand_Postinc:	/* [Rn+] */
416     case RX_Operand_Zero_Indirect:	/* [Rn + 0] */
417     case RX_Operand_Indirect:	/* [Rn + addend] */
418     case RX_Operand_TwoReg:	/* [Rn + scale * R2] */
419 #ifdef CYCLE_ACCURATE
420       RL (o->reg);
421       if (o->type == RX_Operand_TwoReg)
422 	RL (rd->op[2].reg);
423       regs.rt = -1;
424       if (regs.m2m == M2M_BOTH)
425 	{
426 	  tprintf("src memory stall\n");
427 #ifdef WITH_PROFILE
428 	  memory_stalls ++;
429 #endif
430 	  regs.cycle_count ++;
431 	  regs.m2m = 0;
432 	}
433 
434       memory_source = 1;
435 #endif
436 
437       if (o->type == RX_Operand_TwoReg)
438 	addr = get_reg (o->reg) * size2bytes[rd->size] + get_reg (rd->op[2].reg);
439       else
440 	addr = get_reg (o->reg) + o->addend;
441 
442       switch (o->size)
443 	{
444 	default:
445 	case RX_AnySize:
446 	  rx_abort ();
447 
448 	case RX_Byte: /* undefined extension */
449 	case RX_UByte:
450 	case RX_SByte:
451 	  rv = mem_get_qi (addr);
452 	  break;
453 
454 	case RX_Word: /* undefined extension */
455 	case RX_UWord:
456 	case RX_SWord:
457 	  rv = mem_get_hi (addr);
458 	  break;
459 
460 	case RX_3Byte:
461 	  rv = mem_get_psi (addr);
462 	  break;
463 
464 	case RX_Long:
465 	  rv = mem_get_si (addr);
466 	  break;
467 	}
468 
469       if (o->type == RX_Operand_Postinc)
470 	put_reg (o->reg, get_reg (o->reg) + size2bytes[o->size]);
471 
472       break;
473 
474     case RX_Operand_Condition:	/* eq, gtu, etc */
475       return condition_true (o->reg);
476 
477     case RX_Operand_Flag:	/* [UIOSZC] */
478       return (regs.r_psw & (1 << o->reg)) ? 1 : 0;
479     }
480 
481   /* if we've gotten here, we need to clip/extend the value according
482      to the size.  */
483   switch (o->size)
484     {
485     default:
486     case RX_AnySize:
487       rx_abort ();
488 
489     case RX_Byte: /* undefined extension */
490       rv |= 0xdeadbe00; /* keep them honest */
491       break;
492 
493     case RX_UByte:
494       rv &= 0xff;
495       break;
496 
497     case RX_SByte:
498       rv = sign_ext (rv, 8);
499       break;
500 
501     case RX_Word: /* undefined extension */
502       rv |= 0xdead0000; /* keep them honest */
503       break;
504 
505     case RX_UWord:
506       rv &=  0xffff;
507       break;
508 
509     case RX_SWord:
510       rv = sign_ext (rv, 16);
511       break;
512 
513     case RX_3Byte:
514       rv &= 0xffffff;
515       break;
516 
517     case RX_Long:
518       break;
519     }
520   return rv;
521 }
522 
523 static void
524 put_op (const RX_Opcode_Decoded *rd, int i, int v)
525 {
526   const RX_Opcode_Operand *o = rd->op + i;
527   int addr;
528 
529   switch (o->size)
530     {
531     default:
532     case RX_AnySize:
533       if (o->type != RX_Operand_Register)
534 	rx_abort ();
535       break;
536 
537     case RX_Byte: /* undefined extension */
538       v |= 0xdeadbe00; /* keep them honest */
539       break;
540 
541     case RX_UByte:
542       v &= 0xff;
543       break;
544 
545     case RX_SByte:
546       v = sign_ext (v, 8);
547       break;
548 
549     case RX_Word: /* undefined extension */
550       v |= 0xdead0000; /* keep them honest */
551       break;
552 
553     case RX_UWord:
554       v &=  0xffff;
555       break;
556 
557     case RX_SWord:
558       v = sign_ext (v, 16);
559       break;
560 
561     case RX_3Byte:
562       v &= 0xffffff;
563       break;
564 
565     case RX_Long:
566       break;
567     }
568 
569   switch (o->type)
570     {
571     case RX_Operand_None:
572       /* Opcodes like TST and CMP use this.  */
573       break;
574 
575     case RX_Operand_Immediate:	/* #addend */
576     case RX_Operand_Condition:	/* eq, gtu, etc */
577       rx_abort ();
578 
579     case RX_Operand_Register:	/* Rn */
580       put_reg (o->reg, v);
581       RLD (o->reg);
582       break;
583 
584     case RX_Operand_Predec:	/* [-Rn] */
585       put_reg (o->reg, get_reg (o->reg) - size2bytes[o->size]);
586       ATTRIBUTE_FALLTHROUGH;
587     case RX_Operand_Postinc:	/* [Rn+] */
588     case RX_Operand_Zero_Indirect:	/* [Rn + 0] */
589     case RX_Operand_Indirect:	/* [Rn + addend] */
590     case RX_Operand_TwoReg:	/* [Rn + scale * R2] */
591 
592 #ifdef CYCLE_ACCURATE
593       if (regs.m2m == M2M_BOTH)
594 	{
595 	  tprintf("dst memory stall\n");
596 	  regs.cycle_count ++;
597 #ifdef WITH_PROFILE
598 	  memory_stalls ++;
599 #endif
600 	  regs.m2m = 0;
601 	}
602       memory_dest = 1;
603 #endif
604 
605       if (o->type == RX_Operand_TwoReg)
606 	addr = get_reg (o->reg) * size2bytes[rd->size] + get_reg (rd->op[2].reg);
607       else
608 	addr = get_reg (o->reg) + o->addend;
609 
610       switch (o->size)
611 	{
612 	default:
613 	case RX_AnySize:
614 	  rx_abort ();
615 
616 	case RX_Byte: /* undefined extension */
617 	case RX_UByte:
618 	case RX_SByte:
619 	  mem_put_qi (addr, v);
620 	  break;
621 
622 	case RX_Word: /* undefined extension */
623 	case RX_UWord:
624 	case RX_SWord:
625 	  mem_put_hi (addr, v);
626 	  break;
627 
628 	case RX_3Byte:
629 	  mem_put_psi (addr, v);
630 	  break;
631 
632 	case RX_Long:
633 	  mem_put_si (addr, v);
634 	  break;
635 	}
636 
637       if (o->type == RX_Operand_Postinc)
638 	put_reg (o->reg, get_reg (o->reg) + size2bytes[o->size]);
639 
640       break;
641 
642     case RX_Operand_Flag:	/* [UIOSZC] */
643       if (v)
644 	regs.r_psw |= (1 << o->reg);
645       else
646 	regs.r_psw &= ~(1 << o->reg);
647       break;
648     }
649 }
650 
/* Operand accessors.  They all expect a local named "opcode" that
   points at the decoded instruction.  Operand 0 is the destination,
   operands 1 and 2 are the sources.  */

/* Store X into the destination / first source / second source.  */
#define PD(x)	put_op (opcode, 0, x)
#define PS(x)	put_op (opcode, 1, x)
#define PS2(x)	put_op (opcode, 2, x)

/* Fetch the destination / first source / second source.  */
#define GD()	get_op (opcode, 0)
#define GS()	get_op (opcode, 1)
#define GS2()	get_op (opcode, 2)

/* Operand sizes in bytes.  NOTE(review): SSZ and S2SZ read the size
   of operand 0, exactly like DSZ; that looks like a copy-paste, but
   confirm against their users before changing it.  */
#define DSZ()	size2bytes[opcode->op[0].size]
#define SSZ()	size2bytes[opcode->op[0].size]
#define S2SZ()	size2bytes[opcode->op[0].size]

/* "Universal" sources: with no third operand the instruction is
   d OP= s, so the operands are (d, s); otherwise they are (s, s2).  */
#define US1() ((opcode->op[2].type == RX_Operand_None) ? GD() : GS())
#define US2() ((opcode->op[2].type == RX_Operand_None) ? GS() : GS2())
664 
665 static void
666 push(int val)
667 {
668   int rsp = get_reg (sp);
669   rsp -= 4;
670   put_reg (sp, rsp);
671   mem_put_si (rsp, val);
672 }
673 
674 /* Just like the above, but tag the memory as "pushed pc" so if anyone
675    tries to write to it, it will cause an error.  */
676 static void
677 pushpc(int val)
678 {
679   int rsp = get_reg (sp);
680   rsp -= 4;
681   put_reg (sp, rsp);
682   mem_put_si (rsp, val);
683   mem_set_content_range (rsp, rsp+3, MC_PUSHED_PC);
684 }
685 
686 static int
687 pop (void)
688 {
689   int rv;
690   int rsp = get_reg (sp);
691   rv = mem_get_si (rsp);
692   rsp += 4;
693   put_reg (sp, rsp);
694   return rv;
695 }
696 
697 static int
698 poppc (void)
699 {
700   int rv;
701   int rsp = get_reg (sp);
702   if (mem_get_content_type (rsp) != MC_PUSHED_PC)
703     execution_error (SIM_ERR_CORRUPT_STACK, rsp);
704   rv = mem_get_si (rsp);
705   mem_set_content_range (rsp, rsp+3, MC_UNINIT);
706   rsp += 4;
707   put_reg (sp, rsp);
708   return rv;
709 }
710 
/* Add/subtract template: computes "a VOP b VOP C" on the universal
   sources, both as unsigned (for the carry) and as sign-extended
   values (for the result and the other flags), updates O/S/Z/C and
   stores the result in the destination.  It uses the caller's locals
   uma, umb, ma, mb, ll and sll.  The "(1 VOP 1)" test tells addition
   (nonzero) from subtraction (zero) at compile time, selecting the
   matching carry rule.  */
#define MATH_OP(vop,c)							\
{									\
  umb = US2();								\
  uma = US1();								\
  ll = (unsigned long long) uma vop (unsigned long long) umb vop c;	\
  tprintf ("0x%x " #vop " 0x%x " #vop " 0x%x = 0x%llx\n", uma, umb, c, ll); \
  ma = sign_ext (uma, DSZ() * 8);					\
  mb = sign_ext (umb, DSZ() * 8);					\
  sll = (long long) ma vop (long long) mb vop c;			\
  tprintf ("%d " #vop " %d " #vop " %d = %lld\n", ma, mb, c, sll);	\
  set_oszc (sll, DSZ(), (long long) ll > ((1 vop 1) ? (long long) b2mask[DSZ()] : (long long) -1)); \
  PD (sll);								\
  E (1);								\
}
725 
/* Bitwise-operation template: applies VOP to the universal sources,
   sets S and Z from the result and stores it in the destination.  It
   uses the caller's locals ma, mb and v.  */
#define LOGIC_OP(vop)							\
{									\
  mb = US2();								\
  ma = US1();								\
  v = ma vop mb;							\
  tprintf("0x%x " #vop " 0x%x = 0x%x\n", ma, mb, v);			\
  set_sz (v, DSZ());							\
  PD(v);								\
  E (1);								\
}
736 
/* Shift template: loads COUNT from the second universal source and
   VAL (cast to TYPE) from the first, then applies "VAL OP 1" COUNT
   times, remembering before each step the bit selected by CARRY_MASK.
   The last such bit becomes the carry passed to set_oszc (zero when
   COUNT is not positive), and VAL is stored in the destination.  VAL
   is traced with %lld, so it should be a long long lvalue.  */
#define SHIFT_OP(val, type, count, OP, carry_mask)			\
{									\
  int i, c=0;								\
  count = US2();							\
  val = (type)US1();							\
  tprintf("%lld " #OP " %d\n", val, count);				\
  for (i = 0; i < count; i ++)						\
    {									\
      c = val & carry_mask;						\
      val OP 1;								\
    }									\
  set_oszc (val, 4, c);							\
  PD (val);								\
}
751 
/* Overlay used to reinterpret the bits of a float as an int and
   vice versa.  */
typedef union {
  int i;	/* the raw bit pattern */
  float f;	/* the same bits viewed as a float */
} FloatInt;
756 
757 ATTRIBUTE_UNUSED
758 static inline int
759 float2int (float f)
760 {
761   FloatInt fi;
762   fi.f = f;
763   return fi.i;
764 }
765 
766 static inline float
767 int2float (int i)
768 {
769   FloatInt fi;
770   fi.i = i;
771   return fi.f;
772 }
773 
774 static int
775 fop_fadd (fp_t s1, fp_t s2, fp_t *d)
776 {
777   *d = rxfp_add (s1, s2);
778   return 1;
779 }
780 
781 static int
782 fop_fmul (fp_t s1, fp_t s2, fp_t *d)
783 {
784   *d = rxfp_mul (s1, s2);
785   return 1;
786 }
787 
788 static int
789 fop_fdiv (fp_t s1, fp_t s2, fp_t *d)
790 {
791   *d = rxfp_div (s1, s2);
792   return 1;
793 }
794 
795 static int
796 fop_fsub (fp_t s1, fp_t s2, fp_t *d)
797 {
798   *d = rxfp_sub (s1, s2);
799   return 1;
800 }
801 
/* Nonzero when an FP exception needs delivering: the CE bit, or any
   FMASK bit that is still set in FPSW shifted left by FPSW_EFSH.  */
#define FPPENDING() \
  (regs.r_fpsw & (FPSWBITS_CE | (FPSWBITS_FMASK & (regs.r_fpsw << FPSW_EFSH))))

/* Mask FPSW with FPSWBITS_CLEAR before starting an FP operation.  */
#define FPCLEAR() regs.r_fpsw &= FPSWBITS_CLEAR

/* Leave the enclosing function through do_fp_exception if an FP
   exception is pending.  Expects a local named "opcode_pc".  */
#define FPCHECK() \
  if (FPPENDING()) \
    return do_fp_exception (opcode_pc)
807 
/* Two-operand floating point template.  Clears the FPSW status,
   fetches the source and then the destination, and runs fop_FUNC on
   them.  A pending FP exception makes the enclosing function return
   (see FPCHECK) before anything is stored.  Otherwise the result is
   written back when fop_FUNC asks for it, and S and Z are set from
   the result's sign bit and from its remaining bits being all zero.
   It uses the caller's local mb as scratch.  */
#define FLOAT_OP(func)							\
{									\
  int do_store;								\
  fp_t fa, fb, fc;							\
  FPCLEAR();								\
  fb = GS ();								\
  fa = GD ();								\
  do_store = fop_##func (fa, fb, &fc);					\
  tprintf("%g " #func " %g = %g %08x\n", int2float(fa), int2float(fb), int2float(fc), fc); \
  FPCHECK();								\
  if (do_store)								\
    PD (fc);								\
  mb = 0;								\
  if ((fc & 0x80000000UL) != 0)						\
    mb |= FLAGBIT_S;							\
  if ((fc & 0x7fffffffUL) == 0)						\
    mb |= FLAGBIT_Z;							\
  set_flags (FLAGBIT_S | FLAGBIT_Z, mb);				\
}
827 
/* The carry flag as exactly 0 or 1, for use in arithmetic.  */
#define carry (FLAG_C ? 1 : 0)
829 
/* One entry per exception kind, indexed by the EX_* constants: the
   address of the vector holding the handler's pc, the name used in
   diagnostics, and the host signal reported when the exception is
   not handled under gdb.  */
static struct {
  unsigned long vaddr;
  const char *str;
  int signal;
} exception_info[] = {
  { .vaddr = 0xFFFFFFD0UL, .str = "priviledged opcode", .signal = SIGILL },
  { .vaddr = 0xFFFFFFD4UL, .str = "access violation",   .signal = SIGSEGV },
  { .vaddr = 0xFFFFFFDCUL, .str = "undefined opcode",   .signal = SIGILL },
  { .vaddr = 0xFFFFFFE4UL, .str = "floating point",     .signal = SIGFPE }
};
/* Indices into exception_info.  */
#define EX_PRIVILEDGED	0
#define EX_ACCESS	1
#define EX_UNDEFINED	2
#define EX_FLOATING	3

/* Raise exception N and return its status from the enclosing
   function.  Expects a local named "opcode_pc".  */
#define EXCEPTION(n) \
  return generate_exception (n, opcode_pc)

/* Raise a privileged-opcode exception when FLAG_PM is set.  */
#define PRIVILEDGED() \
  if (FLAG_PM) \
    EXCEPTION (EX_PRIVILEDGED)
850 
851 static int
852 generate_exception (unsigned long type, SI opcode_pc)
853 {
854   SI old_psw, old_pc, new_pc;
855 
856   new_pc = mem_get_si (exception_info[type].vaddr);
857   /* 0x00020000 is the value used to initialise the known
858      exception vectors (see rx.ld), but it is a reserved
859      area of memory so do not try to access it, and if the
860      value has not been changed by the program then the
861      vector has not been installed.  */
862   if (new_pc == 0 || new_pc == 0x00020000)
863     {
864       if (rx_in_gdb)
865 	return RX_MAKE_STOPPED (exception_info[type].signal);
866 
867       fprintf(stderr, "Unhandled %s exception at pc = %#lx\n",
868 	      exception_info[type].str, (unsigned long) opcode_pc);
869       if (type == EX_FLOATING)
870 	{
871 	  int mask = FPPENDING ();
872 	  fprintf (stderr, "Pending FP exceptions:");
873 	  if (mask & FPSWBITS_FV)
874 	    fprintf(stderr, " Invalid");
875 	  if (mask & FPSWBITS_FO)
876 	    fprintf(stderr, " Overflow");
877 	  if (mask & FPSWBITS_FZ)
878 	    fprintf(stderr, " Division-by-zero");
879 	  if (mask & FPSWBITS_FU)
880 	    fprintf(stderr, " Underflow");
881 	  if (mask & FPSWBITS_FX)
882 	    fprintf(stderr, " Inexact");
883 	  if (mask & FPSWBITS_CE)
884 	    fprintf(stderr, " Unimplemented");
885 	  fprintf(stderr, "\n");
886 	}
887       return RX_MAKE_EXITED (1);
888     }
889 
890   tprintf ("Triggering %s exception\n", exception_info[type].str);
891 
892   old_psw = regs.r_psw;
893   regs.r_psw &= ~ (FLAGBIT_I | FLAGBIT_U | FLAGBIT_PM);
894   old_pc = opcode_pc;
895   regs.r_pc = new_pc;
896   pushpc (old_psw);
897   pushpc (old_pc);
898   return RX_MAKE_STEPPED ();
899 }
900 
901 void
902 generate_access_exception (void)
903 {
904   int rv;
905 
906   rv = generate_exception (EX_ACCESS, regs.r_pc);
907   if (RX_EXITED (rv))
908     longjmp (decode_jmp_buf, rv);
909 }
910 
911 static int
912 do_fp_exception (unsigned long opcode_pc)
913 {
914   while (FPPENDING())
915     EXCEPTION (EX_FLOATING);
916   return RX_MAKE_STEPPED ();
917 }
918 
919 static int
920 op_is_memory (const RX_Opcode_Decoded *rd, int i)
921 {
922   switch (rd->op[i].type)
923     {
924     case RX_Operand_Predec:
925     case RX_Operand_Postinc:
926     case RX_Operand_Indirect:
927       return 1;
928     default:
929       return 0;
930     }
931 }
/* Is operand I of the current instruction a memory operand?  Expects
   a local named "opcode".  */
#define OM(i) op_is_memory (opcode, i)

/* Leave opcode execution with status X by unwinding to the setjmp
   point recorded in decode_jmp_buf.  */
#define DO_RETURN(x) { longjmp (decode_jmp_buf, x); }
935 
936 int
937 decode_opcode (void)
938 {
939   unsigned int uma=0, umb=0;
940   int ma=0, mb=0;
941   int opcode_size, v;
942   unsigned long long ll;
943   long long sll;
944   unsigned long opcode_pc;
945   RX_Data rx_data;
946   const RX_Opcode_Decoded *opcode;
947 #ifdef WITH_PROFILE
948   unsigned long long prev_cycle_count;
949 #endif
950 #ifdef CYCLE_ACCURATE
951   unsigned int tx;
952 #endif
953 
954 #ifdef WITH_PROFILE
955   prev_cycle_count = regs.cycle_count;
956 #endif
957 
958 #ifdef CYCLE_ACCURATE
959   memory_source = 0;
960   memory_dest = 0;
961 #endif
962 
963   rx_cycles ++;
964 
965   maybe_get_mem_page (regs.r_pc);
966 
967   opcode_pc = regs.r_pc;
968 
969   /* Note that we don't word-swap this point, there's no point.  */
970   if (decode_cache_base[opcode_pc] == NULL)
971     {
972       RX_Opcode_Decoded *opcode_w;
973       rx_data.dpc = opcode_pc;
974       opcode_w = decode_cache_base[opcode_pc] = calloc (1, sizeof (RX_Opcode_Decoded));
975       opcode_size = rx_decode_opcode (opcode_pc, opcode_w,
976 				      rx_get_byte, &rx_data);
977       opcode = opcode_w;
978     }
979   else
980     {
981       opcode = decode_cache_base[opcode_pc];
982       opcode_size = opcode->n_bytes;
983     }
984 
985 #ifdef CYCLE_ACCURATE
986   if (branch_alignment_penalty)
987     {
988       if ((regs.r_pc ^ (regs.r_pc + opcode_size - 1)) & ~7)
989 	{
990 	  tprintf("1 cycle branch alignment penalty\n");
991 	  cycles (branch_alignment_penalty);
992 #ifdef WITH_PROFILE
993 	  branch_alignment_stalls ++;
994 #endif
995 	}
996       branch_alignment_penalty = 0;
997     }
998 #endif
999 
1000   regs.r_pc += opcode_size;
1001 
1002   rx_flagmask = opcode->flags_s;
1003   rx_flagand = ~(int)opcode->flags_0;
1004   rx_flagor = opcode->flags_1;
1005 
1006   switch (opcode->id)
1007     {
1008     case RXO_abs:
1009       sll = GS ();
1010       tprintf("|%lld| = ", sll);
1011       if (sll < 0)
1012 	sll = -sll;
1013       tprintf("%lld\n", sll);
1014       PD (sll);
1015       set_osz (sll, 4);
1016       E (1);
1017       break;
1018 
1019     case RXO_adc:
1020       MATH_OP (+,carry);
1021       break;
1022 
1023     case RXO_add:
1024       MATH_OP (+,0);
1025       break;
1026 
1027     case RXO_and:
1028       LOGIC_OP (&);
1029       break;
1030 
1031     case RXO_bclr:
1032       ma = GD ();
1033       mb = GS ();
1034       if (opcode->op[0].type == RX_Operand_Register)
1035 	mb &= 0x1f;
1036       else
1037 	mb &= 0x07;
1038       ma &= ~(1 << mb);
1039       PD (ma);
1040       EBIT;
1041       break;
1042 
1043     case RXO_bmcc:
1044       ma = GD ();
1045       mb = GS ();
1046       if (opcode->op[0].type == RX_Operand_Register)
1047 	mb &= 0x1f;
1048       else
1049 	mb &= 0x07;
1050       if (GS2 ())
1051 	ma |= (1 << mb);
1052       else
1053 	ma &= ~(1 << mb);
1054       PD (ma);
1055       EBIT;
1056       break;
1057 
1058     case RXO_bnot:
1059       ma = GD ();
1060       mb = GS ();
1061       if (opcode->op[0].type == RX_Operand_Register)
1062 	mb &= 0x1f;
1063       else
1064 	mb &= 0x07;
1065       ma ^= (1 << mb);
1066       PD (ma);
1067       EBIT;
1068       break;
1069 
1070     case RXO_branch:
1071       if (opcode->op[1].type == RX_Operand_None || GS())
1072 	{
1073 #ifdef CYCLE_ACCURATE
1074 	  SI old_pc = regs.r_pc;
1075 	  int delta;
1076 #endif
1077 	  regs.r_pc = GD();
1078 #ifdef CYCLE_ACCURATE
1079 	  delta = regs.r_pc - old_pc;
1080 	  if (delta >= 0 && delta < 16
1081 	      && opcode_size > 1)
1082 	    {
1083 	      tprintf("near forward branch bonus\n");
1084 	      cycles (2);
1085 	    }
1086 	  else
1087 	    {
1088 	      cycles (3);
1089 	      branch_alignment_penalty = 1;
1090 	    }
1091 #ifdef WITH_PROFILE
1092 	  branch_stalls ++;
1093 #endif
1094 #endif
1095 	}
1096 #ifdef CYCLE_ACCURATE
1097       else
1098 	cycles (1);
1099 #endif
1100       break;
1101 
1102     case RXO_branchrel:
1103       if (opcode->op[1].type == RX_Operand_None || GS())
1104 	{
1105 	  int delta = GD();
1106 	  regs.r_pc = opcode_pc + delta;
1107 #ifdef CYCLE_ACCURATE
1108 	  /* Note: specs say 3, chip says 2.  */
1109 	  if (delta >= 0 && delta < 16
1110 	      && opcode_size > 1)
1111 	    {
1112 	      tprintf("near forward branch bonus\n");
1113 	      cycles (2);
1114 	    }
1115 	  else
1116 	    {
1117 	      cycles (3);
1118 	      branch_alignment_penalty = 1;
1119 	    }
1120 #ifdef WITH_PROFILE
1121 	  branch_stalls ++;
1122 #endif
1123 #endif
1124 	}
1125 #ifdef CYCLE_ACCURATE
1126       else
1127 	cycles (1);
1128 #endif
1129       break;
1130 
1131     case RXO_brk:
1132       {
1133 	int old_psw = regs.r_psw;
1134 	if (rx_in_gdb)
1135 	  DO_RETURN (RX_MAKE_HIT_BREAK ());
1136 	if (regs.r_intb == 0)
1137 	  {
1138 	    tprintf("BREAK hit, no vector table.\n");
1139 	    DO_RETURN (RX_MAKE_EXITED(1));
1140 	  }
1141 	regs.r_psw &= ~(FLAGBIT_I | FLAGBIT_U | FLAGBIT_PM);
1142 	pushpc (old_psw);
1143 	pushpc (regs.r_pc);
1144 	regs.r_pc = mem_get_si (regs.r_intb);
1145 	cycles(6);
1146       }
1147       break;
1148 
1149     case RXO_bset:
1150       ma = GD ();
1151       mb = GS ();
1152       if (opcode->op[0].type == RX_Operand_Register)
1153 	mb &= 0x1f;
1154       else
1155 	mb &= 0x07;
1156       ma |= (1 << mb);
1157       PD (ma);
1158       EBIT;
1159       break;
1160 
1161     case RXO_btst:
1162       ma = GS ();
1163       mb = GS2 ();
1164       if (opcode->op[1].type == RX_Operand_Register)
1165 	mb &= 0x1f;
1166       else
1167 	mb &= 0x07;
1168       umb = ma & (1 << mb);
1169       set_zc (! umb, umb);
1170       EBIT;
1171       break;
1172 
1173     case RXO_clrpsw:
1174       v = 1 << opcode->op[0].reg;
1175       if (FLAG_PM
1176 	  && (v == FLAGBIT_I
1177 	      || v == FLAGBIT_U))
1178 	break;
1179       regs.r_psw &= ~v;
1180       cycles (1);
1181       break;
1182 
1183     case RXO_div: /* d = d / s */
1184       ma = GS();
1185       mb = GD();
1186       tprintf("%d / %d = ", mb, ma);
1187       if (ma == 0 || (ma == -1 && (unsigned int) mb == 0x80000000))
1188 	{
1189 	  tprintf("#NAN\n");
1190 	  set_flags (FLAGBIT_O, FLAGBIT_O);
1191 	  cycles (3);
1192 	}
1193       else
1194 	{
1195 	  v = mb/ma;
1196 	  tprintf("%d\n", v);
1197 	  set_flags (FLAGBIT_O, 0);
1198 	  PD (v);
1199 	  div_cycles (mb, ma);
1200 	}
1201       break;
1202 
1203     case RXO_divu: /* d = d / s */
1204       uma = GS();
1205       umb = GD();
1206       tprintf("%u / %u = ", umb, uma);
1207       if (uma == 0)
1208 	{
1209 	  tprintf("#NAN\n");
1210 	  set_flags (FLAGBIT_O, FLAGBIT_O);
1211 	  cycles (2);
1212 	}
1213       else
1214 	{
1215 	  v = umb / uma;
1216 	  tprintf("%u\n", v);
1217 	  set_flags (FLAGBIT_O, 0);
1218 	  PD (v);
1219 	  divu_cycles (umb, uma);
1220 	}
1221       break;
1222 
1223     case RXO_emul:
1224       ma = GD ();
1225       mb = GS ();
1226       sll = (long long)ma * (long long)mb;
1227       tprintf("%d * %d = %lld\n", ma, mb, sll);
1228       put_reg (opcode->op[0].reg, sll);
1229       put_reg (opcode->op[0].reg + 1, sll >> 32);
1230       E2;
1231       break;
1232 
1233     case RXO_emulu:
1234       uma = GD ();
1235       umb = GS ();
1236       ll = (long long)uma * (long long)umb;
1237       tprintf("%#x * %#x = %#llx\n", uma, umb, ll);
1238       put_reg (opcode->op[0].reg, ll);
1239       put_reg (opcode->op[0].reg + 1, ll >> 32);
1240       E2;
1241       break;
1242 
1243     case RXO_fadd:
1244       FLOAT_OP (fadd);
1245       E (4);
1246       break;
1247 
1248     case RXO_fcmp:
1249       ma = GD();
1250       mb = GS();
1251       FPCLEAR ();
1252       rxfp_cmp (ma, mb);
1253       FPCHECK ();
1254       E (1);
1255       break;
1256 
1257     case RXO_fdiv:
1258       FLOAT_OP (fdiv);
1259       E (16);
1260       break;
1261 
1262     case RXO_fmul:
1263       FLOAT_OP (fmul);
1264       E (3);
1265       break;
1266 
1267     case RXO_rtfi:
1268       PRIVILEDGED ();
1269       regs.r_psw = regs.r_bpsw;
1270       regs.r_pc = regs.r_bpc;
1271 #ifdef CYCLE_ACCURATE
1272       regs.fast_return = 0;
1273       cycles(3);
1274 #endif
1275       break;
1276 
1277     case RXO_fsub:
1278       FLOAT_OP (fsub);
1279       E (4);
1280       break;
1281 
1282     case RXO_ftoi:
1283       ma = GS ();
1284       FPCLEAR ();
1285       mb = rxfp_ftoi (ma, FPRM_ZERO);
1286       FPCHECK ();
1287       PD (mb);
1288       tprintf("(int) %g = %d\n", int2float(ma), mb);
1289       set_sz (mb, 4);
1290       E (2);
1291       break;
1292 
1293     case RXO_int:
1294       v = GS ();
1295       if (v == 255)
1296 	{
1297 	  int rc = rx_syscall (regs.r[5]);
1298 	  if (! RX_STEPPED (rc))
1299 	    DO_RETURN (rc);
1300 	}
1301       else
1302 	{
1303 	  int old_psw = regs.r_psw;
1304 	  regs.r_psw &= ~(FLAGBIT_I | FLAGBIT_U | FLAGBIT_PM);
1305 	  pushpc (old_psw);
1306 	  pushpc (regs.r_pc);
1307 	  regs.r_pc = mem_get_si (regs.r_intb + 4 * v);
1308 	}
1309       cycles (6);
1310       break;
1311 
1312     case RXO_itof:
1313       ma = GS ();
1314       FPCLEAR ();
1315       mb = rxfp_itof (ma, regs.r_fpsw);
1316       FPCHECK ();
1317       tprintf("(float) %d = %x\n", ma, mb);
1318       PD (mb);
1319       set_sz (ma, 4);
1320       E (2);
1321       break;
1322 
1323     case RXO_jsr:
1324     case RXO_jsrrel:
1325       {
1326 #ifdef CYCLE_ACCURATE
1327 	int delta;
1328 	regs.m2m = 0;
1329 #endif
1330 	v = GD ();
1331 #ifdef CYCLE_ACCURATE
1332 	regs.link_register = regs.r_pc;
1333 #endif
1334 	pushpc (get_reg (pc));
1335 	if (opcode->id == RXO_jsrrel)
1336 	  v += regs.r_pc;
1337 #ifdef CYCLE_ACCURATE
1338 	delta = v - regs.r_pc;
1339 #endif
1340 	put_reg (pc, v);
1341 #ifdef CYCLE_ACCURATE
1342 	/* Note: docs say 3, chip says 2 */
1343 	if (delta >= 0 && delta < 16)
1344 	  {
1345 	    tprintf ("near forward jsr bonus\n");
1346 	    cycles (2);
1347 	  }
1348 	else
1349 	  {
1350 	    branch_alignment_penalty = 1;
1351 	    cycles (3);
1352 	  }
1353 	regs.fast_return = 1;
1354 #endif
1355       }
1356       break;
1357 
1358     case RXO_machi:
1359       ll = (long long)(signed short)(GS() >> 16) * (long long)(signed short)(GS2 () >> 16);
1360       ll <<= 16;
1361       put_reg64 (acc64, ll + regs.r_acc);
1362       E1;
1363       break;
1364 
1365     case RXO_maclo:
1366       ll = (long long)(signed short)(GS()) * (long long)(signed short)(GS2 ());
1367       ll <<= 16;
1368       put_reg64 (acc64, ll + regs.r_acc);
1369       E1;
1370       break;
1371 
1372     case RXO_max:
1373       mb = GS();
1374       ma = GD();
1375       if (ma > mb)
1376 	PD (ma);
1377       else
1378 	PD (mb);
1379       E (1);
1380       break;
1381 
1382     case RXO_min:
1383       mb = GS();
1384       ma = GD();
1385       if (ma < mb)
1386 	PD (ma);
1387       else
1388 	PD (mb);
1389       E (1);
1390       break;
1391 
1392     case RXO_mov:
1393       v = GS ();
1394 
1395       if (opcode->op[1].type == RX_Operand_Register
1396 	  && opcode->op[1].reg == 17 /* PC */)
1397 	{
1398 	  /* Special case.  We want the address of the insn, not the
1399 	     address of the next insn.  */
1400 	  v = opcode_pc;
1401 	}
1402 
1403       if (opcode->op[0].type == RX_Operand_Register
1404 	  && opcode->op[0].reg == 16 /* PSW */)
1405 	{
1406 	  /* Special case, LDC and POPC can't ever modify PM.  */
1407 	  int pm = regs.r_psw & FLAGBIT_PM;
1408 	  v &= ~ FLAGBIT_PM;
1409 	  v |= pm;
1410 	  if (pm)
1411 	    {
1412 	      v &= ~ (FLAGBIT_I | FLAGBIT_U | FLAGBITS_IPL);
1413 	      v |= pm;
1414 	    }
1415 	}
1416       if (FLAG_PM)
1417 	{
1418 	  /* various things can't be changed in user mode.  */
1419 	  if (opcode->op[0].type == RX_Operand_Register)
1420 	    if (opcode->op[0].reg == 32)
1421 	      {
1422 		v &= ~ (FLAGBIT_I | FLAGBIT_U | FLAGBITS_IPL);
1423 		v |= regs.r_psw & (FLAGBIT_I | FLAGBIT_U | FLAGBITS_IPL);
1424 	      }
1425 	  if (opcode->op[0].reg == 34 /* ISP */
1426 	      || opcode->op[0].reg == 37 /* BPSW */
1427 	      || opcode->op[0].reg == 39 /* INTB */
1428 	      || opcode->op[0].reg == 38 /* VCT */)
1429 	    /* These are ignored.  */
1430 	    break;
1431 	}
1432       if (OM(0) && OM(1))
1433 	cycles (2);
1434       else
1435 	cycles (1);
1436 
1437       PD (v);
1438 
1439 #ifdef CYCLE_ACCURATE
1440       if ((opcode->op[0].type == RX_Operand_Predec
1441 	   && opcode->op[1].type == RX_Operand_Register)
1442 	  || (opcode->op[0].type == RX_Operand_Postinc
1443 	      && opcode->op[1].type == RX_Operand_Register))
1444 	{
1445 	  /* Special case: push reg doesn't cause a memory stall.  */
1446 	  memory_dest = 0;
1447 	  tprintf("push special case\n");
1448 	}
1449 #endif
1450 
1451       set_sz (v, DSZ());
1452       break;
1453 
1454     case RXO_movbi:
1455       PD (GS ());
1456       cycles (1);
1457       break;
1458 
1459     case RXO_movbir:
1460       PS (GD ());
1461       cycles (1);
1462       break;
1463 
1464     case RXO_mul:
1465       v = US2 ();
1466       ll = (unsigned long long) US1() * (unsigned long long) v;
1467       PD(ll);
1468       E (1);
1469       break;
1470 
1471     case RXO_mulhi:
1472       v = GS2 ();
1473       ll = (long long)(signed short)(GS() >> 16) * (long long)(signed short)(v >> 16);
1474       ll <<= 16;
1475       put_reg64 (acc64, ll);
1476       E1;
1477       break;
1478 
1479     case RXO_mullo:
1480       v = GS2 ();
1481       ll = (long long)(signed short)(GS()) * (long long)(signed short)(v);
1482       ll <<= 16;
1483       put_reg64 (acc64, ll);
1484       E1;
1485       break;
1486 
1487     case RXO_mvfachi:
1488       PD (get_reg (acchi));
1489       E1;
1490       break;
1491 
1492     case RXO_mvfaclo:
1493       PD (get_reg (acclo));
1494       E1;
1495       break;
1496 
1497     case RXO_mvfacmi:
1498       PD (get_reg (accmi));
1499       E1;
1500       break;
1501 
1502     case RXO_mvtachi:
1503       put_reg (acchi, GS ());
1504       E1;
1505       break;
1506 
1507     case RXO_mvtaclo:
1508       put_reg (acclo, GS ());
1509       E1;
1510       break;
1511 
1512     case RXO_mvtipl:
1513       regs.r_psw &= ~ FLAGBITS_IPL;
1514       regs.r_psw |= (GS () << FLAGSHIFT_IPL) & FLAGBITS_IPL;
1515       E1;
1516       break;
1517 
1518     case RXO_nop:
1519     case RXO_nop2:
1520     case RXO_nop3:
1521     case RXO_nop4:
1522     case RXO_nop5:
1523     case RXO_nop6:
1524     case RXO_nop7:
1525       E1;
1526       break;
1527 
1528     case RXO_or:
1529       LOGIC_OP (|);
1530       break;
1531 
1532     case RXO_popm:
1533       /* POPM cannot pop R0 (sp).  */
1534       if (opcode->op[1].reg == 0 || opcode->op[2].reg == 0)
1535 	EXCEPTION (EX_UNDEFINED);
1536       if (opcode->op[1].reg >= opcode->op[2].reg)
1537 	{
1538 	  regs.r_pc = opcode_pc;
1539 	  DO_RETURN (RX_MAKE_STOPPED (SIGILL));
1540 	}
1541       for (v = opcode->op[1].reg; v <= opcode->op[2].reg; v++)
1542 	{
1543 	  cycles (1);
1544 	  RLD (v);
1545 	  put_reg (v, pop ());
1546 	}
1547       break;
1548 
1549     case RXO_pushm:
1550       /* PUSHM cannot push R0 (sp).  */
1551       if (opcode->op[1].reg == 0 || opcode->op[2].reg == 0)
1552 	EXCEPTION (EX_UNDEFINED);
1553       if (opcode->op[1].reg >= opcode->op[2].reg)
1554 	{
1555 	  regs.r_pc = opcode_pc;
1556 	  return RX_MAKE_STOPPED (SIGILL);
1557 	}
1558       for (v = opcode->op[2].reg; v >= opcode->op[1].reg; v--)
1559 	{
1560 	  RL (v);
1561 	  push (get_reg (v));
1562 	}
1563       cycles (opcode->op[2].reg - opcode->op[1].reg + 1);
1564       break;
1565 
1566     case RXO_racw:
1567       ll = get_reg64 (acc64) << GS ();
1568       ll += 0x80000000ULL;
1569       if ((signed long long)ll > (signed long long)0x00007fff00000000ULL)
1570 	ll = 0x00007fff00000000ULL;
1571       else if ((signed long long)ll < (signed long long)0xffff800000000000ULL)
1572 	ll = 0xffff800000000000ULL;
1573       else
1574 	ll &= 0xffffffff00000000ULL;
1575       put_reg64 (acc64, ll);
1576       E1;
1577       break;
1578 
1579     case RXO_rte:
1580       PRIVILEDGED ();
1581       regs.r_pc = poppc ();
1582       regs.r_psw = poppc ();
1583       if (FLAG_PM)
1584 	regs.r_psw |= FLAGBIT_U;
1585 #ifdef CYCLE_ACCURATE
1586       regs.fast_return = 0;
1587       cycles (6);
1588 #endif
1589       break;
1590 
1591     case RXO_revl:
1592       uma = GS ();
1593       umb = (((uma >> 24) & 0xff)
1594 	     | ((uma >> 8) & 0xff00)
1595 	     | ((uma << 8) & 0xff0000)
1596 	     | ((uma << 24) & 0xff000000UL));
1597       PD (umb);
1598       E1;
1599       break;
1600 
1601     case RXO_revw:
1602       uma = GS ();
1603       umb = (((uma >> 8) & 0x00ff00ff)
1604 	     | ((uma << 8) & 0xff00ff00UL));
1605       PD (umb);
1606       E1;
1607       break;
1608 
1609     case RXO_rmpa:
1610       RL(4);
1611       RL(5);
1612 #ifdef CYCLE_ACCURATE
1613       tx = regs.r[3];
1614 #endif
1615 
1616       while (regs.r[3] != 0)
1617 	{
1618 	  long long tmp;
1619 
1620 	  switch (opcode->size)
1621 	    {
1622 	    case RX_Long:
1623 	      ma = mem_get_si (regs.r[1]);
1624 	      mb = mem_get_si (regs.r[2]);
1625 	      regs.r[1] += 4;
1626 	      regs.r[2] += 4;
1627 	      break;
1628 	    case RX_Word:
1629 	      ma = sign_ext (mem_get_hi (regs.r[1]), 16);
1630 	      mb = sign_ext (mem_get_hi (regs.r[2]), 16);
1631 	      regs.r[1] += 2;
1632 	      regs.r[2] += 2;
1633 	      break;
1634 	    case RX_Byte:
1635 	      ma = sign_ext (mem_get_qi (regs.r[1]), 8);
1636 	      mb = sign_ext (mem_get_qi (regs.r[2]), 8);
1637 	      regs.r[1] += 1;
1638 	      regs.r[2] += 1;
1639 	      break;
1640 	    default:
1641 	      abort ();
1642 	    }
1643 	  /* We do the multiply as a signed value.  */
1644 	  sll = (long long)ma * (long long)mb;
1645 	  tprintf("        %016llx = %d * %d\n", sll, ma, mb);
1646 	  /* but we do the sum as unsigned, while sign extending the operands.  */
1647 	  tmp = regs.r[4] + (sll & 0xffffffffUL);
1648 	  regs.r[4] = tmp & 0xffffffffUL;
1649 	  tmp >>= 32;
1650 	  sll >>= 32;
1651 	  tmp += regs.r[5] + (sll & 0xffffffffUL);
1652 	  regs.r[5] = tmp & 0xffffffffUL;
1653 	  tmp >>= 32;
1654 	  sll >>= 32;
1655 	  tmp += regs.r[6] + (sll & 0xffffffffUL);
1656 	  regs.r[6] = tmp & 0xffffffffUL;
1657 	  tprintf("%08lx\033[36m%08lx\033[0m%08lx\n",
1658 		  (unsigned long) regs.r[6],
1659 		  (unsigned long) regs.r[5],
1660 		  (unsigned long) regs.r[4]);
1661 
1662 	  regs.r[3] --;
1663 	}
1664       if (regs.r[6] & 0x00008000)
1665 	regs.r[6] |= 0xffff0000UL;
1666       else
1667 	regs.r[6] &= 0x0000ffff;
1668       ma = (regs.r[6] & 0x80000000UL) ? FLAGBIT_S : 0;
1669       if (regs.r[6] != 0 && regs.r[6] != 0xffffffffUL)
1670 	set_flags (FLAGBIT_O|FLAGBIT_S, ma | FLAGBIT_O);
1671       else
1672 	set_flags (FLAGBIT_O|FLAGBIT_S, ma);
1673 #ifdef CYCLE_ACCURATE
1674       switch (opcode->size)
1675 	{
1676 	case RX_Long:
1677 	  cycles (6 + 4 * tx);
1678 	  break;
1679 	case RX_Word:
1680 	  cycles (6 + 5 * (tx / 2) + 4 * (tx % 2));
1681 	  break;
1682 	case RX_Byte:
1683 	  cycles (6 + 7 * (tx / 4) + 4 * (tx % 4));
1684 	  break;
1685 	default:
1686 	  abort ();
1687 	}
1688 #endif
1689       break;
1690 
1691     case RXO_rolc:
1692       v = GD ();
1693       ma = v & 0x80000000UL;
1694       v <<= 1;
1695       v |= carry;
1696       set_szc (v, 4, ma);
1697       PD (v);
1698       E1;
1699       break;
1700 
1701     case RXO_rorc:
1702       uma = GD ();
1703       mb = uma & 1;
1704       uma >>= 1;
1705       uma |= (carry ? 0x80000000UL : 0);
1706       set_szc (uma, 4, mb);
1707       PD (uma);
1708       E1;
1709       break;
1710 
1711     case RXO_rotl:
1712       mb = GS ();
1713       uma = GD ();
1714       if (mb)
1715 	{
1716 	  uma = (uma << mb) | (uma >> (32-mb));
1717 	  mb = uma & 1;
1718 	}
1719       set_szc (uma, 4, mb);
1720       PD (uma);
1721       E1;
1722       break;
1723 
1724     case RXO_rotr:
1725       mb = GS ();
1726       uma = GD ();
1727       if (mb)
1728 	{
1729 	  uma = (uma >> mb) | (uma << (32-mb));
1730 	  mb = uma & 0x80000000;
1731 	}
1732       set_szc (uma, 4, mb);
1733       PD (uma);
1734       E1;
1735       break;
1736 
1737     case RXO_round:
1738       ma = GS ();
1739       FPCLEAR ();
1740       mb = rxfp_ftoi (ma, regs.r_fpsw);
1741       FPCHECK ();
1742       PD (mb);
1743       tprintf("(int) %g = %d\n", int2float(ma), mb);
1744       set_sz (mb, 4);
1745       E (2);
1746       break;
1747 
1748     case RXO_rts:
1749       {
1750 #ifdef CYCLE_ACCURATE
1751 	int cyc = 5;
1752 #endif
1753 	regs.r_pc = poppc ();
1754 #ifdef CYCLE_ACCURATE
1755 	/* Note: specs say 5, chip says 3.  */
1756 	if (regs.fast_return && regs.link_register == regs.r_pc)
1757 	  {
1758 #ifdef WITH_PROFILE
1759 	    fast_returns ++;
1760 #endif
1761 	    tprintf("fast return bonus\n");
1762 	    cyc -= 2;
1763 	  }
1764 	cycles (cyc);
1765 	regs.fast_return = 0;
1766 	branch_alignment_penalty = 1;
1767 #endif
1768       }
1769       break;
1770 
1771     case RXO_rtsd:
1772       if (opcode->op[2].type == RX_Operand_Register)
1773 	{
1774 	  int i;
1775 	  /* RTSD cannot pop R0 (sp).  */
1776 	  put_reg (0, get_reg (0) + GS() - (opcode->op[0].reg-opcode->op[2].reg+1)*4);
1777 	  if (opcode->op[2].reg == 0)
1778 	    EXCEPTION (EX_UNDEFINED);
1779 #ifdef CYCLE_ACCURATE
1780 	  tx = opcode->op[0].reg - opcode->op[2].reg + 1;
1781 #endif
1782 	  for (i = opcode->op[2].reg; i <= opcode->op[0].reg; i ++)
1783 	    {
1784 	      RLD (i);
1785 	      put_reg (i, pop ());
1786 	    }
1787 	}
1788       else
1789 	{
1790 #ifdef CYCLE_ACCURATE
1791 	  tx = 0;
1792 #endif
1793 	  put_reg (0, get_reg (0) + GS());
1794 	}
1795       put_reg (pc, poppc());
1796 #ifdef CYCLE_ACCURATE
1797       if (regs.fast_return && regs.link_register == regs.r_pc)
1798 	{
1799 	  tprintf("fast return bonus\n");
1800 #ifdef WITH_PROFILE
1801 	  fast_returns ++;
1802 #endif
1803 	  cycles (tx < 3 ? 3 : tx + 1);
1804 	}
1805       else
1806 	{
1807 	  cycles (tx < 5 ? 5 : tx + 1);
1808 	}
1809       regs.fast_return = 0;
1810       branch_alignment_penalty = 1;
1811 #endif
1812       break;
1813 
1814     case RXO_sat:
1815       if (FLAG_O && FLAG_S)
1816 	PD (0x7fffffffUL);
1817       else if (FLAG_O && ! FLAG_S)
1818 	PD (0x80000000UL);
1819       E1;
1820       break;
1821 
1822     case RXO_satr:
1823       if (FLAG_O && ! FLAG_S)
1824 	{
1825 	  put_reg (6, 0x0);
1826 	  put_reg (5, 0x7fffffff);
1827 	  put_reg (4, 0xffffffff);
1828 	}
1829       else if (FLAG_O && FLAG_S)
1830 	{
1831 	  put_reg (6, 0xffffffff);
1832 	  put_reg (5, 0x80000000);
1833 	  put_reg (4, 0x0);
1834 	}
1835       E1;
1836       break;
1837 
1838     case RXO_sbb:
1839       MATH_OP (-, ! carry);
1840       break;
1841 
1842     case RXO_sccnd:
1843       if (GS())
1844 	PD (1);
1845       else
1846 	PD (0);
1847       E1;
1848       break;
1849 
1850     case RXO_scmpu:
1851 #ifdef CYCLE_ACCURATE
1852       tx = regs.r[3];
1853 #endif
1854       while (regs.r[3] != 0)
1855 	{
1856 	  uma = mem_get_qi (regs.r[1] ++);
1857 	  umb = mem_get_qi (regs.r[2] ++);
1858 	  regs.r[3] --;
1859 	  if (uma != umb || uma == 0)
1860 	    break;
1861 	}
1862       if (uma == umb)
1863 	set_zc (1, 1);
1864       else
1865 	set_zc (0, ((int)uma - (int)umb) >= 0);
1866       cycles (2 + 4 * (tx / 4) + 4 * (tx % 4));
1867       break;
1868 
1869     case RXO_setpsw:
1870       v = 1 << opcode->op[0].reg;
1871       if (FLAG_PM
1872 	  && (v == FLAGBIT_I
1873 	      || v == FLAGBIT_U))
1874 	break;
1875       regs.r_psw |= v;
1876       cycles (1);
1877       break;
1878 
1879     case RXO_smovb:
1880       RL (3);
1881 #ifdef CYCLE_ACCURATE
1882       tx = regs.r[3];
1883 #endif
1884       while (regs.r[3])
1885 	{
1886 	  uma = mem_get_qi (regs.r[2] --);
1887 	  mem_put_qi (regs.r[1]--, uma);
1888 	  regs.r[3] --;
1889 	}
1890 #ifdef CYCLE_ACCURATE
1891       if (tx > 3)
1892 	cycles (6 + 3 * (tx / 4) + 3 * (tx % 4));
1893       else
1894 	cycles (2 + 3 * (tx % 4));
1895 #endif
1896       break;
1897 
1898     case RXO_smovf:
1899       RL (3);
1900 #ifdef CYCLE_ACCURATE
1901       tx = regs.r[3];
1902 #endif
1903       while (regs.r[3])
1904 	{
1905 	  uma = mem_get_qi (regs.r[2] ++);
1906 	  mem_put_qi (regs.r[1]++, uma);
1907 	  regs.r[3] --;
1908 	}
1909       cycles (2 + 3 * (int)(tx / 4) + 3 * (tx % 4));
1910       break;
1911 
1912     case RXO_smovu:
1913 #ifdef CYCLE_ACCURATE
1914       tx = regs.r[3];
1915 #endif
1916       while (regs.r[3] != 0)
1917 	{
1918 	  uma = mem_get_qi (regs.r[2] ++);
1919 	  mem_put_qi (regs.r[1]++, uma);
1920 	  regs.r[3] --;
1921 	  if (uma == 0)
1922 	    break;
1923 	}
1924       cycles (2 + 3 * (int)(tx / 4) + 3 * (tx % 4));
1925       break;
1926 
1927     case RXO_shar: /* d = ma >> mb */
1928       SHIFT_OP (sll, int, mb, >>=, 1);
1929       E (1);
1930       break;
1931 
1932     case RXO_shll: /* d = ma << mb */
1933       SHIFT_OP (ll, int, mb, <<=, 0x80000000UL);
1934       E (1);
1935       break;
1936 
1937     case RXO_shlr: /* d = ma >> mb */
1938       SHIFT_OP (ll, unsigned int, mb, >>=, 1);
1939       E (1);
1940       break;
1941 
1942     case RXO_sstr:
1943       RL (3);
1944 #ifdef CYCLE_ACCURATE
1945       tx = regs.r[3];
1946 #endif
1947       switch (opcode->size)
1948 	{
1949 	case RX_Long:
1950 	  while (regs.r[3] != 0)
1951 	    {
1952 	      mem_put_si (regs.r[1], regs.r[2]);
1953 	      regs.r[1] += 4;
1954 	      regs.r[3] --;
1955 	    }
1956 	  cycles (2 + tx);
1957 	  break;
1958 	case RX_Word:
1959 	  while (regs.r[3] != 0)
1960 	    {
1961 	      mem_put_hi (regs.r[1], regs.r[2]);
1962 	      regs.r[1] += 2;
1963 	      regs.r[3] --;
1964 	    }
1965 	  cycles (2 + (int)(tx / 2) + tx % 2);
1966 	  break;
1967 	case RX_Byte:
1968 	  while (regs.r[3] != 0)
1969 	    {
1970 	      mem_put_qi (regs.r[1], regs.r[2]);
1971 	      regs.r[1] ++;
1972 	      regs.r[3] --;
1973 	    }
1974 	  cycles (2 + (int)(tx / 4) + tx % 4);
1975 	  break;
1976 	default:
1977 	  abort ();
1978 	}
1979       break;
1980 
1981     case RXO_stcc:
1982       if (GS2())
1983 	PD (GS ());
1984       E1;
1985       break;
1986 
1987     case RXO_stop:
1988       PRIVILEDGED ();
1989       regs.r_psw |= FLAGBIT_I;
1990       DO_RETURN (RX_MAKE_STOPPED(0));
1991 
1992     case RXO_sub:
1993       MATH_OP (-, 0);
1994       break;
1995 
1996     case RXO_suntil:
1997       RL(3);
1998 #ifdef CYCLE_ACCURATE
1999       tx = 0;
2000 #endif
2001       if (regs.r[3] == 0)
2002 	{
2003 	  cycles (3);
2004 	  break;
2005 	}
2006       switch (opcode->size)
2007 	{
2008 	case RX_Long:
2009 	  uma = get_reg (2);
2010 	  while (regs.r[3] != 0)
2011 	    {
2012 	      regs.r[3] --;
2013 	      umb = mem_get_si (get_reg (1));
2014 	      regs.r[1] += 4;
2015 #ifdef CYCLE_ACCURATE
2016 	      tx ++;
2017 #endif
2018 	      if (umb == uma)
2019 		break;
2020 	    }
2021 #ifdef CYCLE_ACCURATE
2022 	  cycles (3 + 3 * tx);
2023 #endif
2024 	  break;
2025 	case RX_Word:
2026 	  uma = get_reg (2) & 0xffff;
2027 	  while (regs.r[3] != 0)
2028 	    {
2029 	      regs.r[3] --;
2030 	      umb = mem_get_hi (get_reg (1));
2031 	      regs.r[1] += 2;
2032 #ifdef CYCLE_ACCURATE
2033 	      tx ++;
2034 #endif
2035 	      if (umb == uma)
2036 		break;
2037 	    }
2038 #ifdef CYCLE_ACCURATE
2039 	  cycles (3 + 3 * (tx / 2) + 3 * (tx % 2));
2040 #endif
2041 	  break;
2042 	case RX_Byte:
2043 	  uma = get_reg (2) & 0xff;
2044 	  while (regs.r[3] != 0)
2045 	    {
2046 	      regs.r[3] --;
2047 	      umb = mem_get_qi (regs.r[1]);
2048 	      regs.r[1] += 1;
2049 #ifdef CYCLE_ACCURATE
2050 	      tx ++;
2051 #endif
2052 	      if (umb == uma)
2053 		break;
2054 	    }
2055 #ifdef CYCLE_ACCURATE
2056 	  cycles (3 + 3 * (tx / 4) + 3 * (tx % 4));
2057 #endif
2058 	  break;
2059 	default:
2060 	  abort();
2061 	}
2062       if (uma == umb)
2063 	set_zc (1, 1);
2064       else
2065 	set_zc (0, ((int)uma - (int)umb) >= 0);
2066       break;
2067 
2068     case RXO_swhile:
2069       RL(3);
2070 #ifdef CYCLE_ACCURATE
2071       tx = 0;
2072 #endif
2073       if (regs.r[3] == 0)
2074 	break;
2075       switch (opcode->size)
2076 	{
2077 	case RX_Long:
2078 	  uma = get_reg (2);
2079 	  while (regs.r[3] != 0)
2080 	    {
2081 	      regs.r[3] --;
2082 	      umb = mem_get_si (get_reg (1));
2083 	      regs.r[1] += 4;
2084 #ifdef CYCLE_ACCURATE
2085 	      tx ++;
2086 #endif
2087 	      if (umb != uma)
2088 		break;
2089 	    }
2090 #ifdef CYCLE_ACCURATE
2091 	  cycles (3 + 3 * tx);
2092 #endif
2093 	  break;
2094 	case RX_Word:
2095 	  uma = get_reg (2) & 0xffff;
2096 	  while (regs.r[3] != 0)
2097 	    {
2098 	      regs.r[3] --;
2099 	      umb = mem_get_hi (get_reg (1));
2100 	      regs.r[1] += 2;
2101 #ifdef CYCLE_ACCURATE
2102 	      tx ++;
2103 #endif
2104 	      if (umb != uma)
2105 		break;
2106 	    }
2107 #ifdef CYCLE_ACCURATE
2108 	  cycles (3 + 3 * (tx / 2) + 3 * (tx % 2));
2109 #endif
2110 	  break;
2111 	case RX_Byte:
2112 	  uma = get_reg (2) & 0xff;
2113 	  while (regs.r[3] != 0)
2114 	    {
2115 	      regs.r[3] --;
2116 	      umb = mem_get_qi (regs.r[1]);
2117 	      regs.r[1] += 1;
2118 #ifdef CYCLE_ACCURATE
2119 	      tx ++;
2120 #endif
2121 	      if (umb != uma)
2122 		break;
2123 	    }
2124 #ifdef CYCLE_ACCURATE
2125 	  cycles (3 + 3 * (tx / 4) + 3 * (tx % 4));
2126 #endif
2127 	  break;
2128 	default:
2129 	  abort();
2130 	}
2131       if (uma == umb)
2132 	set_zc (1, 1);
2133       else
2134 	set_zc (0, ((int)uma - (int)umb) >= 0);
2135       break;
2136 
2137     case RXO_wait:
2138       PRIVILEDGED ();
2139       regs.r_psw |= FLAGBIT_I;
2140       DO_RETURN (RX_MAKE_STOPPED(0));
2141 
2142     case RXO_xchg:
2143 #ifdef CYCLE_ACCURATE
2144       regs.m2m = 0;
2145 #endif
2146       v = GS (); /* This is the memory operand, if any.  */
2147       PS (GD ()); /* and this may change the address register.  */
2148       PD (v);
2149       E2;
2150 #ifdef CYCLE_ACCURATE
2151       /* all M cycles happen during xchg's cycles.  */
2152       memory_dest = 0;
2153       memory_source = 0;
2154 #endif
2155       break;
2156 
2157     case RXO_xor:
2158       LOGIC_OP (^);
2159       break;
2160 
2161     default:
2162       EXCEPTION (EX_UNDEFINED);
2163     }
2164 
2165 #ifdef CYCLE_ACCURATE
2166   regs.m2m = 0;
2167   if (memory_source)
2168     regs.m2m |= M2M_SRC;
2169   if (memory_dest)
2170     regs.m2m |= M2M_DST;
2171 
2172   regs.rt = new_rt;
2173   new_rt = -1;
2174 #endif
2175 
2176 #ifdef WITH_PROFILE
2177   if (prev_cycle_count == regs.cycle_count)
2178     {
2179       printf("Cycle count not updated! id %s\n", id_names[opcode->id]);
2180       abort ();
2181     }
2182 #endif
2183 
2184 #ifdef WITH_PROFILE
2185   if (running_benchmark)
2186     {
2187       int omap = op_lookup (opcode->op[0].type, opcode->op[1].type, opcode->op[2].type);
2188 
2189 
2190       cycles_per_id[opcode->id][omap] += regs.cycle_count - prev_cycle_count;
2191       times_per_id[opcode->id][omap] ++;
2192 
2193       times_per_pair[prev_opcode_id][po0][opcode->id][omap] ++;
2194 
2195       prev_opcode_id = opcode->id;
2196       po0 = omap;
2197     }
2198 #endif
2199 
2200   return RX_MAKE_STEPPED ();
2201 }
2202 
2203 #ifdef WITH_PROFILE
/* Clear all profiling state and begin a new benchmark interval.

   Zeroes the per-opcode tables (cycles_per_id, times_per_id), the
   opcode-pair table (times_per_pair) and the stall / fast-return
   counters, then sets running_benchmark and records the current
   cycle count as the start of the interval.  Takes no arguments and
   returns nothing; it only writes file-level profiling state.  */
2204 void
2205 reset_pipeline_stats (void)
2206 {
  /* Per-(opcode id, operand map) totals.  */
2207   memset (cycles_per_id, 0, sizeof(cycles_per_id));
2208   memset (times_per_id, 0, sizeof(times_per_id));
  /* Scalar event counters reported at the end of pipeline_stats.  */
2209   memory_stalls = 0;
2210   register_stalls = 0;
2211   branch_stalls = 0;
2212   branch_alignment_stalls = 0;
2213   fast_returns = 0;
  /* Counts of (previous opcode, current opcode) pairs.  */
2214   memset (times_per_pair, 0, sizeof(times_per_pair));
  /* The opcode executor only accumulates the per-id and per-pair
     tables while this flag is non-zero.  */
2215   running_benchmark = 1;
2216 
  /* pipeline_stats reports benchmark_end_cycle minus this value.  */
2217   benchmark_start_cycle = regs.cycle_count;
2218 }
2219 
/* End the current benchmark interval.

   Clears running_benchmark and records the current cycle count in
   benchmark_end_cycle, which pipeline_stats later subtracts
   benchmark_start_cycle from.  The accumulated tables and counters
   are left untouched.  */
2220 void
2221 halt_pipeline_stats (void)
2222 {
2223   running_benchmark = 0;
2224   benchmark_end_cycle = regs.cycle_count;
2225 }
2226 #endif
2227 
/* Print the collected cycle and profiling statistics to stdout.

   What is printed depends on the build configuration:
     - CYCLE_ACCURATE: the total cycle count.  When verbose == 1 only
       the raw number is printed and the function returns at once.
     - WITH_PROFILE: the benchmark interval length (if one was
       started), a per-opcode table, an opcode-pair table and the
       stall / fast-return counters.
   With neither macro defined the function prints nothing.  */
2228 void
2229 pipeline_stats (void)
2230 {
2231 #ifdef WITH_PROFILE
  /* i/o1 index the current opcode id and operand map; p/p1 index the
     preceding opcode id and operand map in the pair table.  */
2232   int i, o1;
2233   int p, p1;
2234 #endif
2235 
2236 #ifdef CYCLE_ACCURATE
  /* Terse mode: just the plain cycle count, nothing else.  */
2237   if (verbose == 1)
2238     {
2239       printf ("cycles: %llu\n", regs.cycle_count);
2240       return;
2241     }
2242 
  /* NOTE(review): comma () presumably returns the number formatted
     with thousands separators -- confirm against its definition.  */
2243   printf ("cycles: %13s\n", comma (regs.cycle_count));
2244 #endif
2245 
2246 #ifdef WITH_PROFILE
  /* benchmark_start_cycle is assigned by reset_pipeline_stats; a zero
     value suppresses the benchmark line.  */
2247   if (benchmark_start_cycle)
2248     printf ("bmark:  %13s\n", comma (benchmark_end_cycle - benchmark_start_cycle));
2249 
2250   printf("\n");
  /* One row per (opcode id, operand map) that was executed at least
     once: total cycles, execution count, average cycles per
     execution.  The "+4" skips the first four characters of the name,
     i.e. the "RXO_" prefix used by the id_names entries.  */
2251   for (i = 0; i < N_RXO; i++)
2252     for (o1 = 0; o1 < N_MAP; o1 ++)
2253       if (times_per_id[i][o1])
2254 	printf("%13s %13s %7.2f  %s %s\n",
2255 	       comma (cycles_per_id[i][o1]),
2256 	       comma (times_per_id[i][o1]),
2257 	       (double)cycles_per_id[i][o1] / times_per_id[i][o1],
2258 	       op_cache_string(o1),
2259 	       id_names[i]+4);
2260 
2261   printf("\n");
  /* One row per observed pair "previous opcode -> current opcode",
     each qualified by its operand map, with the number of times the
     pair occurred.  Pairs that never occurred are skipped.  */
2262   for (p = 0; p < N_RXO; p ++)
2263     for (p1 = 0; p1 < N_MAP; p1 ++)
2264       for (i = 0; i < N_RXO; i ++)
2265 	for (o1 = 0; o1 < N_MAP; o1 ++)
2266 	  if (times_per_pair[p][p1][i][o1])
2267 	    {
2268 	      printf("%13s   %s %-9s  ->  %s %s\n",
2269 		     comma (times_per_pair[p][p1][i][o1]),
2270 		     op_cache_string(p1),
2271 		     id_names[p]+4,
2272 		     op_cache_string(o1),
2273 		     id_names[i]+4);
2274 	    }
2275 
  /* Scalar event counters, the same ones reset_pipeline_stats zeroes.  */
2276   printf("\n");
2277   printf("%13s memory stalls\n", comma (memory_stalls));
2278   printf("%13s register stalls\n", comma (register_stalls));
2279   printf("%13s branches taken (non-return)\n", comma (branch_stalls));
2280   printf("%13s branch alignment stalls\n", comma (branch_alignment_stalls));
2281   printf("%13s fast returns\n", comma (fast_returns));
2282 #endif
2283 }
2284