1 /* rx.c --- opcode semantics for stand-alone RX simulator.
2 
3 Copyright (C) 2008-2023 Free Software Foundation, Inc.
4 Contributed by Red Hat, Inc.
5 
6 This file is part of the GNU simulators.
7 
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
12 
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 GNU General Public License for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
20 
21 /* This must come before any other includes.  */
22 #include "defs.h"
23 
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <signal.h>
28 #include "libiberty.h"
29 
30 #include "opcode/rx.h"
31 #include "cpu.h"
32 #include "mem.h"
33 #include "syscalls.h"
34 #include "fpu.h"
35 #include "err.h"
36 #include "misc.h"
37 
38 #ifdef WITH_PROFILE
39 static const char * id_names[] = {
40   "RXO_unknown",
41   "RXO_mov",	/* d = s (signed) */
42   "RXO_movbi",	/* d = [s,s2] (signed) */
43   "RXO_movbir",	/* [s,s2] = d (signed) */
44   "RXO_pushm",	/* s..s2 */
45   "RXO_popm",	/* s..s2 */
46   "RXO_xchg",	/* s <-> d */
47   "RXO_stcc",	/* d = s if cond(s2) */
48   "RXO_rtsd",	/* rtsd, 1=imm, 2-0 = reg if reg type */
49 
50   /* These are all either d OP= s or, if s2 is set, d = s OP s2.  Note
51      that d may be "None".  */
52   "RXO_and",
53   "RXO_or",
54   "RXO_xor",
55   "RXO_add",
56   "RXO_sub",
57   "RXO_mul",
58   "RXO_div",
59   "RXO_divu",
60   "RXO_shll",
61   "RXO_shar",
62   "RXO_shlr",
63 
64   "RXO_adc",	/* d = d + s + carry */
65   "RXO_sbb",	/* d = d - s - ~carry */
66   "RXO_abs",	/* d = |s| */
67   "RXO_max",	/* d = max(d,s) */
68   "RXO_min",	/* d = min(d,s) */
69   "RXO_emul",	/* d:64 = d:32 * s */
70   "RXO_emulu",	/* d:64 = d:32 * s (unsigned) */
71 
72   "RXO_rolc",	/* d <<= 1 through carry */
73   "RXO_rorc",	/* d >>= 1 through carry */
74   "RXO_rotl",	/* d <<= #s without carry */
75   "RXO_rotr",	/* d >>= #s without carry */
76   "RXO_revw",	/* d = revw(s) */
77   "RXO_revl",	/* d = revl(s) */
78   "RXO_branch",	/* pc = d if cond(s) */
79   "RXO_branchrel",/* pc += d if cond(s) */
80   "RXO_jsr",	/* pc = d */
81   "RXO_jsrrel",	/* pc += d */
82   "RXO_rts",
83   "RXO_nop",
84   "RXO_nop2",
85   "RXO_nop3",
86   "RXO_nop4",
87   "RXO_nop5",
88   "RXO_nop6",
89   "RXO_nop7",
90 
91   "RXO_scmpu",
92   "RXO_smovu",
93   "RXO_smovb",
94   "RXO_suntil",
95   "RXO_swhile",
96   "RXO_smovf",
97   "RXO_sstr",
98 
99   "RXO_rmpa",
100   "RXO_mulhi",
101   "RXO_mullo",
102   "RXO_machi",
103   "RXO_maclo",
104   "RXO_mvtachi",
105   "RXO_mvtaclo",
106   "RXO_mvfachi",
107   "RXO_mvfacmi",
108   "RXO_mvfaclo",
109   "RXO_racw",
110 
111   "RXO_sat",	/* sat(d) */
112   "RXO_satr",
113 
114   "RXO_fadd",	/* d op= s */
115   "RXO_fcmp",
116   "RXO_fsub",
117   "RXO_ftoi",
118   "RXO_fmul",
119   "RXO_fdiv",
120   "RXO_round",
121   "RXO_itof",
122 
123   "RXO_bset",	/* d |= (1<<s) */
124   "RXO_bclr",	/* d &= ~(1<<s) */
125   "RXO_btst",	/* s & (1<<s2) */
126   "RXO_bnot",	/* d ^= (1<<s) */
127   "RXO_bmcc",	/* d<s> = cond(s2) */
128 
129   "RXO_clrpsw",	/* flag index in d */
130   "RXO_setpsw",	/* flag index in d */
131   "RXO_mvtipl",	/* new IPL in s */
132 
133   "RXO_rtfi",
134   "RXO_rte",
135   "RXO_rtd",	/* undocumented */
136   "RXO_brk",
137   "RXO_dbt",	/* undocumented */
138   "RXO_int",	/* vector id in s */
139   "RXO_stop",
140   "RXO_wait",
141 
142   "RXO_sccnd",	/* d = cond(s) ? 1 : 0 */
143 };
144 
145 static const char * optype_names[] = {
146   " -  ",
147   "#Imm",	/* #addend */
148   " Rn ",	/* Rn */
149   "[Rn]",	/* [Rn + addend] */
150   "Ps++",	/* [Rn+] */
151   "--Pr",	/* [-Rn] */
152   " cc ",	/* eq, gtu, etc */
153   "Flag",	/* [UIOSZC] */
154   "RbRi"	/* [Rb + scale * Ri] */
155 };
156 
157 #define N_RXO ARRAY_SIZE (id_names)
158 #define N_RXT ARRAY_SIZE (optype_names)
159 #define N_MAP 90
160 
161 static unsigned long long benchmark_start_cycle;
162 static unsigned long long benchmark_end_cycle;
163 
164 static int op_cache[N_RXT][N_RXT][N_RXT];
165 static int op_cache_rev[N_MAP];
166 static int op_cache_idx = 0;
167 
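/* The profiler keys its tables by the combination of an insn's three
   operand types.  op_lookup maps each distinct (op0, op1, op2) triple
   to a small dense index (bailing out if more than N_MAP distinct
   triples show up), and op_cache_rev remembers the triple as three
   4-bit fields so op_cache_string can print it back.  For example, a
   triple packed as (a<<8)|(b<<4)|c with a=2, b=1, c=0 is stored as
   0x210 and decoded again by shifting and masking with 15.  */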
168 static int
169 op_lookup (int a, int b, int c)
170 {
171   if (op_cache[a][b][c])
172     return op_cache[a][b][c];
173   op_cache_idx ++;
174   if (op_cache_idx >= N_MAP)
175     {
176       printf("op_cache_idx exceeds %d\n", N_MAP);
177       exit(1);
178     }
179   op_cache[a][b][c] = op_cache_idx;
180   op_cache_rev[op_cache_idx] = (a<<8) | (b<<4) | c;
181   return op_cache_idx;
182 }
183 
184 static char *
185 op_cache_string (int map)
186 {
187   static int ci;
188   static char cb[5][20];
189   int a, b, c;
190 
191   map = op_cache_rev[map];
192   a = (map >> 8) & 15;
193   b = (map >> 4) & 15;
194   c = (map >> 0) & 15;
195   ci = (ci + 1) % 5;
196   sprintf(cb[ci], "%s %s %s", optype_names[a], optype_names[b], optype_names[c]);
197   return cb[ci];
198 }
199 
200 static unsigned long long cycles_per_id[N_RXO][N_MAP];
201 static unsigned long long times_per_id[N_RXO][N_MAP];
202 static unsigned long long memory_stalls;
203 static unsigned long long register_stalls;
204 static unsigned long long branch_stalls;
205 static unsigned long long branch_alignment_stalls;
206 static unsigned long long fast_returns;
207 
208 static unsigned long times_per_pair[N_RXO][N_MAP][N_RXO][N_MAP];
209 static int prev_opcode_id = RXO_unknown;
210 static int po0;
211 
212 #define STATS(x) x
213 
214 #else
215 #define STATS(x)
216 #endif /* WITH_PROFILE */
217 
218 
219 #ifdef CYCLE_ACCURATE
220 
221 static int new_rt = -1;
222 
223 /* Number of cycles to add if an insn spans an 8-byte boundary.  */
224 static int branch_alignment_penalty = 0;
225 
226 #endif
227 
228 static int running_benchmark = 1;
229 
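/* Trace output helper: a call such as tprintf ("%d cycles\n", n)
   only prints when tracing is enabled and we are inside the
   benchmarked region, but reads like an ordinary printf call.  */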
230 #define tprintf if (trace && running_benchmark) printf
231 
232 jmp_buf decode_jmp_buf;
233 unsigned int rx_cycles = 0;
234 
235 #ifdef CYCLE_ACCURATE
236 /* If nonzero, memory was read at some point and cycle latency might
237    take effect.  */
238 static int memory_source = 0;
239 /* If nonzero, memory was written and extra cycles might be
240    needed.  */
241 static int memory_dest = 0;
242 
243 static void
244 cycles (int throughput)
245 {
246   tprintf("%d cycles\n", throughput);
247   regs.cycle_count += throughput;
248 }
249 
250 /* Number of execution (E) cycles the op uses.  For memory sources, we
251    include the load micro-op stall as two extra E cycles.  */
252 #define E(c) cycles (memory_source ? c + 2 : c)
253 #define E1 cycles (1)
254 #define E2 cycles (2)
255 #define EBIT cycles (memory_source ? 2 : 1)
256 
257 /* Check to see if a read latency must be applied for a given register.  */
258 #define RL(r) \
259   if (regs.rt == r )							\
260     {									\
261       tprintf("register %d load stall\n", r);				\
262       regs.cycle_count ++;						\
263       STATS(register_stalls ++);					\
264       regs.rt = -1;							\
265     }
266 
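/* Record that register R was just loaded from memory.  The value is
   latched into regs.rt once the insn finishes (see the end of
   decode_opcode), so a following insn that reads R pays the load-use
   stall modelled by RL above.  */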
267 #define RLD(r)					\
268   if (memory_source)				\
269     {						\
270       tprintf ("Rt now %d\n", r);		\
271       new_rt = r;				\
272     }
273 
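/* Despite the name, this finds the index of the most significant set
   bit of V (negating negative signed values first) and folds it into
   the bucket used by the divider timing model below, roughly one
   bucket per two significant bits.  E.g. v = 0x50 has its highest set
   bit at i = 6, so the result is (6 + 2) / 2 = 4.  */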
274 static int
275 lsb_count (unsigned long v, int is_signed)
276 {
277   int i, lsb;
278   if (is_signed && (v & 0x80000000U))
279     v = (unsigned long)(long)(-v);
280   for (i=31; i>=0; i--)
281     if (v & (1 << i))
282       {
283 	/* i is the highest set bit (0..31); counting significant bits n = i + 1, the mapping is 1-2 -> 1, 3-4 -> 2, 5-6 -> 3, etc. */
284 	lsb = (i + 2) / 2;
285 	return lsb;
286       }
287   return 0;
288 }
289 
290 static int
291 divu_cycles(unsigned long num, unsigned long den)
292 {
293   int nb = lsb_count (num, 0);
294   int db = lsb_count (den, 0);
295   int rv;
296 
297   if (nb < db)
298     rv = 2;
299   else
300     rv = 3 + nb - db;
301   E (rv);
302   return rv;
303 }
304 
305 static int
306 div_cycles(long num, long den)
307 {
308   int nb = lsb_count ((unsigned long)num, 1);
309   int db = lsb_count ((unsigned long)den, 1);
310   int rv;
311 
312   if (nb < db)
313     rv = 3;
314   else
315     rv = 5 + nb - db;
316   E (rv);
317   return rv;
318 }
319 
320 #else /* !CYCLE_ACCURATE */
321 
322 #define cycles(t)
323 #define E(c)
324 #define E1
325 #define E2
326 #define EBIT
327 #define RL(r)
328 #define RLD(r)
329 
330 #define divu_cycles(n,d)
331 #define div_cycles(n,d)
332 
333 #endif /* else CYCLE_ACCURATE */
334 
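/* Operand width in bytes, indexed by operand size: AnySize, then
   Byte/UByte/SByte, Word/UWord/SWord, 3Byte, Long (presumably the
   order of the size enumeration in opcode/rx.h).  */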
335 static int size2bytes[] = {
336   4, 1, 1, 1, 2, 2, 2, 3, 4
337 };
338 
339 typedef struct {
340   unsigned long dpc;
341 } RX_Data;
342 
343 #define rx_abort() _rx_abort(__FILE__, __LINE__)
344 static void
345 _rx_abort (const char *file, int line)
346 {
347   if (strrchr (file, '/'))
348     file = strrchr (file, '/') + 1;
349   fprintf(stderr, "abort at %s:%d\n", file, line);
350   abort();
351 }
352 
353 static unsigned char *get_byte_base;
354 static RX_Opcode_Decoded **decode_cache_base;
355 static SI get_byte_page;
356 
357 void
358 reset_decoder (void)
359 {
360   get_byte_base = 0;
361   decode_cache_base = 0;
362   get_byte_page = 0;
363 }
364 
365 static inline void
366 maybe_get_mem_page (SI tpc)
367 {
368   if (((tpc ^ get_byte_page) & NONPAGE_MASK) || enable_counting)
369     {
370       get_byte_page = tpc & NONPAGE_MASK;
371       get_byte_base = rx_mem_ptr (get_byte_page, MPA_READING) - get_byte_page;
372       decode_cache_base = rx_mem_decode_cache (get_byte_page) - get_byte_page;
373     }
374 }
375 
376 /* This gets called a *lot* so optimize it.  */
377 static int
378 rx_get_byte (void *vdata)
379 {
380   RX_Data *rx_data = (RX_Data *)vdata;
381   SI tpc = rx_data->dpc;
382 
383   /* See load.c for an explanation of this.  */
384   if (rx_big_endian)
385     tpc ^= 3;
386 
387   maybe_get_mem_page (tpc);
388 
389   rx_data->dpc ++;
390   return get_byte_base [tpc];
391 }
392 
393 static int
394 get_op (const RX_Opcode_Decoded *rd, int i)
395 {
396   const RX_Opcode_Operand *o = rd->op + i;
397   int addr, rv = 0;
398 
399   switch (o->type)
400     {
401     case RX_Operand_None:
402       rx_abort ();
403 
404     case RX_Operand_Immediate:	/* #addend */
405       return o->addend;
406 
407     case RX_Operand_Register:	/* Rn */
408       RL (o->reg);
409       rv = get_reg (o->reg);
410       break;
411 
412     case RX_Operand_Predec:	/* [-Rn] */
413       put_reg (o->reg, get_reg (o->reg) - size2bytes[o->size]);
414       /* fall through */
415     case RX_Operand_Postinc:	/* [Rn+] */
416     case RX_Operand_Zero_Indirect:	/* [Rn + 0] */
417     case RX_Operand_Indirect:	/* [Rn + addend] */
418     case RX_Operand_TwoReg:	/* [Rn + scale * R2] */
419 #ifdef CYCLE_ACCURATE
420       RL (o->reg);
421       if (o->type == RX_Operand_TwoReg)
422 	RL (rd->op[2].reg);
423       regs.rt = -1;
424       if (regs.m2m == M2M_BOTH)
425 	{
426 	  tprintf("src memory stall\n");
427 #ifdef WITH_PROFILE
428 	  memory_stalls ++;
429 #endif
430 	  regs.cycle_count ++;
431 	  regs.m2m = 0;
432 	}
433 
434       memory_source = 1;
435 #endif
436 
437       if (o->type == RX_Operand_TwoReg)
438 	addr = get_reg (o->reg) * size2bytes[rd->size] + get_reg (rd->op[2].reg);
439       else
440 	addr = get_reg (o->reg) + o->addend;
441 
442       switch (o->size)
443 	{
444 	default:
445 	case RX_AnySize:
446 	  rx_abort ();
447 
448 	case RX_Byte: /* undefined extension */
449 	case RX_UByte:
450 	case RX_SByte:
451 	  rv = mem_get_qi (addr);
452 	  break;
453 
454 	case RX_Word: /* undefined extension */
455 	case RX_UWord:
456 	case RX_SWord:
457 	  rv = mem_get_hi (addr);
458 	  break;
459 
460 	case RX_3Byte:
461 	  rv = mem_get_psi (addr);
462 	  break;
463 
464 	case RX_Long:
465 	  rv = mem_get_si (addr);
466 	  break;
467 	}
468 
469       if (o->type == RX_Operand_Postinc)
470 	put_reg (o->reg, get_reg (o->reg) + size2bytes[o->size]);
471 
472       break;
473 
474     case RX_Operand_Condition:	/* eq, gtu, etc */
475       return condition_true (o->reg);
476 
477     case RX_Operand_Flag:	/* [UIOSZC] */
478       return (regs.r_psw & (1 << o->reg)) ? 1 : 0;
479     }
480 
481   /* if we've gotten here, we need to clip/extend the value according
482      to the size.  */
483   switch (o->size)
484     {
485     default:
486     case RX_AnySize:
487       rx_abort ();
488 
489     case RX_Byte: /* undefined extension */
490       rv |= 0xdeadbe00; /* keep them honest */
491       break;
492 
493     case RX_UByte:
494       rv &= 0xff;
495       break;
496 
497     case RX_SByte:
498       rv = sign_ext (rv, 8);
499       break;
500 
501     case RX_Word: /* undefined extension */
502       rv |= 0xdead0000; /* keep them honest */
503       break;
504 
505     case RX_UWord:
506       rv &=  0xffff;
507       break;
508 
509     case RX_SWord:
510       rv = sign_ext (rv, 16);
511       break;
512 
513     case RX_3Byte:
514       rv &= 0xffffff;
515       break;
516 
517     case RX_Long:
518       break;
519     }
520   return rv;
521 }
522 
523 static void
524 put_op (const RX_Opcode_Decoded *rd, int i, int v)
525 {
526   const RX_Opcode_Operand *o = rd->op + i;
527   int addr;
528 
529   switch (o->size)
530     {
531     default:
532     case RX_AnySize:
533       if (o->type != RX_Operand_Register)
534 	rx_abort ();
535       break;
536 
537     case RX_Byte: /* undefined extension */
538       v |= 0xdeadbe00; /* keep them honest */
539       break;
540 
541     case RX_UByte:
542       v &= 0xff;
543       break;
544 
545     case RX_SByte:
546       v = sign_ext (v, 8);
547       break;
548 
549     case RX_Word: /* undefined extension */
550       v |= 0xdead0000; /* keep them honest */
551       break;
552 
553     case RX_UWord:
554       v &=  0xffff;
555       break;
556 
557     case RX_SWord:
558       v = sign_ext (v, 16);
559       break;
560 
561     case RX_3Byte:
562       v &= 0xffffff;
563       break;
564 
565     case RX_Long:
566       break;
567     }
568 
569   switch (o->type)
570     {
571     case RX_Operand_None:
572       /* Opcodes like TST and CMP use this.  */
573       break;
574 
575     case RX_Operand_Immediate:	/* #addend */
576     case RX_Operand_Condition:	/* eq, gtu, etc */
577       rx_abort ();
578 
579     case RX_Operand_Register:	/* Rn */
580       put_reg (o->reg, v);
581       RLD (o->reg);
582       break;
583 
584     case RX_Operand_Predec:	/* [-Rn] */
585       put_reg (o->reg, get_reg (o->reg) - size2bytes[o->size]);
586       /* fall through */
587     case RX_Operand_Postinc:	/* [Rn+] */
588     case RX_Operand_Zero_Indirect:	/* [Rn + 0] */
589     case RX_Operand_Indirect:	/* [Rn + addend] */
590     case RX_Operand_TwoReg:	/* [Rn + scale * R2] */
591 
592 #ifdef CYCLE_ACCURATE
593       if (regs.m2m == M2M_BOTH)
594 	{
595 	  tprintf("dst memory stall\n");
596 	  regs.cycle_count ++;
597 #ifdef WITH_PROFILE
598 	  memory_stalls ++;
599 #endif
600 	  regs.m2m = 0;
601 	}
602       memory_dest = 1;
603 #endif
604 
605       if (o->type == RX_Operand_TwoReg)
606 	addr = get_reg (o->reg) * size2bytes[rd->size] + get_reg (rd->op[2].reg);
607       else
608 	addr = get_reg (o->reg) + o->addend;
609 
610       switch (o->size)
611 	{
612 	default:
613 	case RX_AnySize:
614 	  rx_abort ();
615 
616 	case RX_Byte: /* undefined extension */
617 	case RX_UByte:
618 	case RX_SByte:
619 	  mem_put_qi (addr, v);
620 	  break;
621 
622 	case RX_Word: /* undefined extension */
623 	case RX_UWord:
624 	case RX_SWord:
625 	  mem_put_hi (addr, v);
626 	  break;
627 
628 	case RX_3Byte:
629 	  mem_put_psi (addr, v);
630 	  break;
631 
632 	case RX_Long:
633 	  mem_put_si (addr, v);
634 	  break;
635 	}
636 
637       if (o->type == RX_Operand_Postinc)
638 	put_reg (o->reg, get_reg (o->reg) + size2bytes[o->size]);
639 
640       break;
641 
642     case RX_Operand_Flag:	/* [UIOSZC] */
643       if (v)
644 	regs.r_psw |= (1 << o->reg);
645       else
646 	regs.r_psw &= ~(1 << o->reg);
647       break;
648     }
649 }
650 
651 #define PD(x) put_op (opcode, 0, x)
652 #define PS(x) put_op (opcode, 1, x)
653 #define PS2(x) put_op (opcode, 2, x)
654 #define GD() get_op (opcode, 0)
655 #define GS() get_op (opcode, 1)
656 #define GS2() get_op (opcode, 2)
657 #define DSZ() size2bytes[opcode->op[0].size]
658 #define SSZ() size2bytes[opcode->op[1].size]
659 #define S2SZ() size2bytes[opcode->op[2].size]
660 
661 /* "Universal" sources.  */
662 #define US1() ((opcode->op[2].type == RX_Operand_None) ? GD() : GS())
663 #define US2() ((opcode->op[2].type == RX_Operand_None) ? GS() : GS2())
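
/* For two-operand encodings (no third operand) the insn acts as
   "d OP= s", so the first ALU input is the destination itself; for
   three-operand encodings it is "d = s OP s2".  US1/US2 hide that
   difference from MATH_OP and LOGIC_OP below.  */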
664 
665 static void
666 push(int val)
667 {
668   int rsp = get_reg (sp);
669   rsp -= 4;
670   put_reg (sp, rsp);
671   mem_put_si (rsp, val);
672 }
673 
674 /* Just like the above, but tag the memory as "pushed pc" so if anyone
675    tries to write to it, it will cause an error.  */
676 static void
677 pushpc(int val)
678 {
679   int rsp = get_reg (sp);
680   rsp -= 4;
681   put_reg (sp, rsp);
682   mem_put_si (rsp, val);
683   mem_set_content_range (rsp, rsp+3, MC_PUSHED_PC);
684 }
685 
686 static int
687 pop (void)
688 {
689   int rv;
690   int rsp = get_reg (sp);
691   rv = mem_get_si (rsp);
692   rsp += 4;
693   put_reg (sp, rsp);
694   return rv;
695 }
696 
697 static int
698 poppc (void)
699 {
700   int rv;
701   int rsp = get_reg (sp);
702   if (mem_get_content_type (rsp) != MC_PUSHED_PC)
703     execution_error (SIM_ERR_CORRUPT_STACK, rsp);
704   rv = mem_get_si (rsp);
705   mem_set_content_range (rsp, rsp+3, MC_UNINIT);
706   rsp += 4;
707   put_reg (sp, rsp);
708   return rv;
709 }
710 
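/* Statement macros for the common ALU patterns.  MATH_OP evaluates
   the operation twice: on the raw unsigned values to recover the
   carry/borrow, and on sign-extended values to get the stored result
   and the overflow sense; it then sets O/S/Z/C and writes the result
   (PD is a no-op for compare-style insns whose destination is
   "None").  */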
711 #define MATH_OP(vop,c)				\
712 { \
713   umb = US2(); \
714   uma = US1(); \
715   ll = (unsigned long long) uma vop (unsigned long long) umb vop c; \
716   tprintf ("0x%x " #vop " 0x%x " #vop " 0x%x = 0x%llx\n", uma, umb, c, ll); \
717   ma = sign_ext (uma, DSZ() * 8);					\
718   mb = sign_ext (umb, DSZ() * 8);					\
719   sll = (long long) ma vop (long long) mb vop c; \
720   tprintf ("%d " #vop " %d " #vop " %d = %lld\n", ma, mb, c, sll); \
721   set_oszc (sll, DSZ(), (long long) ll > ((1 vop 1) ? (long long) b2mask[DSZ()] : (long long) -1)); \
722   PD (sll); \
723   E (1);    \
724 }
725 
726 #define LOGIC_OP(vop) \
727 { \
728   mb = US2(); \
729   ma = US1(); \
730   v = ma vop mb; \
731   tprintf("0x%x " #vop " 0x%x = 0x%x\n", ma, mb, v); \
732   set_sz (v, DSZ()); \
733   PD(v); \
734   E (1); \
735 }
736 
737 #define SHIFT_OP(val, type, count, OP, carry_mask)	\
738 { \
739   int i, c=0; \
740   count = US2(); \
741   val = (type)US1();				\
742   tprintf("%lld " #OP " %d\n", val, count); \
743   for (i = 0; i < count; i ++) \
744     { \
745       c = val & carry_mask; \
746       val OP 1; \
747     } \
748   set_oszc (val, 4, c); \
749   PD (val); \
750 }
751 
752 typedef union {
753   int i;
754   float f;
755 } FloatInt;
756 
757 static inline int
758 float2int (float f)
759 {
760   FloatInt fi;
761   fi.f = f;
762   return fi.i;
763 }
764 
765 static inline float
766 int2float (int i)
767 {
768   FloatInt fi;
769   fi.i = i;
770   return fi.f;
771 }
772 
773 static int
774 fop_fadd (fp_t s1, fp_t s2, fp_t *d)
775 {
776   *d = rxfp_add (s1, s2);
777   return 1;
778 }
779 
780 static int
781 fop_fmul (fp_t s1, fp_t s2, fp_t *d)
782 {
783   *d = rxfp_mul (s1, s2);
784   return 1;
785 }
786 
787 static int
788 fop_fdiv (fp_t s1, fp_t s2, fp_t *d)
789 {
790   *d = rxfp_div (s1, s2);
791   return 1;
792 }
793 
794 static int
795 fop_fsub (fp_t s1, fp_t s2, fp_t *d)
796 {
797   *d = rxfp_sub (s1, s2);
798   return 1;
799 }
800 
801 #define FPPENDING() (regs.r_fpsw & (FPSWBITS_CE | (FPSWBITS_FMASK & (regs.r_fpsw << FPSW_EFSH))))
802 #define FPCLEAR() regs.r_fpsw &= FPSWBITS_CLEAR
803 #define FPCHECK() \
804   if (FPPENDING()) \
805     return do_fp_exception (opcode_pc)
806 
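/* Binary floating-point operations.  Operands and results are kept as
   raw 32-bit IEEE bit patterns (the fop_* helpers above wrap the
   rxfp_* routines), so S and Z are set directly from the sign bit and
   the magnitude bits of the result; a pending FP exception bails out
   through FPCHECK before the result is stored.  */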
807 #define FLOAT_OP(func) \
808 { \
809   int do_store;   \
810   fp_t fa, fb, fc; \
811   FPCLEAR(); \
812   fb = GS (); \
813   fa = GD (); \
814   do_store = fop_##func (fa, fb, &fc); \
815   tprintf("%g " #func " %g = %g %08x\n", int2float(fa), int2float(fb), int2float(fc), fc); \
816   FPCHECK(); \
817   if (do_store) \
818     PD (fc);	\
819   mb = 0; \
820   if ((fc & 0x80000000UL) != 0) \
821     mb |= FLAGBIT_S; \
822   if ((fc & 0x7fffffffUL) == 0)			\
823     mb |= FLAGBIT_Z; \
824   set_flags (FLAGBIT_S | FLAGBIT_Z, mb); \
825 }
826 
827 #define carry (FLAG_C ? 1 : 0)
828 
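/* Fixed exception vectors near the top of the address space, plus the
   host signal used to report each one when running under GDB.  */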
829 static struct {
830   unsigned long vaddr;
831   const char *str;
832   int signal;
833 } exception_info[] = {
834   { 0xFFFFFFD0UL, "privileged opcode", SIGILL },
835   { 0xFFFFFFD4UL, "access violation", SIGSEGV },
836   { 0xFFFFFFDCUL, "undefined opcode", SIGILL },
837   { 0xFFFFFFE4UL, "floating point", SIGFPE }
838 };
839 #define EX_PRIVILEDGED	0
840 #define EX_ACCESS	1
841 #define EX_UNDEFINED	2
842 #define EX_FLOATING	3
843 #define EXCEPTION(n)  \
844   return generate_exception (n, opcode_pc)
845 
846 #define PRIVILEDGED() \
847   if (FLAG_PM) \
848     EXCEPTION (EX_PRIVILEDGED)
849 
850 static int
851 generate_exception (unsigned long type, SI opcode_pc)
852 {
853   SI old_psw, old_pc, new_pc;
854 
855   new_pc = mem_get_si (exception_info[type].vaddr);
856   /* 0x00020000 is the value used to initialise the known
857      exception vectors (see rx.ld).  It lies in a reserved area
858      of memory, so do not try to access it; if the value has not
859      been changed by the program, then the vector has not been
860      installed.  */
861   if (new_pc == 0 || new_pc == 0x00020000)
862     {
863       if (rx_in_gdb)
864 	return RX_MAKE_STOPPED (exception_info[type].signal);
865 
866       fprintf(stderr, "Unhandled %s exception at pc = %#lx\n",
867 	      exception_info[type].str, (unsigned long) opcode_pc);
868       if (type == EX_FLOATING)
869 	{
870 	  int mask = FPPENDING ();
871 	  fprintf (stderr, "Pending FP exceptions:");
872 	  if (mask & FPSWBITS_FV)
873 	    fprintf(stderr, " Invalid");
874 	  if (mask & FPSWBITS_FO)
875 	    fprintf(stderr, " Overflow");
876 	  if (mask & FPSWBITS_FZ)
877 	    fprintf(stderr, " Division-by-zero");
878 	  if (mask & FPSWBITS_FU)
879 	    fprintf(stderr, " Underflow");
880 	  if (mask & FPSWBITS_FX)
881 	    fprintf(stderr, " Inexact");
882 	  if (mask & FPSWBITS_CE)
883 	    fprintf(stderr, " Unimplemented");
884 	  fprintf(stderr, "\n");
885 	}
886       return RX_MAKE_EXITED (1);
887     }
888 
889   tprintf ("Triggering %s exception\n", exception_info[type].str);
890 
891   old_psw = regs.r_psw;
892   regs.r_psw &= ~ (FLAGBIT_I | FLAGBIT_U | FLAGBIT_PM);
893   old_pc = opcode_pc;
894   regs.r_pc = new_pc;
895   pushpc (old_psw);
896   pushpc (old_pc);
897   return RX_MAKE_STEPPED ();
898 }
899 
900 void
901 generate_access_exception (void)
902 {
903   int rv;
904 
905   rv = generate_exception (EX_ACCESS, regs.r_pc);
906   if (RX_EXITED (rv))
907     longjmp (decode_jmp_buf, rv);
908 }
909 
910 static int
911 do_fp_exception (unsigned long opcode_pc)
912 {
913   while (FPPENDING())
914     EXCEPTION (EX_FLOATING);
915   return RX_MAKE_STEPPED ();
916 }
917 
918 static int
919 op_is_memory (const RX_Opcode_Decoded *rd, int i)
920 {
921   switch (rd->op[i].type)
922     {
923     case RX_Operand_Predec:
924     case RX_Operand_Postinc:
925     case RX_Operand_Indirect:
926       return 1;
927     default:
928       return 0;
929     }
930 }
931 #define OM(i) op_is_memory (opcode, i)
932 
933 #define DO_RETURN(x) { longjmp (decode_jmp_buf, x); }
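
/* Anything that has to end the simulation step immediately (syscall
   exits, breakpoints, stops) longjmps back to whoever armed
   decode_jmp_buf rather than returning.  A minimal caller sketch,
   assuming the usual setjmp pairing (the real driver lives elsewhere
   in the simulator):

     int rc = setjmp (decode_jmp_buf);
     if (rc == 0)
       rc = decode_opcode ();
     // rc now holds an RX_MAKE_* status either way.
*/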
934 
935 int
936 decode_opcode (void)
937 {
938   unsigned int uma=0, umb=0;
939   int ma=0, mb=0;
940   int opcode_size, v;
941   unsigned long long ll;
942   long long sll;
943   unsigned long opcode_pc;
944   RX_Data rx_data;
945   const RX_Opcode_Decoded *opcode;
946 #ifdef WITH_PROFILE
947   unsigned long long prev_cycle_count;
948 #endif
949 #ifdef CYCLE_ACCURATE
950   unsigned int tx;
951 #endif
952 
953 #ifdef WITH_PROFILE
954   prev_cycle_count = regs.cycle_count;
955 #endif
956 
957 #ifdef CYCLE_ACCURATE
958   memory_source = 0;
959   memory_dest = 0;
960 #endif
961 
962   rx_cycles ++;
963 
964   maybe_get_mem_page (regs.r_pc);
965 
966   opcode_pc = regs.r_pc;
967 
968   /* Note that we don't word-swap the PC here; there's no point, since rx_get_byte does the byte-lane swap itself.  */
969   if (decode_cache_base[opcode_pc] == NULL)
970     {
971       RX_Opcode_Decoded *opcode_w;
972       rx_data.dpc = opcode_pc;
973       opcode_w = decode_cache_base[opcode_pc] = calloc (1, sizeof (RX_Opcode_Decoded));
974       opcode_size = rx_decode_opcode (opcode_pc, opcode_w,
975 				      rx_get_byte, &rx_data);
976       opcode = opcode_w;
977     }
978   else
979     {
980       opcode = decode_cache_base[opcode_pc];
981       opcode_size = opcode->n_bytes;
982     }
983 
984 #ifdef CYCLE_ACCURATE
985   if (branch_alignment_penalty)
986     {
987       if ((regs.r_pc ^ (regs.r_pc + opcode_size - 1)) & ~7)
988 	{
989 	  tprintf("1 cycle branch alignment penalty\n");
990 	  cycles (branch_alignment_penalty);
991 #ifdef WITH_PROFILE
992 	  branch_alignment_stalls ++;
993 #endif
994 	}
995       branch_alignment_penalty = 0;
996     }
997 #endif
998 
999   regs.r_pc += opcode_size;
1000 
1001   rx_flagmask = opcode->flags_s;
1002   rx_flagand = ~(int)opcode->flags_0;
1003   rx_flagor = opcode->flags_1;
1004 
1005   switch (opcode->id)
1006     {
1007     case RXO_abs:
1008       sll = GS ();
1009       tprintf("|%lld| = ", sll);
1010       if (sll < 0)
1011 	sll = -sll;
1012       tprintf("%lld\n", sll);
1013       PD (sll);
1014       set_osz (sll, 4);
1015       E (1);
1016       break;
1017 
1018     case RXO_adc:
1019       MATH_OP (+,carry);
1020       break;
1021 
1022     case RXO_add:
1023       MATH_OP (+,0);
1024       break;
1025 
1026     case RXO_and:
1027       LOGIC_OP (&);
1028       break;
1029 
1030     case RXO_bclr:
1031       ma = GD ();
1032       mb = GS ();
1033       if (opcode->op[0].type == RX_Operand_Register)
1034 	mb &= 0x1f;
1035       else
1036 	mb &= 0x07;
1037       ma &= ~(1 << mb);
1038       PD (ma);
1039       EBIT;
1040       break;
1041 
1042     case RXO_bmcc:
1043       ma = GD ();
1044       mb = GS ();
1045       if (opcode->op[0].type == RX_Operand_Register)
1046 	mb &= 0x1f;
1047       else
1048 	mb &= 0x07;
1049       if (GS2 ())
1050 	ma |= (1 << mb);
1051       else
1052 	ma &= ~(1 << mb);
1053       PD (ma);
1054       EBIT;
1055       break;
1056 
1057     case RXO_bnot:
1058       ma = GD ();
1059       mb = GS ();
1060       if (opcode->op[0].type == RX_Operand_Register)
1061 	mb &= 0x1f;
1062       else
1063 	mb &= 0x07;
1064       ma ^= (1 << mb);
1065       PD (ma);
1066       EBIT;
1067       break;
1068 
1069     case RXO_branch:
1070       if (opcode->op[1].type == RX_Operand_None || GS())
1071 	{
1072 #ifdef CYCLE_ACCURATE
1073 	  SI old_pc = regs.r_pc;
1074 	  int delta;
1075 #endif
1076 	  regs.r_pc = GD();
1077 #ifdef CYCLE_ACCURATE
1078 	  delta = regs.r_pc - old_pc;
1079 	  if (delta >= 0 && delta < 16
1080 	      && opcode_size > 1)
1081 	    {
1082 	      tprintf("near forward branch bonus\n");
1083 	      cycles (2);
1084 	    }
1085 	  else
1086 	    {
1087 	      cycles (3);
1088 	      branch_alignment_penalty = 1;
1089 	    }
1090 #ifdef WITH_PROFILE
1091 	  branch_stalls ++;
1092 #endif
1093 #endif
1094 	}
1095 #ifdef CYCLE_ACCURATE
1096       else
1097 	cycles (1);
1098 #endif
1099       break;
1100 
1101     case RXO_branchrel:
1102       if (opcode->op[1].type == RX_Operand_None || GS())
1103 	{
1104 	  int delta = GD();
1105 	  regs.r_pc = opcode_pc + delta;
1106 #ifdef CYCLE_ACCURATE
1107 	  /* Note: specs say 3, chip says 2.  */
1108 	  if (delta >= 0 && delta < 16
1109 	      && opcode_size > 1)
1110 	    {
1111 	      tprintf("near forward branch bonus\n");
1112 	      cycles (2);
1113 	    }
1114 	  else
1115 	    {
1116 	      cycles (3);
1117 	      branch_alignment_penalty = 1;
1118 	    }
1119 #ifdef WITH_PROFILE
1120 	  branch_stalls ++;
1121 #endif
1122 #endif
1123 	}
1124 #ifdef CYCLE_ACCURATE
1125       else
1126 	cycles (1);
1127 #endif
1128       break;
1129 
1130     case RXO_brk:
1131       {
1132 	int old_psw = regs.r_psw;
1133 	if (rx_in_gdb)
1134 	  DO_RETURN (RX_MAKE_HIT_BREAK ());
1135 	if (regs.r_intb == 0)
1136 	  {
1137 	    tprintf("BREAK hit, no vector table.\n");
1138 	    DO_RETURN (RX_MAKE_EXITED(1));
1139 	  }
1140 	regs.r_psw &= ~(FLAGBIT_I | FLAGBIT_U | FLAGBIT_PM);
1141 	pushpc (old_psw);
1142 	pushpc (regs.r_pc);
1143 	regs.r_pc = mem_get_si (regs.r_intb);
1144 	cycles(6);
1145       }
1146       break;
1147 
1148     case RXO_bset:
1149       ma = GD ();
1150       mb = GS ();
1151       if (opcode->op[0].type == RX_Operand_Register)
1152 	mb &= 0x1f;
1153       else
1154 	mb &= 0x07;
1155       ma |= (1 << mb);
1156       PD (ma);
1157       EBIT;
1158       break;
1159 
1160     case RXO_btst:
1161       ma = GS ();
1162       mb = GS2 ();
1163       if (opcode->op[1].type == RX_Operand_Register)
1164 	mb &= 0x1f;
1165       else
1166 	mb &= 0x07;
1167       umb = ma & (1 << mb);
1168       set_zc (! umb, umb);
1169       EBIT;
1170       break;
1171 
1172     case RXO_clrpsw:
1173       v = 1 << opcode->op[0].reg;
1174       if (FLAG_PM
1175 	  && (v == FLAGBIT_I
1176 	      || v == FLAGBIT_U))
1177 	break;
1178       regs.r_psw &= ~v;
1179       cycles (1);
1180       break;
1181 
1182     case RXO_div: /* d = d / s */
1183       ma = GS();
1184       mb = GD();
1185       tprintf("%d / %d = ", mb, ma);
1186       if (ma == 0 || (ma == -1 && (unsigned int) mb == 0x80000000))
1187 	{
1188 	  tprintf("#NAN\n");
1189 	  set_flags (FLAGBIT_O, FLAGBIT_O);
1190 	  cycles (3);
1191 	}
1192       else
1193 	{
1194 	  v = mb/ma;
1195 	  tprintf("%d\n", v);
1196 	  set_flags (FLAGBIT_O, 0);
1197 	  PD (v);
1198 	  div_cycles (mb, ma);
1199 	}
1200       break;
1201 
1202     case RXO_divu: /* d = d / s */
1203       uma = GS();
1204       umb = GD();
1205       tprintf("%u / %u = ", umb, uma);
1206       if (uma == 0)
1207 	{
1208 	  tprintf("#NAN\n");
1209 	  set_flags (FLAGBIT_O, FLAGBIT_O);
1210 	  cycles (2);
1211 	}
1212       else
1213 	{
1214 	  v = umb / uma;
1215 	  tprintf("%u\n", v);
1216 	  set_flags (FLAGBIT_O, 0);
1217 	  PD (v);
1218 	  divu_cycles (umb, uma);
1219 	}
1220       break;
1221 
1222     case RXO_emul:
1223       ma = GD ();
1224       mb = GS ();
1225       sll = (long long)ma * (long long)mb;
1226       tprintf("%d * %d = %lld\n", ma, mb, sll);
1227       put_reg (opcode->op[0].reg, sll);
1228       put_reg (opcode->op[0].reg + 1, sll >> 32);
1229       E2;
1230       break;
1231 
1232     case RXO_emulu:
1233       uma = GD ();
1234       umb = GS ();
1235       ll = (long long)uma * (long long)umb;
1236       tprintf("%#x * %#x = %#llx\n", uma, umb, ll);
1237       put_reg (opcode->op[0].reg, ll);
1238       put_reg (opcode->op[0].reg + 1, ll >> 32);
1239       E2;
1240       break;
1241 
1242     case RXO_fadd:
1243       FLOAT_OP (fadd);
1244       E (4);
1245       break;
1246 
1247     case RXO_fcmp:
1248       ma = GD();
1249       mb = GS();
1250       FPCLEAR ();
1251       rxfp_cmp (ma, mb);
1252       FPCHECK ();
1253       E (1);
1254       break;
1255 
1256     case RXO_fdiv:
1257       FLOAT_OP (fdiv);
1258       E (16);
1259       break;
1260 
1261     case RXO_fmul:
1262       FLOAT_OP (fmul);
1263       E (3);
1264       break;
1265 
1266     case RXO_rtfi:
1267       PRIVILEDGED ();
1268       regs.r_psw = regs.r_bpsw;
1269       regs.r_pc = regs.r_bpc;
1270 #ifdef CYCLE_ACCURATE
1271       regs.fast_return = 0;
1272       cycles(3);
1273 #endif
1274       break;
1275 
1276     case RXO_fsub:
1277       FLOAT_OP (fsub);
1278       E (4);
1279       break;
1280 
1281     case RXO_ftoi:
1282       ma = GS ();
1283       FPCLEAR ();
1284       mb = rxfp_ftoi (ma, FPRM_ZERO);
1285       FPCHECK ();
1286       PD (mb);
1287       tprintf("(int) %g = %d\n", int2float(ma), mb);
1288       set_sz (mb, 4);
1289       E (2);
1290       break;
1291 
1292     case RXO_int:
1293       v = GS ();
1294       if (v == 255)
1295 	{
1296 	  int rc = rx_syscall (regs.r[5]);
1297 	  if (! RX_STEPPED (rc))
1298 	    DO_RETURN (rc);
1299 	}
1300       else
1301 	{
1302 	  int old_psw = regs.r_psw;
1303 	  regs.r_psw &= ~(FLAGBIT_I | FLAGBIT_U | FLAGBIT_PM);
1304 	  pushpc (old_psw);
1305 	  pushpc (regs.r_pc);
1306 	  regs.r_pc = mem_get_si (regs.r_intb + 4 * v);
1307 	}
1308       cycles (6);
1309       break;
1310 
1311     case RXO_itof:
1312       ma = GS ();
1313       FPCLEAR ();
1314       mb = rxfp_itof (ma, regs.r_fpsw);
1315       FPCHECK ();
1316       tprintf("(float) %d = %x\n", ma, mb);
1317       PD (mb);
1318       set_sz (ma, 4);
1319       E (2);
1320       break;
1321 
1322     case RXO_jsr:
1323     case RXO_jsrrel:
1324       {
1325 #ifdef CYCLE_ACCURATE
1326 	int delta;
1327 	regs.m2m = 0;
1328 #endif
1329 	v = GD ();
1330 #ifdef CYCLE_ACCURATE
1331 	regs.link_register = regs.r_pc;
1332 #endif
1333 	pushpc (get_reg (pc));
1334 	if (opcode->id == RXO_jsrrel)
1335 	  v += regs.r_pc;
1336 #ifdef CYCLE_ACCURATE
1337 	delta = v - regs.r_pc;
1338 #endif
1339 	put_reg (pc, v);
1340 #ifdef CYCLE_ACCURATE
1341 	/* Note: docs say 3, chip says 2 */
1342 	if (delta >= 0 && delta < 16)
1343 	  {
1344 	    tprintf ("near forward jsr bonus\n");
1345 	    cycles (2);
1346 	  }
1347 	else
1348 	  {
1349 	    branch_alignment_penalty = 1;
1350 	    cycles (3);
1351 	  }
1352 	regs.fast_return = 1;
1353 #endif
1354       }
1355       break;
1356 
1357     case RXO_machi:
1358       ll = (long long)(signed short)(GS() >> 16) * (long long)(signed short)(GS2 () >> 16);
1359       ll <<= 16;
1360       put_reg64 (acc64, ll + regs.r_acc);
1361       E1;
1362       break;
1363 
1364     case RXO_maclo:
1365       ll = (long long)(signed short)(GS()) * (long long)(signed short)(GS2 ());
1366       ll <<= 16;
1367       put_reg64 (acc64, ll + regs.r_acc);
1368       E1;
1369       break;
1370 
1371     case RXO_max:
1372       mb = GS();
1373       ma = GD();
1374       if (ma > mb)
1375 	PD (ma);
1376       else
1377 	PD (mb);
1378       E (1);
1379       break;
1380 
1381     case RXO_min:
1382       mb = GS();
1383       ma = GD();
1384       if (ma < mb)
1385 	PD (ma);
1386       else
1387 	PD (mb);
1388       E (1);
1389       break;
1390 
1391     case RXO_mov:
1392       v = GS ();
1393 
1394       if (opcode->op[1].type == RX_Operand_Register
1395 	  && opcode->op[1].reg == 17 /* PC */)
1396 	{
1397 	  /* Special case.  We want the address of the insn, not the
1398 	     address of the next insn.  */
1399 	  v = opcode_pc;
1400 	}
1401 
1402       if (opcode->op[0].type == RX_Operand_Register
1403 	  && opcode->op[0].reg == 16 /* PSW */)
1404 	{
1405 	  /* Special case, LDC and POPC can't ever modify PM.  */
1406 	  int pm = regs.r_psw & FLAGBIT_PM;
1407 	  v &= ~ FLAGBIT_PM;
1408 	  v |= pm;
1409 	  if (pm)
1410 	    {
1411 	      v &= ~ (FLAGBIT_I | FLAGBIT_U | FLAGBITS_IPL);
1412 	      v |= pm;
1413 	    }
1414 	}
1415       if (FLAG_PM)
1416 	{
1417 	  /* various things can't be changed in user mode.  */
1418 	  if (opcode->op[0].type == RX_Operand_Register)
1419 	    if (opcode->op[0].reg == 32)
1420 	      {
1421 		v &= ~ (FLAGBIT_I | FLAGBIT_U | FLAGBITS_IPL);
1422 		v |= regs.r_psw & (FLAGBIT_I | FLAGBIT_U | FLAGBITS_IPL);
1423 	      }
1424 	  if (opcode->op[0].reg == 34 /* ISP */
1425 	      || opcode->op[0].reg == 37 /* BPSW */
1426 	      || opcode->op[0].reg == 39 /* INTB */
1427 	      || opcode->op[0].reg == 38 /* VCT */)
1428 	    /* These are ignored.  */
1429 	    break;
1430 	}
1431       if (OM(0) && OM(1))
1432 	cycles (2);
1433       else
1434 	cycles (1);
1435 
1436       PD (v);
1437 
1438 #ifdef CYCLE_ACCURATE
1439       if ((opcode->op[0].type == RX_Operand_Predec
1440 	   && opcode->op[1].type == RX_Operand_Register)
1441 	  || (opcode->op[0].type == RX_Operand_Postinc
1442 	      && opcode->op[1].type == RX_Operand_Register))
1443 	{
1444 	  /* Special case: push reg doesn't cause a memory stall.  */
1445 	  memory_dest = 0;
1446 	  tprintf("push special case\n");
1447 	}
1448 #endif
1449 
1450       set_sz (v, DSZ());
1451       break;
1452 
1453     case RXO_movbi:
1454       PD (GS ());
1455       cycles (1);
1456       break;
1457 
1458     case RXO_movbir:
1459       PS (GD ());
1460       cycles (1);
1461       break;
1462 
1463     case RXO_mul:
1464       v = US2 ();
1465       ll = (unsigned long long) US1() * (unsigned long long) v;
1466       PD(ll);
1467       E (1);
1468       break;
1469 
1470     case RXO_mulhi:
1471       v = GS2 ();
1472       ll = (long long)(signed short)(GS() >> 16) * (long long)(signed short)(v >> 16);
1473       ll <<= 16;
1474       put_reg64 (acc64, ll);
1475       E1;
1476       break;
1477 
1478     case RXO_mullo:
1479       v = GS2 ();
1480       ll = (long long)(signed short)(GS()) * (long long)(signed short)(v);
1481       ll <<= 16;
1482       put_reg64 (acc64, ll);
1483       E1;
1484       break;
1485 
1486     case RXO_mvfachi:
1487       PD (get_reg (acchi));
1488       E1;
1489       break;
1490 
1491     case RXO_mvfaclo:
1492       PD (get_reg (acclo));
1493       E1;
1494       break;
1495 
1496     case RXO_mvfacmi:
1497       PD (get_reg (accmi));
1498       E1;
1499       break;
1500 
1501     case RXO_mvtachi:
1502       put_reg (acchi, GS ());
1503       E1;
1504       break;
1505 
1506     case RXO_mvtaclo:
1507       put_reg (acclo, GS ());
1508       E1;
1509       break;
1510 
1511     case RXO_mvtipl:
1512       regs.r_psw &= ~ FLAGBITS_IPL;
1513       regs.r_psw |= (GS () << FLAGSHIFT_IPL) & FLAGBITS_IPL;
1514       E1;
1515       break;
1516 
1517     case RXO_nop:
1518     case RXO_nop2:
1519     case RXO_nop3:
1520     case RXO_nop4:
1521     case RXO_nop5:
1522     case RXO_nop6:
1523     case RXO_nop7:
1524       E1;
1525       break;
1526 
1527     case RXO_or:
1528       LOGIC_OP (|);
1529       break;
1530 
1531     case RXO_popm:
1532       /* POPM cannot pop R0 (sp).  */
1533       if (opcode->op[1].reg == 0 || opcode->op[2].reg == 0)
1534 	EXCEPTION (EX_UNDEFINED);
1535       if (opcode->op[1].reg >= opcode->op[2].reg)
1536 	{
1537 	  regs.r_pc = opcode_pc;
1538 	  DO_RETURN (RX_MAKE_STOPPED (SIGILL));
1539 	}
1540       for (v = opcode->op[1].reg; v <= opcode->op[2].reg; v++)
1541 	{
1542 	  cycles (1);
1543 	  RLD (v);
1544 	  put_reg (v, pop ());
1545 	}
1546       break;
1547 
1548     case RXO_pushm:
1549       /* PUSHM cannot push R0 (sp).  */
1550       if (opcode->op[1].reg == 0 || opcode->op[2].reg == 0)
1551 	EXCEPTION (EX_UNDEFINED);
1552       if (opcode->op[1].reg >= opcode->op[2].reg)
1553 	{
1554 	  regs.r_pc = opcode_pc;
1555 	  return RX_MAKE_STOPPED (SIGILL);
1556 	}
1557       for (v = opcode->op[2].reg; v >= opcode->op[1].reg; v--)
1558 	{
1559 	  RL (v);
1560 	  push (get_reg (v));
1561 	}
1562       cycles (opcode->op[2].reg - opcode->op[1].reg + 1);
1563       break;
1564 
1565     case RXO_racw:
1566       ll = get_reg64 (acc64) << GS ();
1567       ll += 0x80000000ULL;
1568       if ((signed long long)ll > (signed long long)0x00007fff00000000ULL)
1569 	ll = 0x00007fff00000000ULL;
1570       else if ((signed long long)ll < (signed long long)0xffff800000000000ULL)
1571 	ll = 0xffff800000000000ULL;
1572       else
1573 	ll &= 0xffffffff00000000ULL;
1574       put_reg64 (acc64, ll);
1575       E1;
1576       break;
1577 
1578     case RXO_rte:
1579       PRIVILEDGED ();
1580       regs.r_pc = poppc ();
1581       regs.r_psw = poppc ();
1582       if (FLAG_PM)
1583 	regs.r_psw |= FLAGBIT_U;
1584 #ifdef CYCLE_ACCURATE
1585       regs.fast_return = 0;
1586       cycles (6);
1587 #endif
1588       break;
1589 
1590     case RXO_revl:
1591       uma = GS ();
1592       umb = (((uma >> 24) & 0xff)
1593 	     | ((uma >> 8) & 0xff00)
1594 	     | ((uma << 8) & 0xff0000)
1595 	     | ((uma << 24) & 0xff000000UL));
1596       PD (umb);
1597       E1;
1598       break;
1599 
1600     case RXO_revw:
1601       uma = GS ();
1602       umb = (((uma >> 8) & 0x00ff00ff)
1603 	     | ((uma << 8) & 0xff00ff00UL));
1604       PD (umb);
1605       E1;
1606       break;
1607 
1608     case RXO_rmpa:
1609       RL(4);
1610       RL(5);
1611 #ifdef CYCLE_ACCURATE
1612       tx = regs.r[3];
1613 #endif
1614 
1615       while (regs.r[3] != 0)
1616 	{
1617 	  long long tmp;
1618 
1619 	  switch (opcode->size)
1620 	    {
1621 	    case RX_Long:
1622 	      ma = mem_get_si (regs.r[1]);
1623 	      mb = mem_get_si (regs.r[2]);
1624 	      regs.r[1] += 4;
1625 	      regs.r[2] += 4;
1626 	      break;
1627 	    case RX_Word:
1628 	      ma = sign_ext (mem_get_hi (regs.r[1]), 16);
1629 	      mb = sign_ext (mem_get_hi (regs.r[2]), 16);
1630 	      regs.r[1] += 2;
1631 	      regs.r[2] += 2;
1632 	      break;
1633 	    case RX_Byte:
1634 	      ma = sign_ext (mem_get_qi (regs.r[1]), 8);
1635 	      mb = sign_ext (mem_get_qi (regs.r[2]), 8);
1636 	      regs.r[1] += 1;
1637 	      regs.r[2] += 1;
1638 	      break;
1639 	    default:
1640 	      abort ();
1641 	    }
1642 	  /* We do the multiply as a signed value.  */
1643 	  sll = (long long)ma * (long long)mb;
1644 	  tprintf("        %016llx = %d * %d\n", sll, ma, mb);
1645 	  /* but we do the sum as unsigned, while sign extending the operands.  */
1646 	  tmp = regs.r[4] + (sll & 0xffffffffUL);
1647 	  regs.r[4] = tmp & 0xffffffffUL;
1648 	  tmp >>= 32;
1649 	  sll >>= 32;
1650 	  tmp += regs.r[5] + (sll & 0xffffffffUL);
1651 	  regs.r[5] = tmp & 0xffffffffUL;
1652 	  tmp >>= 32;
1653 	  sll >>= 32;
1654 	  tmp += regs.r[6] + (sll & 0xffffffffUL);
1655 	  regs.r[6] = tmp & 0xffffffffUL;
1656 	  tprintf("%08lx\033[36m%08lx\033[0m%08lx\n",
1657 		  (unsigned long) regs.r[6],
1658 		  (unsigned long) regs.r[5],
1659 		  (unsigned long) regs.r[4]);
1660 
1661 	  regs.r[3] --;
1662 	}
1663       if (regs.r[6] & 0x00008000)
1664 	regs.r[6] |= 0xffff0000UL;
1665       else
1666 	regs.r[6] &= 0x0000ffff;
1667       ma = (regs.r[6] & 0x80000000UL) ? FLAGBIT_S : 0;
1668       if (regs.r[6] != 0 && regs.r[6] != 0xffffffffUL)
1669 	set_flags (FLAGBIT_O|FLAGBIT_S, ma | FLAGBIT_O);
1670       else
1671 	set_flags (FLAGBIT_O|FLAGBIT_S, ma);
1672 #ifdef CYCLE_ACCURATE
1673       switch (opcode->size)
1674 	{
1675 	case RX_Long:
1676 	  cycles (6 + 4 * tx);
1677 	  break;
1678 	case RX_Word:
1679 	  cycles (6 + 5 * (tx / 2) + 4 * (tx % 2));
1680 	  break;
1681 	case RX_Byte:
1682 	  cycles (6 + 7 * (tx / 4) + 4 * (tx % 4));
1683 	  break;
1684 	default:
1685 	  abort ();
1686 	}
1687 #endif
1688       break;
1689 
1690     case RXO_rolc:
1691       v = GD ();
1692       ma = v & 0x80000000UL;
1693       v <<= 1;
1694       v |= carry;
1695       set_szc (v, 4, ma);
1696       PD (v);
1697       E1;
1698       break;
1699 
1700     case RXO_rorc:
1701       uma = GD ();
1702       mb = uma & 1;
1703       uma >>= 1;
1704       uma |= (carry ? 0x80000000UL : 0);
1705       set_szc (uma, 4, mb);
1706       PD (uma);
1707       E1;
1708       break;
1709 
1710     case RXO_rotl:
1711       mb = GS ();
1712       uma = GD ();
1713       if (mb)
1714 	{
1715 	  uma = (uma << mb) | (uma >> (32-mb));
1716 	  mb = uma & 1;
1717 	}
1718       set_szc (uma, 4, mb);
1719       PD (uma);
1720       E1;
1721       break;
1722 
1723     case RXO_rotr:
1724       mb = GS ();
1725       uma = GD ();
1726       if (mb)
1727 	{
1728 	  uma = (uma >> mb) | (uma << (32-mb));
1729 	  mb = uma & 0x80000000;
1730 	}
1731       set_szc (uma, 4, mb);
1732       PD (uma);
1733       E1;
1734       break;
1735 
1736     case RXO_round:
1737       ma = GS ();
1738       FPCLEAR ();
1739       mb = rxfp_ftoi (ma, regs.r_fpsw);
1740       FPCHECK ();
1741       PD (mb);
1742       tprintf("(int) %g = %d\n", int2float(ma), mb);
1743       set_sz (mb, 4);
1744       E (2);
1745       break;
1746 
1747     case RXO_rts:
1748       {
1749 #ifdef CYCLE_ACCURATE
1750 	int cyc = 5;
1751 #endif
1752 	regs.r_pc = poppc ();
1753 #ifdef CYCLE_ACCURATE
1754 	/* Note: specs say 5, chip says 3.  */
1755 	if (regs.fast_return && regs.link_register == regs.r_pc)
1756 	  {
1757 #ifdef WITH_PROFILE
1758 	    fast_returns ++;
1759 #endif
1760 	    tprintf("fast return bonus\n");
1761 	    cyc -= 2;
1762 	  }
1763 	cycles (cyc);
1764 	regs.fast_return = 0;
1765 	branch_alignment_penalty = 1;
1766 #endif
1767       }
1768       break;
1769 
1770     case RXO_rtsd:
1771       if (opcode->op[2].type == RX_Operand_Register)
1772 	{
1773 	  int i;
1774 	  /* RTSD cannot pop R0 (sp).  */
1775 	  put_reg (0, get_reg (0) + GS() - (opcode->op[0].reg-opcode->op[2].reg+1)*4);
1776 	  if (opcode->op[2].reg == 0)
1777 	    EXCEPTION (EX_UNDEFINED);
1778 #ifdef CYCLE_ACCURATE
1779 	  tx = opcode->op[0].reg - opcode->op[2].reg + 1;
1780 #endif
1781 	  for (i = opcode->op[2].reg; i <= opcode->op[0].reg; i ++)
1782 	    {
1783 	      RLD (i);
1784 	      put_reg (i, pop ());
1785 	    }
1786 	}
1787       else
1788 	{
1789 #ifdef CYCLE_ACCURATE
1790 	  tx = 0;
1791 #endif
1792 	  put_reg (0, get_reg (0) + GS());
1793 	}
1794       put_reg (pc, poppc());
1795 #ifdef CYCLE_ACCURATE
1796       if (regs.fast_return && regs.link_register == regs.r_pc)
1797 	{
1798 	  tprintf("fast return bonus\n");
1799 #ifdef WITH_PROFILE
1800 	  fast_returns ++;
1801 #endif
1802 	  cycles (tx < 3 ? 3 : tx + 1);
1803 	}
1804       else
1805 	{
1806 	  cycles (tx < 5 ? 5 : tx + 1);
1807 	}
1808       regs.fast_return = 0;
1809       branch_alignment_penalty = 1;
1810 #endif
1811       break;
1812 
1813     case RXO_sat:
1814       if (FLAG_O && FLAG_S)
1815 	PD (0x7fffffffUL);
1816       else if (FLAG_O && ! FLAG_S)
1817 	PD (0x80000000UL);
1818       E1;
1819       break;
1820 
1821     case RXO_satr:
1822       if (FLAG_O && ! FLAG_S)
1823 	{
1824 	  put_reg (6, 0x0);
1825 	  put_reg (5, 0x7fffffff);
1826 	  put_reg (4, 0xffffffff);
1827 	}
1828       else if (FLAG_O && FLAG_S)
1829 	{
1830 	  put_reg (6, 0xffffffff);
1831 	  put_reg (5, 0x80000000);
1832 	  put_reg (4, 0x0);
1833 	}
1834       E1;
1835       break;
1836 
1837     case RXO_sbb:
1838       MATH_OP (-, ! carry);
1839       break;
1840 
1841     case RXO_sccnd:
1842       if (GS())
1843 	PD (1);
1844       else
1845 	PD (0);
1846       E1;
1847       break;
1848 
1849     case RXO_scmpu:
1850 #ifdef CYCLE_ACCURATE
1851       tx = regs.r[3];
1852 #endif
1853       while (regs.r[3] != 0)
1854 	{
1855 	  uma = mem_get_qi (regs.r[1] ++);
1856 	  umb = mem_get_qi (regs.r[2] ++);
1857 	  regs.r[3] --;
1858 	  if (uma != umb || uma == 0)
1859 	    break;
1860 	}
1861       if (uma == umb)
1862 	set_zc (1, 1);
1863       else
1864 	set_zc (0, ((int)uma - (int)umb) >= 0);
1865       cycles (2 + 4 * (tx / 4) + 4 * (tx % 4));
1866       break;
1867 
1868     case RXO_setpsw:
1869       v = 1 << opcode->op[0].reg;
1870       if (FLAG_PM
1871 	  && (v == FLAGBIT_I
1872 	      || v == FLAGBIT_U))
1873 	break;
1874       regs.r_psw |= v;
1875       cycles (1);
1876       break;
1877 
1878     case RXO_smovb:
1879       RL (3);
1880 #ifdef CYCLE_ACCURATE
1881       tx = regs.r[3];
1882 #endif
1883       while (regs.r[3])
1884 	{
1885 	  uma = mem_get_qi (regs.r[2] --);
1886 	  mem_put_qi (regs.r[1]--, uma);
1887 	  regs.r[3] --;
1888 	}
1889 #ifdef CYCLE_ACCURATE
1890       if (tx > 3)
1891 	cycles (6 + 3 * (tx / 4) + 3 * (tx % 4));
1892       else
1893 	cycles (2 + 3 * (tx % 4));
1894 #endif
1895       break;
1896 
1897     case RXO_smovf:
1898       RL (3);
1899 #ifdef CYCLE_ACCURATE
1900       tx = regs.r[3];
1901 #endif
1902       while (regs.r[3])
1903 	{
1904 	  uma = mem_get_qi (regs.r[2] ++);
1905 	  mem_put_qi (regs.r[1]++, uma);
1906 	  regs.r[3] --;
1907 	}
1908       cycles (2 + 3 * (int)(tx / 4) + 3 * (tx % 4));
1909       break;
1910 
1911     case RXO_smovu:
1912 #ifdef CYCLE_ACCURATE
1913       tx = regs.r[3];
1914 #endif
1915       while (regs.r[3] != 0)
1916 	{
1917 	  uma = mem_get_qi (regs.r[2] ++);
1918 	  mem_put_qi (regs.r[1]++, uma);
1919 	  regs.r[3] --;
1920 	  if (uma == 0)
1921 	    break;
1922 	}
1923       cycles (2 + 3 * (int)(tx / 4) + 3 * (tx % 4));
1924       break;
1925 
1926     case RXO_shar: /* d = ma >> mb */
1927       SHIFT_OP (sll, int, mb, >>=, 1);
1928       E (1);
1929       break;
1930 
1931     case RXO_shll: /* d = ma << mb */
1932       SHIFT_OP (ll, int, mb, <<=, 0x80000000UL);
1933       E (1);
1934       break;
1935 
1936     case RXO_shlr: /* d = ma >> mb */
1937       SHIFT_OP (ll, unsigned int, mb, >>=, 1);
1938       E (1);
1939       break;
1940 
1941     case RXO_sstr:
1942       RL (3);
1943 #ifdef CYCLE_ACCURATE
1944       tx = regs.r[3];
1945 #endif
1946       switch (opcode->size)
1947 	{
1948 	case RX_Long:
1949 	  while (regs.r[3] != 0)
1950 	    {
1951 	      mem_put_si (regs.r[1], regs.r[2]);
1952 	      regs.r[1] += 4;
1953 	      regs.r[3] --;
1954 	    }
1955 	  cycles (2 + tx);
1956 	  break;
1957 	case RX_Word:
1958 	  while (regs.r[3] != 0)
1959 	    {
1960 	      mem_put_hi (regs.r[1], regs.r[2]);
1961 	      regs.r[1] += 2;
1962 	      regs.r[3] --;
1963 	    }
1964 	  cycles (2 + (int)(tx / 2) + tx % 2);
1965 	  break;
1966 	case RX_Byte:
1967 	  while (regs.r[3] != 0)
1968 	    {
1969 	      mem_put_qi (regs.r[1], regs.r[2]);
1970 	      regs.r[1] ++;
1971 	      regs.r[3] --;
1972 	    }
1973 	  cycles (2 + (int)(tx / 4) + tx % 4);
1974 	  break;
1975 	default:
1976 	  abort ();
1977 	}
1978       break;
1979 
1980     case RXO_stcc:
1981       if (GS2())
1982 	PD (GS ());
1983       E1;
1984       break;
1985 
1986     case RXO_stop:
1987       PRIVILEDGED ();
1988       regs.r_psw |= FLAGBIT_I;
1989       DO_RETURN (RX_MAKE_STOPPED(0));
1990 
1991     case RXO_sub:
1992       MATH_OP (-, 0);
1993       break;
1994 
1995     case RXO_suntil:
1996       RL(3);
1997 #ifdef CYCLE_ACCURATE
1998       tx = 0;
1999 #endif
2000       if (regs.r[3] == 0)
2001 	{
2002 	  cycles (3);
2003 	  break;
2004 	}
2005       switch (opcode->size)
2006 	{
2007 	case RX_Long:
2008 	  uma = get_reg (2);
2009 	  while (regs.r[3] != 0)
2010 	    {
2011 	      regs.r[3] --;
2012 	      umb = mem_get_si (get_reg (1));
2013 	      regs.r[1] += 4;
2014 #ifdef CYCLE_ACCURATE
2015 	      tx ++;
2016 #endif
2017 	      if (umb == uma)
2018 		break;
2019 	    }
2020 #ifdef CYCLE_ACCURATE
2021 	  cycles (3 + 3 * tx);
2022 #endif
2023 	  break;
2024 	case RX_Word:
2025 	  uma = get_reg (2) & 0xffff;
2026 	  while (regs.r[3] != 0)
2027 	    {
2028 	      regs.r[3] --;
2029 	      umb = mem_get_hi (get_reg (1));
2030 	      regs.r[1] += 2;
2031 #ifdef CYCLE_ACCURATE
2032 	      tx ++;
2033 #endif
2034 	      if (umb == uma)
2035 		break;
2036 	    }
2037 #ifdef CYCLE_ACCURATE
2038 	  cycles (3 + 3 * (tx / 2) + 3 * (tx % 2));
2039 #endif
2040 	  break;
2041 	case RX_Byte:
2042 	  uma = get_reg (2) & 0xff;
2043 	  while (regs.r[3] != 0)
2044 	    {
2045 	      regs.r[3] --;
2046 	      umb = mem_get_qi (regs.r[1]);
2047 	      regs.r[1] += 1;
2048 #ifdef CYCLE_ACCURATE
2049 	      tx ++;
2050 #endif
2051 	      if (umb == uma)
2052 		break;
2053 	    }
2054 #ifdef CYCLE_ACCURATE
2055 	  cycles (3 + 3 * (tx / 4) + 3 * (tx % 4));
2056 #endif
2057 	  break;
2058 	default:
2059 	  abort();
2060 	}
2061       if (uma == umb)
2062 	set_zc (1, 1);
2063       else
2064 	set_zc (0, ((int)uma - (int)umb) >= 0);
2065       break;
2066 
2067     case RXO_swhile:
2068       RL(3);
2069 #ifdef CYCLE_ACCURATE
2070       tx = 0;
2071 #endif
2072       if (regs.r[3] == 0)
2073 	break;
2074       switch (opcode->size)
2075 	{
2076 	case RX_Long:
2077 	  uma = get_reg (2);
2078 	  while (regs.r[3] != 0)
2079 	    {
2080 	      regs.r[3] --;
2081 	      umb = mem_get_si (get_reg (1));
2082 	      regs.r[1] += 4;
2083 #ifdef CYCLE_ACCURATE
2084 	      tx ++;
2085 #endif
2086 	      if (umb != uma)
2087 		break;
2088 	    }
2089 #ifdef CYCLE_ACCURATE
2090 	  cycles (3 + 3 * tx);
2091 #endif
2092 	  break;
2093 	case RX_Word:
2094 	  uma = get_reg (2) & 0xffff;
2095 	  while (regs.r[3] != 0)
2096 	    {
2097 	      regs.r[3] --;
2098 	      umb = mem_get_hi (get_reg (1));
2099 	      regs.r[1] += 2;
2100 #ifdef CYCLE_ACCURATE
2101 	      tx ++;
2102 #endif
2103 	      if (umb != uma)
2104 		break;
2105 	    }
2106 #ifdef CYCLE_ACCURATE
2107 	  cycles (3 + 3 * (tx / 2) + 3 * (tx % 2));
2108 #endif
2109 	  break;
2110 	case RX_Byte:
2111 	  uma = get_reg (2) & 0xff;
2112 	  while (regs.r[3] != 0)
2113 	    {
2114 	      regs.r[3] --;
2115 	      umb = mem_get_qi (regs.r[1]);
2116 	      regs.r[1] += 1;
2117 #ifdef CYCLE_ACCURATE
2118 	      tx ++;
2119 #endif
2120 	      if (umb != uma)
2121 		break;
2122 	    }
2123 #ifdef CYCLE_ACCURATE
2124 	  cycles (3 + 3 * (tx / 4) + 3 * (tx % 4));
2125 #endif
2126 	  break;
2127 	default:
2128 	  abort();
2129 	}
2130       if (uma == umb)
2131 	set_zc (1, 1);
2132       else
2133 	set_zc (0, ((int)uma - (int)umb) >= 0);
2134       break;
2135 
2136     case RXO_wait:
2137       PRIVILEDGED ();
2138       regs.r_psw |= FLAGBIT_I;
2139       DO_RETURN (RX_MAKE_STOPPED(0));
2140 
2141     case RXO_xchg:
2142 #ifdef CYCLE_ACCURATE
2143       regs.m2m = 0;
2144 #endif
2145       v = GS (); /* This is the memory operand, if any.  */
2146       PS (GD ()); /* and this may change the address register.  */
2147       PD (v);
2148       E2;
2149 #ifdef CYCLE_ACCURATE
2150       /* all M cycles happen during xchg's cycles.  */
2151       memory_dest = 0;
2152       memory_source = 0;
2153 #endif
2154       break;
2155 
2156     case RXO_xor:
2157       LOGIC_OP (^);
2158       break;
2159 
2160     default:
2161       EXCEPTION (EX_UNDEFINED);
2162     }
2163 
2164 #ifdef CYCLE_ACCURATE
2165   regs.m2m = 0;
2166   if (memory_source)
2167     regs.m2m |= M2M_SRC;
2168   if (memory_dest)
2169     regs.m2m |= M2M_DST;
2170 
2171   regs.rt = new_rt;
2172   new_rt = -1;
2173 #endif
2174 
2175 #ifdef WITH_PROFILE
2176   if (prev_cycle_count == regs.cycle_count)
2177     {
2178       printf("Cycle count not updated! id %s\n", id_names[opcode->id]);
2179       abort ();
2180     }
2181 #endif
2182 
2183 #ifdef WITH_PROFILE
2184   if (running_benchmark)
2185     {
2186       int omap = op_lookup (opcode->op[0].type, opcode->op[1].type, opcode->op[2].type);
2187 
2188 
2189       cycles_per_id[opcode->id][omap] += regs.cycle_count - prev_cycle_count;
2190       times_per_id[opcode->id][omap] ++;
2191 
2192       times_per_pair[prev_opcode_id][po0][opcode->id][omap] ++;
2193 
2194       prev_opcode_id = opcode->id;
2195       po0 = omap;
2196     }
2197 #endif
2198 
2199   return RX_MAKE_STEPPED ();
2200 }
2201 
2202 #ifdef WITH_PROFILE
2203 void
2204 reset_pipeline_stats (void)
2205 {
2206   memset (cycles_per_id, 0, sizeof(cycles_per_id));
2207   memset (times_per_id, 0, sizeof(times_per_id));
2208   memory_stalls = 0;
2209   register_stalls = 0;
2210   branch_stalls = 0;
2211   branch_alignment_stalls = 0;
2212   fast_returns = 0;
2213   memset (times_per_pair, 0, sizeof(times_per_pair));
2214   running_benchmark = 1;
2215 
2216   benchmark_start_cycle = regs.cycle_count;
2217 }
2218 
2219 void
2220 halt_pipeline_stats (void)
2221 {
2222   running_benchmark = 0;
2223   benchmark_end_cycle = regs.cycle_count;
2224 }
2225 #endif
2226 
2227 void
2228 pipeline_stats (void)
2229 {
2230 #ifdef WITH_PROFILE
2231   int i, o1;
2232   int p, p1;
2233 #endif
2234 
2235 #ifdef CYCLE_ACCURATE
2236   if (verbose == 1)
2237     {
2238       printf ("cycles: %llu\n", regs.cycle_count);
2239       return;
2240     }
2241 
2242   printf ("cycles: %13s\n", comma (regs.cycle_count));
2243 #endif
2244 
2245 #ifdef WITH_PROFILE
2246   if (benchmark_start_cycle)
2247     printf ("bmark:  %13s\n", comma (benchmark_end_cycle - benchmark_start_cycle));
2248 
2249   printf("\n");
2250   for (i = 0; i < N_RXO; i++)
2251     for (o1 = 0; o1 < N_MAP; o1 ++)
2252       if (times_per_id[i][o1])
2253 	printf("%13s %13s %7.2f  %s %s\n",
2254 	       comma (cycles_per_id[i][o1]),
2255 	       comma (times_per_id[i][o1]),
2256 	       (double)cycles_per_id[i][o1] / times_per_id[i][o1],
2257 	       op_cache_string(o1),
2258 	       id_names[i]+4);
2259 
2260   printf("\n");
2261   for (p = 0; p < N_RXO; p ++)
2262     for (p1 = 0; p1 < N_MAP; p1 ++)
2263       for (i = 0; i < N_RXO; i ++)
2264 	for (o1 = 0; o1 < N_MAP; o1 ++)
2265 	  if (times_per_pair[p][p1][i][o1])
2266 	    {
2267 	      printf("%13s   %s %-9s  ->  %s %s\n",
2268 		     comma (times_per_pair[p][p1][i][o1]),
2269 		     op_cache_string(p1),
2270 		     id_names[p]+4,
2271 		     op_cache_string(o1),
2272 		     id_names[i]+4);
2273 	    }
2274 
2275   printf("\n");
2276   printf("%13s memory stalls\n", comma (memory_stalls));
2277   printf("%13s register stalls\n", comma (register_stalls));
2278   printf("%13s branches taken (non-return)\n", comma (branch_stalls));
2279   printf("%13s branch alignment stalls\n", comma (branch_alignment_stalls));
2280   printf("%13s fast returns\n", comma (fast_returns));
2281 #endif
2282 }
2283