1 /* rx.c --- opcode semantics for stand-alone RX simulator.
2 
3 Copyright (C) 2008-2017 Free Software Foundation, Inc.
4 Contributed by Red Hat, Inc.
5 
6 This file is part of the GNU simulators.
7 
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
12 
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 GNU General Public License for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
20 
21 #include "config.h"
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <signal.h>
26 #include "libiberty.h"
27 
28 #include "opcode/rx.h"
29 #include "cpu.h"
30 #include "mem.h"
31 #include "syscalls.h"
32 #include "fpu.h"
33 #include "err.h"
34 #include "misc.h"
35 
36 #ifdef CYCLE_STATS
37 static const char * id_names[] = {
38   "RXO_unknown",
39   "RXO_mov",	/* d = s (signed) */
40   "RXO_movbi",	/* d = [s,s2] (signed) */
41   "RXO_movbir",	/* [s,s2] = d (signed) */
42   "RXO_pushm",	/* s..s2 */
43   "RXO_popm",	/* s..s2 */
44   "RXO_xchg",	/* s <-> d */
45   "RXO_stcc",	/* d = s if cond(s2) */
46   "RXO_rtsd",	/* rtsd, 1=imm, 2-0 = reg if reg type */
47 
48   /* These are all either d OP= s or, if s2 is set, d = s OP s2.  Note
49      that d may be "None".  */
50   "RXO_and",
51   "RXO_or",
52   "RXO_xor",
53   "RXO_add",
54   "RXO_sub",
55   "RXO_mul",
56   "RXO_div",
57   "RXO_divu",
58   "RXO_shll",
59   "RXO_shar",
60   "RXO_shlr",
61 
62   "RXO_adc",	/* d = d + s + carry */
63   "RXO_sbb",	/* d = d - s - ~carry */
64   "RXO_abs",	/* d = |s| */
65   "RXO_max",	/* d = max(d,s) */
66   "RXO_min",	/* d = min(d,s) */
67   "RXO_emul",	/* d:64 = d:32 * s */
68   "RXO_emulu",	/* d:64 = d:32 * s (unsigned) */
69 
70   "RXO_rolc",	/* d <<= 1 through carry */
71   "RXO_rorc",	/* d >>= 1 through carry*/
72   "RXO_rotl",	/* d <<= #s without carry */
73   "RXO_rotr",	/* d >>= #s without carry*/
74   "RXO_revw",	/* d = revw(s) */
75   "RXO_revl",	/* d = revl(s) */
76   "RXO_branch",	/* pc = d if cond(s) */
77   "RXO_branchrel",/* pc += d if cond(s) */
78   "RXO_jsr",	/* pc = d */
79   "RXO_jsrrel",	/* pc += d */
80   "RXO_rts",
81   "RXO_nop",
82   "RXO_nop2",
83   "RXO_nop3",
84   "RXO_nop4",
85   "RXO_nop5",
86   "RXO_nop6",
87   "RXO_nop7",
88 
89   "RXO_scmpu",
90   "RXO_smovu",
91   "RXO_smovb",
92   "RXO_suntil",
93   "RXO_swhile",
94   "RXO_smovf",
95   "RXO_sstr",
96 
97   "RXO_rmpa",
98   "RXO_mulhi",
99   "RXO_mullo",
100   "RXO_machi",
101   "RXO_maclo",
102   "RXO_mvtachi",
103   "RXO_mvtaclo",
104   "RXO_mvfachi",
105   "RXO_mvfacmi",
106   "RXO_mvfaclo",
107   "RXO_racw",
108 
109   "RXO_sat",	/* sat(d) */
110   "RXO_satr",
111 
112   "RXO_fadd",	/* d op= s */
113   "RXO_fcmp",
114   "RXO_fsub",
115   "RXO_ftoi",
116   "RXO_fmul",
117   "RXO_fdiv",
118   "RXO_round",
119   "RXO_itof",
120 
121   "RXO_bset",	/* d |= (1<<s) */
122   "RXO_bclr",	/* d &= ~(1<<s) */
123   "RXO_btst",	/* s & (1<<s2) */
124   "RXO_bnot",	/* d ^= (1<<s) */
125   "RXO_bmcc",	/* d<s> = cond(s2) */
126 
127   "RXO_clrpsw",	/* flag index in d */
128   "RXO_setpsw",	/* flag index in d */
129   "RXO_mvtipl",	/* new IPL in s */
130 
131   "RXO_rtfi",
132   "RXO_rte",
133   "RXO_rtd",	/* undocumented */
134   "RXO_brk",
135   "RXO_dbt",	/* undocumented */
136   "RXO_int",	/* vector id in s */
137   "RXO_stop",
138   "RXO_wait",
139 
140   "RXO_sccnd",	/* d = cond(s) ? 1 : 0 */
141 };
142 
143 static const char * optype_names[] = {
144   " -  ",
145   "#Imm",	/* #addend */
146   " Rn ",	/* Rn */
147   "[Rn]",	/* [Rn + addend] */
148   "Ps++",	/* [Rn+] */
149   "--Pr",	/* [-Rn] */
150   " cc ",	/* eq, gtu, etc */
151   "Flag",	/* [UIOSZC] */
152   "RbRi"	/* [Rb + scale * Ri] */
153 };
154 
155 #define N_RXO ARRAY_SIZE (id_names)
156 #define N_RXT ARRAY_SIZE (optype_names)
157 #define N_MAP 90
158 
159 static unsigned long long benchmark_start_cycle;
160 static unsigned long long benchmark_end_cycle;
161 
162 static int op_cache[N_RXT][N_RXT][N_RXT];
163 static int op_cache_rev[N_MAP];
164 static int op_cache_idx = 0;
165 
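/* Map an operand-type triple (a, b, c) onto a small dense index so
   the statistics tables below stay a reasonable size.  Triples are
   assigned indices on first use; more than N_MAP distinct triples is
   a hard error.  */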
166 static int
167 op_lookup (int a, int b, int c)
168 {
169   if (op_cache[a][b][c])
170     return op_cache[a][b][c];
171   op_cache_idx ++;
172   if (op_cache_idx >= N_MAP)
173     {
174       printf("op_cache_idx exceeds %d\n", N_MAP);
175       exit(1);
176     }
177   op_cache[a][b][c] = op_cache_idx;
178   op_cache_rev[op_cache_idx] = (a<<8) | (b<<4) | c;
179   return op_cache_idx;
180 }
181 
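/* Turn a map index back into a printable operand-type string.
   Results rotate through five static buffers so that several calls
   can appear in a single printf.  */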
182 static char *
183 op_cache_string (int map)
184 {
185   static int ci;
186   static char cb[5][20];
187   int a, b, c;
188 
189   map = op_cache_rev[map];
190   a = (map >> 8) & 15;
191   b = (map >> 4) & 15;
192   c = (map >> 0) & 15;
193   ci = (ci + 1) % 5;
194   sprintf(cb[ci], "%s %s %s", optype_names[a], optype_names[b], optype_names[c]);
195   return cb[ci];
196 }
197 
198 static unsigned long long cycles_per_id[N_RXO][N_MAP];
199 static unsigned long long times_per_id[N_RXO][N_MAP];
200 static unsigned long long memory_stalls;
201 static unsigned long long register_stalls;
202 static unsigned long long branch_stalls;
203 static unsigned long long branch_alignment_stalls;
204 static unsigned long long fast_returns;
205 
206 static unsigned long times_per_pair[N_RXO][N_MAP][N_RXO][N_MAP];
207 static int prev_opcode_id = RXO_unknown;
208 static int po0;
209 
210 #define STATS(x) x
211 
212 #else
213 #define STATS(x)
214 #endif /* CYCLE_STATS */
215 
216 
217 #ifdef CYCLE_ACCURATE
218 
219 static int new_rt = -1;
220 
221 /* Number of cycles to add if an insn spans an 8-byte boundary.  */
222 static int branch_alignment_penalty = 0;
223 
224 #endif
225 
226 static int running_benchmark = 1;
227 
228 #define tprintf if (trace && running_benchmark) printf
229 
230 jmp_buf decode_jmp_buf;
231 unsigned int rx_cycles = 0;
232 
233 #ifdef CYCLE_ACCURATE
234 /* If nonzero, memory was read at some point and cycle latency might
235    take effect.  */
236 static int memory_source = 0;
237 /* If nonzero, memory was written and extra cycles might be
238    needed.  */
239 static int memory_dest = 0;
240 
241 static void
242 cycles (int throughput)
243 {
244   tprintf("%d cycles\n", throughput);
245   regs.cycle_count += throughput;
246 }
247 
248 /* Number of execution (E) cycles the op uses.  For memory sources, we
249    include the load micro-op stall as two extra E cycles.  */
250 #define E(c) cycles (memory_source ? c + 2 : c)
251 #define E1 cycles (1)
252 #define E2 cycles (2)
253 #define EBIT cycles (memory_source ? 2 : 1)
254 
255 /* Check to see if a read latency must be applied for a given register.  */
256 #define RL(r) \
257   if (regs.rt == r )							\
258     {									\
259       tprintf("register %d load stall\n", r);				\
260       regs.cycle_count ++;						\
261       STATS(register_stalls ++);					\
262       regs.rt = -1;							\
263     }
264 
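/* Note that register R was just loaded from memory; it becomes the
   "Rt" register for the next instruction, and RL() will charge a
   load-use stall if that instruction reads it too soon.  */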
265 #define RLD(r)					\
266   if (memory_source)				\
267     {						\
268       tprintf ("Rt now %d\n", r);		\
269       new_rt = r;				\
270     }
271 
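/* Return a rough count of significant bit pairs in V: 1 if the top
   set bit is bit 0 or 1, 2 for bits 2-3, 3 for bits 4-5, and so on;
   0 if V is zero.  Signed values are replaced by their magnitude
   first.  Used below to estimate divider latency.  */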
272 static int
273 lsb_count (unsigned long v, int is_signed)
274 {
275   int i, lsb;
276   if (is_signed && (v & 0x80000000U))
277     v = (unsigned long)(long)(-v);
278   for (i=31; i>=0; i--)
279     if (v & (1 << i))
280       {
281 	/* i is 0..31; bits 0-1 give 1, bits 2-3 give 2, bits 4-5 give 3, etc. */
282 	lsb = (i + 2) / 2;
283 	return lsb;
284       }
285   return 0;
286 }
287 
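/* Approximate the data-dependent latency of DIVU and DIV: a few
   fixed cycles plus one per extra significant bit pair in the
   numerator relative to the denominator, which is presumably how the
   RX hardware divider iterates.  */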
288 static int
289 divu_cycles(unsigned long num, unsigned long den)
290 {
291   int nb = lsb_count (num, 0);
292   int db = lsb_count (den, 0);
293   int rv;
294 
295   if (nb < db)
296     rv = 2;
297   else
298     rv = 3 + nb - db;
299   E (rv);
300   return rv;
301 }
302 
303 static int
304 div_cycles(long num, long den)
305 {
306   int nb = lsb_count ((unsigned long)num, 1);
307   int db = lsb_count ((unsigned long)den, 1);
308   int rv;
309 
310   if (nb < db)
311     rv = 3;
312   else
313     rv = 5 + nb - db;
314   E (rv);
315   return rv;
316 }
317 
318 #else /* !CYCLE_ACCURATE */
319 
320 #define cycles(t)
321 #define E(c)
322 #define E1
323 #define E2
324 #define EBIT
325 #define RL(r)
326 #define RLD(r)
327 
328 #define divu_cycles(n,d)
329 #define div_cycles(n,d)
330 
331 #endif /* else CYCLE_ACCURATE */
332 
333 static int size2bytes[] = {
334   4, 1, 1, 1, 2, 2, 2, 3, 4
335 };
336 
337 typedef struct {
338   unsigned long dpc;
339 } RX_Data;
340 
341 #define rx_abort() _rx_abort(__FILE__, __LINE__)
342 static void
343 _rx_abort (const char *file, int line)
344 {
345   if (strrchr (file, '/'))
346     file = strrchr (file, '/') + 1;
347   fprintf(stderr, "abort at %s:%d\n", file, line);
348   abort();
349 }
350 
351 static unsigned char *get_byte_base;
352 static RX_Opcode_Decoded **decode_cache_base;
353 static SI get_byte_page;
354 
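/* Forget the cached page pointers so the next instruction fetch
   re-resolves them.  */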
355 void
356 reset_decoder (void)
357 {
358   get_byte_base = 0;
359   decode_cache_base = 0;
360   get_byte_page = 0;
361 }
362 
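/* Refresh the cached base pointers whenever TPC crosses out of the
   cached page; when enable_counting is set we refresh every time,
   presumably so the access counting still sees each fetch.  */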
363 static inline void
364 maybe_get_mem_page (SI tpc)
365 {
366   if (((tpc ^ get_byte_page) & NONPAGE_MASK) || enable_counting)
367     {
368       get_byte_page = tpc & NONPAGE_MASK;
369       get_byte_base = rx_mem_ptr (get_byte_page, MPA_READING) - get_byte_page;
370       decode_cache_base = rx_mem_decode_cache (get_byte_page) - get_byte_page;
371     }
372 }
373 
374 /* This gets called a *lot* so optimize it.  */
375 static int
376 rx_get_byte (void *vdata)
377 {
378   RX_Data *rx_data = (RX_Data *)vdata;
379   SI tpc = rx_data->dpc;
380 
381   /* See load.c for an explanation of this.  */
382   if (rx_big_endian)
383     tpc ^= 3;
384 
385   maybe_get_mem_page (tpc);
386 
387   rx_data->dpc ++;
388   return get_byte_base [tpc];
389 }
390 
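/* Read operand I of the decoded opcode RD: immediates, flags and
   conditions are returned directly, registers via get_reg, and the
   memory addressing modes compute an address (applying any
   pre-decrement or post-increment side effect) and load from
   simulated memory.  The result is then clipped or sign-extended to
   the operand size.  */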
391 static int
392 get_op (const RX_Opcode_Decoded *rd, int i)
393 {
394   const RX_Opcode_Operand *o = rd->op + i;
395   int addr, rv = 0;
396 
397   switch (o->type)
398     {
399     case RX_Operand_None:
400       rx_abort ();
401 
402     case RX_Operand_Immediate:	/* #addend */
403       return o->addend;
404 
405     case RX_Operand_Register:	/* Rn */
406       RL (o->reg);
407       rv = get_reg (o->reg);
408       break;
409 
410     case RX_Operand_Predec:	/* [-Rn] */
411       put_reg (o->reg, get_reg (o->reg) - size2bytes[o->size]);
412       /* fall through */
413     case RX_Operand_Postinc:	/* [Rn+] */
414     case RX_Operand_Zero_Indirect:	/* [Rn + 0] */
415     case RX_Operand_Indirect:	/* [Rn + addend] */
416     case RX_Operand_TwoReg:	/* [Rn + scale * R2] */
417 #ifdef CYCLE_ACCURATE
418       RL (o->reg);
419       if (o->type == RX_Operand_TwoReg)
420 	RL (rd->op[2].reg);
421       regs.rt = -1;
422       if (regs.m2m == M2M_BOTH)
423 	{
424 	  tprintf("src memory stall\n");
425 #ifdef CYCLE_STATS
426 	  memory_stalls ++;
427 #endif
428 	  regs.cycle_count ++;
429 	  regs.m2m = 0;
430 	}
431 
432       memory_source = 1;
433 #endif
434 
435       if (o->type == RX_Operand_TwoReg)
436 	addr = get_reg (o->reg) * size2bytes[rd->size] + get_reg (rd->op[2].reg);
437       else
438 	addr = get_reg (o->reg) + o->addend;
439 
440       switch (o->size)
441 	{
442 	default:
443 	case RX_AnySize:
444 	  rx_abort ();
445 
446 	case RX_Byte: /* undefined extension */
447 	case RX_UByte:
448 	case RX_SByte:
449 	  rv = mem_get_qi (addr);
450 	  break;
451 
452 	case RX_Word: /* undefined extension */
453 	case RX_UWord:
454 	case RX_SWord:
455 	  rv = mem_get_hi (addr);
456 	  break;
457 
458 	case RX_3Byte:
459 	  rv = mem_get_psi (addr);
460 	  break;
461 
462 	case RX_Long:
463 	  rv = mem_get_si (addr);
464 	  break;
465 	}
466 
467       if (o->type == RX_Operand_Postinc)
468 	put_reg (o->reg, get_reg (o->reg) + size2bytes[o->size]);
469 
470       break;
471 
472     case RX_Operand_Condition:	/* eq, gtu, etc */
473       return condition_true (o->reg);
474 
475     case RX_Operand_Flag:	/* [UIOSZC] */
476       return (regs.r_psw & (1 << o->reg)) ? 1 : 0;
477     }
478 
479   /* if we've gotten here, we need to clip/extend the value according
480      to the size.  */
481   switch (o->size)
482     {
483     default:
484     case RX_AnySize:
485       rx_abort ();
486 
487     case RX_Byte: /* undefined extension */
488       rv |= 0xdeadbe00; /* keep them honest */
489       break;
490 
491     case RX_UByte:
492       rv &= 0xff;
493       break;
494 
495     case RX_SByte:
496       rv = sign_ext (rv, 8);
497       break;
498 
499     case RX_Word: /* undefined extension */
500       rv |= 0xdead0000; /* keep them honest */
501       break;
502 
503     case RX_UWord:
504       rv &=  0xffff;
505       break;
506 
507     case RX_SWord:
508       rv = sign_ext (rv, 16);
509       break;
510 
511     case RX_3Byte:
512       rv &= 0xffffff;
513       break;
514 
515     case RX_Long:
516       break;
517     }
518   return rv;
519 }
520 
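/* Write V to operand I of the decoded opcode RD, first clipping or
   sign-extending it to the operand size.  A destination of
   RX_Operand_None is simply discarded (TST and CMP only set flags);
   immediates and conditions are invalid destinations.  */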
521 static void
522 put_op (const RX_Opcode_Decoded *rd, int i, int v)
523 {
524   const RX_Opcode_Operand *o = rd->op + i;
525   int addr;
526 
527   switch (o->size)
528     {
529     default:
530     case RX_AnySize:
531       if (o->type != RX_Operand_Register)
532 	rx_abort ();
533       break;
534 
535     case RX_Byte: /* undefined extension */
536       v |= 0xdeadbe00; /* keep them honest */
537       break;
538 
539     case RX_UByte:
540       v &= 0xff;
541       break;
542 
543     case RX_SByte:
544       v = sign_ext (v, 8);
545       break;
546 
547     case RX_Word: /* undefined extension */
548       v |= 0xdead0000; /* keep them honest */
549       break;
550 
551     case RX_UWord:
552       v &=  0xffff;
553       break;
554 
555     case RX_SWord:
556       v = sign_ext (v, 16);
557       break;
558 
559     case RX_3Byte:
560       v &= 0xffffff;
561       break;
562 
563     case RX_Long:
564       break;
565     }
566 
567   switch (o->type)
568     {
569     case RX_Operand_None:
570       /* Opcodes like TST and CMP use this.  */
571       break;
572 
573     case RX_Operand_Immediate:	/* #addend */
574     case RX_Operand_Condition:	/* eq, gtu, etc */
575       rx_abort ();
576 
577     case RX_Operand_Register:	/* Rn */
578       put_reg (o->reg, v);
579       RLD (o->reg);
580       break;
581 
582     case RX_Operand_Predec:	/* [-Rn] */
583       put_reg (o->reg, get_reg (o->reg) - size2bytes[o->size]);
584       /* fall through */
585     case RX_Operand_Postinc:	/* [Rn+] */
586     case RX_Operand_Zero_Indirect:	/* [Rn + 0] */
587     case RX_Operand_Indirect:	/* [Rn + addend] */
588     case RX_Operand_TwoReg:	/* [Rn + scale * R2] */
589 
590 #ifdef CYCLE_ACCURATE
591       if (regs.m2m == M2M_BOTH)
592 	{
593 	  tprintf("dst memory stall\n");
594 	  regs.cycle_count ++;
595 #ifdef CYCLE_STATS
596 	  memory_stalls ++;
597 #endif
598 	  regs.m2m = 0;
599 	}
600       memory_dest = 1;
601 #endif
602 
603       if (o->type == RX_Operand_TwoReg)
604 	addr = get_reg (o->reg) * size2bytes[rd->size] + get_reg (rd->op[2].reg);
605       else
606 	addr = get_reg (o->reg) + o->addend;
607 
608       switch (o->size)
609 	{
610 	default:
611 	case RX_AnySize:
612 	  rx_abort ();
613 
614 	case RX_Byte: /* undefined extension */
615 	case RX_UByte:
616 	case RX_SByte:
617 	  mem_put_qi (addr, v);
618 	  break;
619 
620 	case RX_Word: /* undefined extension */
621 	case RX_UWord:
622 	case RX_SWord:
623 	  mem_put_hi (addr, v);
624 	  break;
625 
626 	case RX_3Byte:
627 	  mem_put_psi (addr, v);
628 	  break;
629 
630 	case RX_Long:
631 	  mem_put_si (addr, v);
632 	  break;
633 	}
634 
635       if (o->type == RX_Operand_Postinc)
636 	put_reg (o->reg, get_reg (o->reg) + size2bytes[o->size]);
637 
638       break;
639 
640     case RX_Operand_Flag:	/* [UIOSZC] */
641       if (v)
642 	regs.r_psw |= (1 << o->reg);
643       else
644 	regs.r_psw &= ~(1 << o->reg);
645       break;
646     }
647 }
648 
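/* Operand shorthands for the current opcode: 0 is the destination,
   1 the source, 2 the second source.  PD/PS/PS2 store, GD/GS/GS2
   fetch, and the *SZ() macros give an operand size in bytes.  */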
649 #define PD(x) put_op (opcode, 0, x)
650 #define PS(x) put_op (opcode, 1, x)
651 #define PS2(x) put_op (opcode, 2, x)
652 #define GD() get_op (opcode, 0)
653 #define GS() get_op (opcode, 1)
654 #define GS2() get_op (opcode, 2)
655 #define DSZ() size2bytes[opcode->op[0].size]
656 #define SSZ() size2bytes[opcode->op[0].size]
657 #define S2SZ() size2bytes[opcode->op[0].size]
658 
659 /* "Universal" sources.  */
660 #define US1() ((opcode->op[2].type == RX_Operand_None) ? GD() : GS())
661 #define US2() ((opcode->op[2].type == RX_Operand_None) ? GS() : GS2())
662 
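/* Simulated stack helpers: the stack grows downward through R0 (sp),
   one 32-bit word at a time.  */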
663 static void
664 push(int val)
665 {
666   int rsp = get_reg (sp);
667   rsp -= 4;
668   put_reg (sp, rsp);
669   mem_put_si (rsp, val);
670 }
671 
672 /* Just like the above, but tag the memory as "pushed pc" so if anyone
673    tries to write to it, it will cause an error.  */
674 static void
675 pushpc(int val)
676 {
677   int rsp = get_reg (sp);
678   rsp -= 4;
679   put_reg (sp, rsp);
680   mem_put_si (rsp, val);
681   mem_set_content_range (rsp, rsp+3, MC_PUSHED_PC);
682 }
683 
684 static int
685 pop()
686 {
687   int rv;
688   int rsp = get_reg (sp);
689   rv = mem_get_si (rsp);
690   rsp += 4;
691   put_reg (sp, rsp);
692   return rv;
693 }
694 
695 static int
696 poppc()
697 {
698   int rv;
699   int rsp = get_reg (sp);
700   if (mem_get_content_type (rsp) != MC_PUSHED_PC)
701     execution_error (SIM_ERR_CORRUPT_STACK, rsp);
702   rv = mem_get_si (rsp);
703   mem_set_content_range (rsp, rsp+3, MC_UNINIT);
704   rsp += 4;
705   put_reg (sp, rsp);
706   return rv;
707 }
708 
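/* Arithmetic core shared by ADD, ADC, SUB and SBB: the operation is
   performed both on the raw unsigned values and on sign-extended
   copies so that set_oszc can derive carry/borrow and overflow, and
   the signed result is stored.  The "(1 vop 1)" expression only
   distinguishes addition (non-zero) from subtraction (zero) when
   picking the carry test.  For example, RXO_adc below uses
   MATH_OP (+, carry), i.e. d = d + s + C.  */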
709 #define MATH_OP(vop,c)				\
710 { \
711   umb = US2(); \
712   uma = US1(); \
713   ll = (unsigned long long) uma vop (unsigned long long) umb vop c; \
714   tprintf ("0x%x " #vop " 0x%x " #vop " 0x%x = 0x%llx\n", uma, umb, c, ll); \
715   ma = sign_ext (uma, DSZ() * 8);					\
716   mb = sign_ext (umb, DSZ() * 8);					\
717   sll = (long long) ma vop (long long) mb vop c; \
718   tprintf ("%d " #vop " %d " #vop " %d = %lld\n", ma, mb, c, sll); \
719   set_oszc (sll, DSZ(), (long long) ll > ((1 vop 1) ? (long long) b2mask[DSZ()] : (long long) -1)); \
720   PD (sll); \
721   E (1);    \
722 }
723 
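/* Bitwise core shared by AND, OR and XOR: apply the C operator and
   update S/Z from the result.  */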
724 #define LOGIC_OP(vop) \
725 { \
726   mb = US2(); \
727   ma = US1(); \
728   v = ma vop mb; \
729   tprintf("0x%x " #vop " 0x%x = 0x%x\n", ma, mb, v); \
730   set_sz (v, DSZ()); \
731   PD(v); \
732   E (1); \
733 }
734 
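/* Shift core for SHLL, SHAR and SHLR: shift one bit at a time so the
   last bit shifted out (selected by CARRY_MASK) is captured, then
   set the flags from the 32-bit result and that bit.  */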
735 #define SHIFT_OP(val, type, count, OP, carry_mask)	\
736 { \
737   int i, c=0; \
738   count = US2(); \
739   val = (type)US1();				\
740   tprintf("%lld " #OP " %d\n", val, count); \
741   for (i = 0; i < count; i ++) \
742     { \
743       c = val & carry_mask; \
744       val OP 1; \
745     } \
746   set_oszc (val, 4, c); \
747   PD (val); \
748 }
749 
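/* Reinterpret the bits of a 32-bit value as a host float and back,
   mostly so trace output can show floating point operands as
   numbers.  */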
750 typedef union {
751   int i;
752   float f;
753 } FloatInt;
754 
755 static inline int
756 float2int (float f)
757 {
758   FloatInt fi;
759   fi.f = f;
760   return fi.i;
761 }
762 
763 static inline float
764 int2float (int i)
765 {
766   FloatInt fi;
767   fi.i = i;
768   return fi.f;
769 }
770 
771 static int
772 fop_fadd (fp_t s1, fp_t s2, fp_t *d)
773 {
774   *d = rxfp_add (s1, s2);
775   return 1;
776 }
777 
778 static int
779 fop_fmul (fp_t s1, fp_t s2, fp_t *d)
780 {
781   *d = rxfp_mul (s1, s2);
782   return 1;
783 }
784 
785 static int
786 fop_fdiv (fp_t s1, fp_t s2, fp_t *d)
787 {
788   *d = rxfp_div (s1, s2);
789   return 1;
790 }
791 
792 static int
793 fop_fsub (fp_t s1, fp_t s2, fp_t *d)
794 {
795   *d = rxfp_sub (s1, s2);
796   return 1;
797 }
798 
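/* FP status helpers: FPPENDING() is nonzero when an exception flag
   whose matching enable bit is set is pending (CE is treated as
   always enabled), FPCLEAR() resets the per-operation flag bits, and
   FPCHECK() bails out to do_fp_exception if anything is pending.  */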
799 #define FPPENDING() (regs.r_fpsw & (FPSWBITS_CE | (FPSWBITS_FMASK & (regs.r_fpsw << FPSW_EFSH))))
800 #define FPCLEAR() regs.r_fpsw &= FPSWBITS_CLEAR
801 #define FPCHECK() \
802   if (FPPENDING()) \
803     return do_fp_exception (opcode_pc)
804 
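/* Common wrapper for FADD, FSUB, FMUL and FDIV: clear the FP flags,
   run the operation through fop_<func>, raise any pending FP
   exception, then store the result and set S/Z from the sign bit and
   a zero result.  */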
805 #define FLOAT_OP(func) \
806 { \
807   int do_store;   \
808   fp_t fa, fb, fc; \
809   FPCLEAR(); \
810   fb = GS (); \
811   fa = GD (); \
812   do_store = fop_##func (fa, fb, &fc); \
813   tprintf("%g " #func " %g = %g %08x\n", int2float(fa), int2float(fb), int2float(fc), fc); \
814   FPCHECK(); \
815   if (do_store) \
816     PD (fc);	\
817   mb = 0; \
818   if ((fc & 0x80000000UL) != 0) \
819     mb |= FLAGBIT_S; \
820   if ((fc & 0x7fffffffUL) == 0)			\
821     mb |= FLAGBIT_Z; \
822   set_flags (FLAGBIT_S | FLAGBIT_Z, mb); \
823 }
824 
825 #define carry (FLAG_C ? 1 : 0)
826 
827 static struct {
828   unsigned long vaddr;
829   const char *str;
830   int signal;
831 } exception_info[] = {
832   { 0xFFFFFFD0UL, "privileged opcode", SIGILL },
833   { 0xFFFFFFD4UL, "access violation", SIGSEGV },
834   { 0xFFFFFFDCUL, "undefined opcode", SIGILL },
835   { 0xFFFFFFE4UL, "floating point", SIGFPE }
836 };
837 #define EX_PRIVILEDGED	0
838 #define EX_ACCESS	1
839 #define EX_UNDEFINED	2
840 #define EX_FLOATING	3
841 #define EXCEPTION(n)  \
842   return generate_exception (n, opcode_pc)
843 
844 #define PRIVILEDGED() \
845   if (FLAG_PM) \
846     EXCEPTION (EX_PRIVILEDGED)
847 
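/* Raise exception TYPE at OPCODE_PC.  If the vector entry looks
   uninstalled (zero, or the 0x00020000 filler described below), the
   exception is reported to GDB or treated as a fatal error;
   otherwise the PSW and PC are pushed and control transfers to the
   handler with I, U and PM cleared.  */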
848 static int
849 generate_exception (unsigned long type, SI opcode_pc)
850 {
851   SI old_psw, old_pc, new_pc;
852 
853   new_pc = mem_get_si (exception_info[type].vaddr);
854   /* 0x00020000 is the value used to initialise the known
855      exception vectors (see rx.ld).  It lies in a reserved area
856      of memory, so do not try to access it; if the program has
857      not changed the value then the vector has not been
858      installed.  */
859   if (new_pc == 0 || new_pc == 0x00020000)
860     {
861       if (rx_in_gdb)
862 	return RX_MAKE_STOPPED (exception_info[type].signal);
863 
864       fprintf(stderr, "Unhandled %s exception at pc = %#lx\n",
865 	      exception_info[type].str, (unsigned long) opcode_pc);
866       if (type == EX_FLOATING)
867 	{
868 	  int mask = FPPENDING ();
869 	  fprintf (stderr, "Pending FP exceptions:");
870 	  if (mask & FPSWBITS_FV)
871 	    fprintf(stderr, " Invalid");
872 	  if (mask & FPSWBITS_FO)
873 	    fprintf(stderr, " Overflow");
874 	  if (mask & FPSWBITS_FZ)
875 	    fprintf(stderr, " Division-by-zero");
876 	  if (mask & FPSWBITS_FU)
877 	    fprintf(stderr, " Underflow");
878 	  if (mask & FPSWBITS_FX)
879 	    fprintf(stderr, " Inexact");
880 	  if (mask & FPSWBITS_CE)
881 	    fprintf(stderr, " Unimplemented");
882 	  fprintf(stderr, "\n");
883 	}
884       return RX_MAKE_EXITED (1);
885     }
886 
887   tprintf ("Triggering %s exception\n", exception_info[type].str);
888 
889   old_psw = regs.r_psw;
890   regs.r_psw &= ~ (FLAGBIT_I | FLAGBIT_U | FLAGBIT_PM);
891   old_pc = opcode_pc;
892   regs.r_pc = new_pc;
893   pushpc (old_psw);
894   pushpc (old_pc);
895   return RX_MAKE_STEPPED ();
896 }
897 
898 void
899 generate_access_exception (void)
900 {
901   int rv;
902 
903   rv = generate_exception (EX_ACCESS, regs.r_pc);
904   if (RX_EXITED (rv))
905     longjmp (decode_jmp_buf, rv);
906 }
907 
908 static int
909 do_fp_exception (unsigned long opcode_pc)
910 {
911   while (FPPENDING())
912     EXCEPTION (EX_FLOATING);
913   return RX_MAKE_STEPPED ();
914 }
915 
916 static int
917 op_is_memory (const RX_Opcode_Decoded *rd, int i)
918 {
919   switch (rd->op[i].type)
920     {
921     case RX_Operand_Predec:
922     case RX_Operand_Postinc:
923     case RX_Operand_Indirect:
924       return 1;
925     default:
926       return 0;
927     }
928 }
929 #define OM(i) op_is_memory (opcode, i)
930 
931 #define DO_RETURN(x) { longjmp (decode_jmp_buf, x); }
932 
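/* Fetch, decode (via a per-address decode cache) and execute one
   instruction at regs.r_pc, returning an RX_MAKE_* status.  Paths
   that must abandon execution immediately (BRK, WAIT, exiting
   syscalls, ...) leave through DO_RETURN, which longjmps out via
   decode_jmp_buf.  */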
933 int
934 decode_opcode ()
935 {
936   unsigned int uma=0, umb=0;
937   int ma=0, mb=0;
938   int opcode_size, v;
939   unsigned long long ll;
940   long long sll;
941   unsigned long opcode_pc;
942   RX_Data rx_data;
943   const RX_Opcode_Decoded *opcode;
944 #ifdef CYCLE_STATS
945   unsigned long long prev_cycle_count;
946 #endif
947 #ifdef CYCLE_ACCURATE
948   unsigned int tx;
949 #endif
950 
951 #ifdef CYCLE_STATS
952   prev_cycle_count = regs.cycle_count;
953 #endif
954 
955 #ifdef CYCLE_ACCURATE
956   memory_source = 0;
957   memory_dest = 0;
958 #endif
959 
960   rx_cycles ++;
961 
962   maybe_get_mem_page (regs.r_pc);
963 
964   opcode_pc = regs.r_pc;
965 
966   /* Note that we don't word-swap here; there's no point.  */
967   if (decode_cache_base[opcode_pc] == NULL)
968     {
969       RX_Opcode_Decoded *opcode_w;
970       rx_data.dpc = opcode_pc;
971       opcode_w = decode_cache_base[opcode_pc] = calloc (1, sizeof (RX_Opcode_Decoded));
972       opcode_size = rx_decode_opcode (opcode_pc, opcode_w,
973 				      rx_get_byte, &rx_data);
974       opcode = opcode_w;
975     }
976   else
977     {
978       opcode = decode_cache_base[opcode_pc];
979       opcode_size = opcode->n_bytes;
980     }
981 
982 #ifdef CYCLE_ACCURATE
983   if (branch_alignment_penalty)
984     {
985       if ((regs.r_pc ^ (regs.r_pc + opcode_size - 1)) & ~7)
986 	{
987 	  tprintf("1 cycle branch alignment penalty\n");
988 	  cycles (branch_alignment_penalty);
989 #ifdef CYCLE_STATS
990 	  branch_alignment_stalls ++;
991 #endif
992 	}
993       branch_alignment_penalty = 0;
994     }
995 #endif
996 
997   regs.r_pc += opcode_size;
998 
999   rx_flagmask = opcode->flags_s;
1000   rx_flagand = ~(int)opcode->flags_0;
1001   rx_flagor = opcode->flags_1;
1002 
1003   switch (opcode->id)
1004     {
1005     case RXO_abs:
1006       sll = GS ();
1007       tprintf("|%lld| = ", sll);
1008       if (sll < 0)
1009 	sll = -sll;
1010       tprintf("%lld\n", sll);
1011       PD (sll);
1012       set_osz (sll, 4);
1013       E (1);
1014       break;
1015 
1016     case RXO_adc:
1017       MATH_OP (+,carry);
1018       break;
1019 
1020     case RXO_add:
1021       MATH_OP (+,0);
1022       break;
1023 
1024     case RXO_and:
1025       LOGIC_OP (&);
1026       break;
1027 
1028     case RXO_bclr:
1029       ma = GD ();
1030       mb = GS ();
1031       if (opcode->op[0].type == RX_Operand_Register)
1032 	mb &= 0x1f;
1033       else
1034 	mb &= 0x07;
1035       ma &= ~(1 << mb);
1036       PD (ma);
1037       EBIT;
1038       break;
1039 
1040     case RXO_bmcc:
1041       ma = GD ();
1042       mb = GS ();
1043       if (opcode->op[0].type == RX_Operand_Register)
1044 	mb &= 0x1f;
1045       else
1046 	mb &= 0x07;
1047       if (GS2 ())
1048 	ma |= (1 << mb);
1049       else
1050 	ma &= ~(1 << mb);
1051       PD (ma);
1052       EBIT;
1053       break;
1054 
1055     case RXO_bnot:
1056       ma = GD ();
1057       mb = GS ();
1058       if (opcode->op[0].type == RX_Operand_Register)
1059 	mb &= 0x1f;
1060       else
1061 	mb &= 0x07;
1062       ma ^= (1 << mb);
1063       PD (ma);
1064       EBIT;
1065       break;
1066 
1067     case RXO_branch:
1068       if (opcode->op[1].type == RX_Operand_None || GS())
1069 	{
1070 #ifdef CYCLE_ACCURATE
1071 	  SI old_pc = regs.r_pc;
1072 	  int delta;
1073 #endif
1074 	  regs.r_pc = GD();
1075 #ifdef CYCLE_ACCURATE
1076 	  delta = regs.r_pc - old_pc;
1077 	  if (delta >= 0 && delta < 16
1078 	      && opcode_size > 1)
1079 	    {
1080 	      tprintf("near forward branch bonus\n");
1081 	      cycles (2);
1082 	    }
1083 	  else
1084 	    {
1085 	      cycles (3);
1086 	      branch_alignment_penalty = 1;
1087 	    }
1088 #ifdef CYCLE_STATS
1089 	  branch_stalls ++;
1090 #endif
1091 #endif
1092 	}
1093 #ifdef CYCLE_ACCURATE
1094       else
1095 	cycles (1);
1096 #endif
1097       break;
1098 
1099     case RXO_branchrel:
1100       if (opcode->op[1].type == RX_Operand_None || GS())
1101 	{
1102 	  int delta = GD();
1103 	  regs.r_pc = opcode_pc + delta;
1104 #ifdef CYCLE_ACCURATE
1105 	  /* Note: specs say 3, chip says 2.  */
1106 	  if (delta >= 0 && delta < 16
1107 	      && opcode_size > 1)
1108 	    {
1109 	      tprintf("near forward branch bonus\n");
1110 	      cycles (2);
1111 	    }
1112 	  else
1113 	    {
1114 	      cycles (3);
1115 	      branch_alignment_penalty = 1;
1116 	    }
1117 #ifdef CYCLE_STATS
1118 	  branch_stalls ++;
1119 #endif
1120 #endif
1121 	}
1122 #ifdef CYCLE_ACCURATE
1123       else
1124 	cycles (1);
1125 #endif
1126       break;
1127 
1128     case RXO_brk:
1129       {
1130 	int old_psw = regs.r_psw;
1131 	if (rx_in_gdb)
1132 	  DO_RETURN (RX_MAKE_HIT_BREAK ());
1133 	if (regs.r_intb == 0)
1134 	  {
1135 	    tprintf("BREAK hit, no vector table.\n");
1136 	    DO_RETURN (RX_MAKE_EXITED(1));
1137 	  }
1138 	regs.r_psw &= ~(FLAGBIT_I | FLAGBIT_U | FLAGBIT_PM);
1139 	pushpc (old_psw);
1140 	pushpc (regs.r_pc);
1141 	regs.r_pc = mem_get_si (regs.r_intb);
1142 	cycles(6);
1143       }
1144       break;
1145 
1146     case RXO_bset:
1147       ma = GD ();
1148       mb = GS ();
1149       if (opcode->op[0].type == RX_Operand_Register)
1150 	mb &= 0x1f;
1151       else
1152 	mb &= 0x07;
1153       ma |= (1 << mb);
1154       PD (ma);
1155       EBIT;
1156       break;
1157 
1158     case RXO_btst:
1159       ma = GS ();
1160       mb = GS2 ();
1161       if (opcode->op[1].type == RX_Operand_Register)
1162 	mb &= 0x1f;
1163       else
1164 	mb &= 0x07;
1165       umb = ma & (1 << mb);
1166       set_zc (! umb, umb);
1167       EBIT;
1168       break;
1169 
1170     case RXO_clrpsw:
1171       v = 1 << opcode->op[0].reg;
1172       if (FLAG_PM
1173 	  && (v == FLAGBIT_I
1174 	      || v == FLAGBIT_U))
1175 	break;
1176       regs.r_psw &= ~v;
1177       cycles (1);
1178       break;
1179 
1180     case RXO_div: /* d = d / s */
1181       ma = GS();
1182       mb = GD();
1183       tprintf("%d / %d = ", mb, ma);
1184       if (ma == 0 || (ma == -1 && (unsigned int) mb == 0x80000000))
1185 	{
1186 	  tprintf("#NAN\n");
1187 	  set_flags (FLAGBIT_O, FLAGBIT_O);
1188 	  cycles (3);
1189 	}
1190       else
1191 	{
1192 	  v = mb/ma;
1193 	  tprintf("%d\n", v);
1194 	  set_flags (FLAGBIT_O, 0);
1195 	  PD (v);
1196 	  div_cycles (mb, ma);
1197 	}
1198       break;
1199 
1200     case RXO_divu: /* d = d / s */
1201       uma = GS();
1202       umb = GD();
1203       tprintf("%u / %u = ", umb, uma);
1204       if (uma == 0)
1205 	{
1206 	  tprintf("#NAN\n");
1207 	  set_flags (FLAGBIT_O, FLAGBIT_O);
1208 	  cycles (2);
1209 	}
1210       else
1211 	{
1212 	  v = umb / uma;
1213 	  tprintf("%u\n", v);
1214 	  set_flags (FLAGBIT_O, 0);
1215 	  PD (v);
1216 	  divu_cycles (umb, uma);
1217 	}
1218       break;
1219 
1220     case RXO_emul:
1221       ma = GD ();
1222       mb = GS ();
1223       sll = (long long)ma * (long long)mb;
1224       tprintf("%d * %d = %lld\n", ma, mb, sll);
1225       put_reg (opcode->op[0].reg, sll);
1226       put_reg (opcode->op[0].reg + 1, sll >> 32);
1227       E2;
1228       break;
1229 
1230     case RXO_emulu:
1231       uma = GD ();
1232       umb = GS ();
1233       ll = (long long)uma * (long long)umb;
1234       tprintf("%#x * %#x = %#llx\n", uma, umb, ll);
1235       put_reg (opcode->op[0].reg, ll);
1236       put_reg (opcode->op[0].reg + 1, ll >> 32);
1237       E2;
1238       break;
1239 
1240     case RXO_fadd:
1241       FLOAT_OP (fadd);
1242       E (4);
1243       break;
1244 
1245     case RXO_fcmp:
1246       ma = GD();
1247       mb = GS();
1248       FPCLEAR ();
1249       rxfp_cmp (ma, mb);
1250       FPCHECK ();
1251       E (1);
1252       break;
1253 
1254     case RXO_fdiv:
1255       FLOAT_OP (fdiv);
1256       E (16);
1257       break;
1258 
1259     case RXO_fmul:
1260       FLOAT_OP (fmul);
1261       E (3);
1262       break;
1263 
1264     case RXO_rtfi:
1265       PRIVILEDGED ();
1266       regs.r_psw = regs.r_bpsw;
1267       regs.r_pc = regs.r_bpc;
1268 #ifdef CYCLE_ACCURATE
1269       regs.fast_return = 0;
1270       cycles(3);
1271 #endif
1272       break;
1273 
1274     case RXO_fsub:
1275       FLOAT_OP (fsub);
1276       E (4);
1277       break;
1278 
1279     case RXO_ftoi:
1280       ma = GS ();
1281       FPCLEAR ();
1282       mb = rxfp_ftoi (ma, FPRM_ZERO);
1283       FPCHECK ();
1284       PD (mb);
1285       tprintf("(int) %g = %d\n", int2float(ma), mb);
1286       set_sz (mb, 4);
1287       E (2);
1288       break;
1289 
1290     case RXO_int:
1291       v = GS ();
1292       if (v == 255)
1293 	{
1294 	  int rc = rx_syscall (regs.r[5]);
1295 	  if (! RX_STEPPED (rc))
1296 	    DO_RETURN (rc);
1297 	}
1298       else
1299 	{
1300 	  int old_psw = regs.r_psw;
1301 	  regs.r_psw &= ~(FLAGBIT_I | FLAGBIT_U | FLAGBIT_PM);
1302 	  pushpc (old_psw);
1303 	  pushpc (regs.r_pc);
1304 	  regs.r_pc = mem_get_si (regs.r_intb + 4 * v);
1305 	}
1306       cycles (6);
1307       break;
1308 
1309     case RXO_itof:
1310       ma = GS ();
1311       FPCLEAR ();
1312       mb = rxfp_itof (ma, regs.r_fpsw);
1313       FPCHECK ();
1314       tprintf("(float) %d = %x\n", ma, mb);
1315       PD (mb);
1316       set_sz (ma, 4);
1317       E (2);
1318       break;
1319 
1320     case RXO_jsr:
1321     case RXO_jsrrel:
1322       {
1323 #ifdef CYCLE_ACCURATE
1324 	int delta;
1325 	regs.m2m = 0;
1326 #endif
1327 	v = GD ();
1328 #ifdef CYCLE_ACCURATE
1329 	regs.link_register = regs.r_pc;
1330 #endif
1331 	pushpc (get_reg (pc));
1332 	if (opcode->id == RXO_jsrrel)
1333 	  v += regs.r_pc;
1334 #ifdef CYCLE_ACCURATE
1335 	delta = v - regs.r_pc;
1336 #endif
1337 	put_reg (pc, v);
1338 #ifdef CYCLE_ACCURATE
1339 	/* Note: docs say 3, chip says 2 */
1340 	if (delta >= 0 && delta < 16)
1341 	  {
1342 	    tprintf ("near forward jsr bonus\n");
1343 	    cycles (2);
1344 	  }
1345 	else
1346 	  {
1347 	    branch_alignment_penalty = 1;
1348 	    cycles (3);
1349 	  }
1350 	regs.fast_return = 1;
1351 #endif
1352       }
1353       break;
1354 
1355     case RXO_machi:
1356       ll = (long long)(signed short)(GS() >> 16) * (long long)(signed short)(GS2 () >> 16);
1357       ll <<= 16;
1358       put_reg64 (acc64, ll + regs.r_acc);
1359       E1;
1360       break;
1361 
1362     case RXO_maclo:
1363       ll = (long long)(signed short)(GS()) * (long long)(signed short)(GS2 ());
1364       ll <<= 16;
1365       put_reg64 (acc64, ll + regs.r_acc);
1366       E1;
1367       break;
1368 
1369     case RXO_max:
1370       mb = GS();
1371       ma = GD();
1372       if (ma > mb)
1373 	PD (ma);
1374       else
1375 	PD (mb);
1376       E (1);
1377       break;
1378 
1379     case RXO_min:
1380       mb = GS();
1381       ma = GD();
1382       if (ma < mb)
1383 	PD (ma);
1384       else
1385 	PD (mb);
1386       E (1);
1387       break;
1388 
1389     case RXO_mov:
1390       v = GS ();
1391 
1392       if (opcode->op[1].type == RX_Operand_Register
1393 	  && opcode->op[1].reg == 17 /* PC */)
1394 	{
1395 	  /* Special case.  We want the address of the insn, not the
1396 	     address of the next insn.  */
1397 	  v = opcode_pc;
1398 	}
1399 
1400       if (opcode->op[0].type == RX_Operand_Register
1401 	  && opcode->op[0].reg == 16 /* PSW */)
1402 	{
1403 	  /* Special case, LDC and POPC can't ever modify PM.  */
1404 	  int pm = regs.r_psw & FLAGBIT_PM;
1405 	  v &= ~ FLAGBIT_PM;
1406 	  v |= pm;
1407 	  if (pm)
1408 	    {
1409 	      v &= ~ (FLAGBIT_I | FLAGBIT_U | FLAGBITS_IPL);
1410 	      v |= pm;
1411 	    }
1412 	}
1413       if (FLAG_PM)
1414 	{
1415 	  /* various things can't be changed in user mode.  */
1416 	  if (opcode->op[0].type == RX_Operand_Register)
1417 	    if (opcode->op[0].reg == 32)
1418 	      {
1419 		v &= ~ (FLAGBIT_I | FLAGBIT_U | FLAGBITS_IPL);
1420 		v |= regs.r_psw & (FLAGBIT_I | FLAGBIT_U | FLAGBITS_IPL);
1421 	      }
1422 	  if (opcode->op[0].reg == 34 /* ISP */
1423 	      || opcode->op[0].reg == 37 /* BPSW */
1424 	      || opcode->op[0].reg == 39 /* INTB */
1425 	      || opcode->op[0].reg == 38 /* VCT */)
1426 	    /* These are ignored.  */
1427 	    break;
1428 	}
1429       if (OM(0) && OM(1))
1430 	cycles (2);
1431       else
1432 	cycles (1);
1433 
1434       PD (v);
1435 
1436 #ifdef CYCLE_ACCURATE
1437       if ((opcode->op[0].type == RX_Operand_Predec
1438 	   && opcode->op[1].type == RX_Operand_Register)
1439 	  || (opcode->op[0].type == RX_Operand_Postinc
1440 	      && opcode->op[1].type == RX_Operand_Register))
1441 	{
1442 	  /* Special case: push reg doesn't cause a memory stall.  */
1443 	  memory_dest = 0;
1444 	  tprintf("push special case\n");
1445 	}
1446 #endif
1447 
1448       set_sz (v, DSZ());
1449       break;
1450 
1451     case RXO_movbi:
1452       PD (GS ());
1453       cycles (1);
1454       break;
1455 
1456     case RXO_movbir:
1457       PS (GD ());
1458       cycles (1);
1459       break;
1460 
1461     case RXO_mul:
1462       v = US2 ();
1463       ll = (unsigned long long) US1() * (unsigned long long) v;
1464       PD(ll);
1465       E (1);
1466       break;
1467 
1468     case RXO_mulhi:
1469       v = GS2 ();
1470       ll = (long long)(signed short)(GS() >> 16) * (long long)(signed short)(v >> 16);
1471       ll <<= 16;
1472       put_reg64 (acc64, ll);
1473       E1;
1474       break;
1475 
1476     case RXO_mullo:
1477       v = GS2 ();
1478       ll = (long long)(signed short)(GS()) * (long long)(signed short)(v);
1479       ll <<= 16;
1480       put_reg64 (acc64, ll);
1481       E1;
1482       break;
1483 
1484     case RXO_mvfachi:
1485       PD (get_reg (acchi));
1486       E1;
1487       break;
1488 
1489     case RXO_mvfaclo:
1490       PD (get_reg (acclo));
1491       E1;
1492       break;
1493 
1494     case RXO_mvfacmi:
1495       PD (get_reg (accmi));
1496       E1;
1497       break;
1498 
1499     case RXO_mvtachi:
1500       put_reg (acchi, GS ());
1501       E1;
1502       break;
1503 
1504     case RXO_mvtaclo:
1505       put_reg (acclo, GS ());
1506       E1;
1507       break;
1508 
1509     case RXO_mvtipl:
1510       regs.r_psw &= ~ FLAGBITS_IPL;
1511       regs.r_psw |= (GS () << FLAGSHIFT_IPL) & FLAGBITS_IPL;
1512       E1;
1513       break;
1514 
1515     case RXO_nop:
1516     case RXO_nop2:
1517     case RXO_nop3:
1518     case RXO_nop4:
1519     case RXO_nop5:
1520     case RXO_nop6:
1521     case RXO_nop7:
1522       E1;
1523       break;
1524 
1525     case RXO_or:
1526       LOGIC_OP (|);
1527       break;
1528 
1529     case RXO_popm:
1530       /* POPM cannot pop R0 (sp).  */
1531       if (opcode->op[1].reg == 0 || opcode->op[2].reg == 0)
1532 	EXCEPTION (EX_UNDEFINED);
1533       if (opcode->op[1].reg >= opcode->op[2].reg)
1534 	{
1535 	  regs.r_pc = opcode_pc;
1536 	  DO_RETURN (RX_MAKE_STOPPED (SIGILL));
1537 	}
1538       for (v = opcode->op[1].reg; v <= opcode->op[2].reg; v++)
1539 	{
1540 	  cycles (1);
1541 	  RLD (v);
1542 	  put_reg (v, pop ());
1543 	}
1544       break;
1545 
1546     case RXO_pushm:
1547       /* PUSHM cannot push R0 (sp).  */
1548       if (opcode->op[1].reg == 0 || opcode->op[2].reg == 0)
1549 	EXCEPTION (EX_UNDEFINED);
1550       if (opcode->op[1].reg >= opcode->op[2].reg)
1551 	{
1552 	  regs.r_pc = opcode_pc;
1553 	  return RX_MAKE_STOPPED (SIGILL);
1554 	}
1555       for (v = opcode->op[2].reg; v >= opcode->op[1].reg; v--)
1556 	{
1557 	  RL (v);
1558 	  push (get_reg (v));
1559 	}
1560       cycles (opcode->op[2].reg - opcode->op[1].reg + 1);
1561       break;
1562 
1563     case RXO_racw:
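      /* Round the accumulator: shift left by the source operand (1 or
	 2), add 0x80000000 to round the low half, saturate the high
	 half to the signed 32-bit range, then clear the low half.
	 For example, an accumulator of 0x0000000060000000 shifted by
	 1 becomes 0x0000000100000000.  */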
1564       ll = get_reg64 (acc64) << GS ();
1565       ll += 0x80000000ULL;
1566       if ((signed long long)ll > (signed long long)0x00007fff00000000ULL)
1567 	ll = 0x00007fff00000000ULL;
1568       else if ((signed long long)ll < (signed long long)0xffff800000000000ULL)
1569 	ll = 0xffff800000000000ULL;
1570       else
1571 	ll &= 0xffffffff00000000ULL;
1572       put_reg64 (acc64, ll);
1573       E1;
1574       break;
1575 
1576     case RXO_rte:
1577       PRIVILEDGED ();
1578       regs.r_pc = poppc ();
1579       regs.r_psw = poppc ();
1580       if (FLAG_PM)
1581 	regs.r_psw |= FLAGBIT_U;
1582 #ifdef CYCLE_ACCURATE
1583       regs.fast_return = 0;
1584       cycles (6);
1585 #endif
1586       break;
1587 
1588     case RXO_revl:
1589       uma = GS ();
1590       umb = (((uma >> 24) & 0xff)
1591 	     | ((uma >> 8) & 0xff00)
1592 	     | ((uma << 8) & 0xff0000)
1593 	     | ((uma << 24) & 0xff000000UL));
1594       PD (umb);
1595       E1;
1596       break;
1597 
1598     case RXO_revw:
1599       uma = GS ();
1600       umb = (((uma >> 8) & 0x00ff00ff)
1601 	     | ((uma << 8) & 0xff00ff00UL));
1602       PD (umb);
1603       E1;
1604       break;
1605 
1606     case RXO_rmpa:
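      /* Multiply-and-accumulate: R3 elements of OPCODE->SIZE at [R1]
	 and [R2] are multiplied and summed into the wide accumulator
	 held in R6:R5:R4, after which R6 is sign-extended from 16
	 bits and O/S are set from the result.  */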
1607       RL(4);
1608       RL(5);
1609 #ifdef CYCLE_ACCURATE
1610       tx = regs.r[3];
1611 #endif
1612 
1613       while (regs.r[3] != 0)
1614 	{
1615 	  long long tmp;
1616 
1617 	  switch (opcode->size)
1618 	    {
1619 	    case RX_Long:
1620 	      ma = mem_get_si (regs.r[1]);
1621 	      mb = mem_get_si (regs.r[2]);
1622 	      regs.r[1] += 4;
1623 	      regs.r[2] += 4;
1624 	      break;
1625 	    case RX_Word:
1626 	      ma = sign_ext (mem_get_hi (regs.r[1]), 16);
1627 	      mb = sign_ext (mem_get_hi (regs.r[2]), 16);
1628 	      regs.r[1] += 2;
1629 	      regs.r[2] += 2;
1630 	      break;
1631 	    case RX_Byte:
1632 	      ma = sign_ext (mem_get_qi (regs.r[1]), 8);
1633 	      mb = sign_ext (mem_get_qi (regs.r[2]), 8);
1634 	      regs.r[1] += 1;
1635 	      regs.r[2] += 1;
1636 	      break;
1637 	    default:
1638 	      abort ();
1639 	    }
1640 	  /* We do the multiply as a signed value.  */
1641 	  sll = (long long)ma * (long long)mb;
1642 	  tprintf("        %016llx = %d * %d\n", sll, ma, mb);
1643 	  /* but we do the sum as unsigned, while sign extending the operands.  */
1644 	  tmp = regs.r[4] + (sll & 0xffffffffUL);
1645 	  regs.r[4] = tmp & 0xffffffffUL;
1646 	  tmp >>= 32;
1647 	  sll >>= 32;
1648 	  tmp += regs.r[5] + (sll & 0xffffffffUL);
1649 	  regs.r[5] = tmp & 0xffffffffUL;
1650 	  tmp >>= 32;
1651 	  sll >>= 32;
1652 	  tmp += regs.r[6] + (sll & 0xffffffffUL);
1653 	  regs.r[6] = tmp & 0xffffffffUL;
1654 	  tprintf("%08lx\033[36m%08lx\033[0m%08lx\n",
1655 		  (unsigned long) regs.r[6],
1656 		  (unsigned long) regs.r[5],
1657 		  (unsigned long) regs.r[4]);
1658 
1659 	  regs.r[3] --;
1660 	}
1661       if (regs.r[6] & 0x00008000)
1662 	regs.r[6] |= 0xffff0000UL;
1663       else
1664 	regs.r[6] &= 0x0000ffff;
1665       ma = (regs.r[6] & 0x80000000UL) ? FLAGBIT_S : 0;
1666       if (regs.r[6] != 0 && regs.r[6] != 0xffffffffUL)
1667 	set_flags (FLAGBIT_O|FLAGBIT_S, ma | FLAGBIT_O);
1668       else
1669 	set_flags (FLAGBIT_O|FLAGBIT_S, ma);
1670 #ifdef CYCLE_ACCURATE
1671       switch (opcode->size)
1672 	{
1673 	case RX_Long:
1674 	  cycles (6 + 4 * tx);
1675 	  break;
1676 	case RX_Word:
1677 	  cycles (6 + 5 * (tx / 2) + 4 * (tx % 2));
1678 	  break;
1679 	case RX_Byte:
1680 	  cycles (6 + 7 * (tx / 4) + 4 * (tx % 4));
1681 	  break;
1682 	default:
1683 	  abort ();
1684 	}
1685 #endif
1686       break;
1687 
1688     case RXO_rolc:
1689       v = GD ();
1690       ma = v & 0x80000000UL;
1691       v <<= 1;
1692       v |= carry;
1693       set_szc (v, 4, ma);
1694       PD (v);
1695       E1;
1696       break;
1697 
1698     case RXO_rorc:
1699       uma = GD ();
1700       mb = uma & 1;
1701       uma >>= 1;
1702       uma |= (carry ? 0x80000000UL : 0);
1703       set_szc (uma, 4, mb);
1704       PD (uma);
1705       E1;
1706       break;
1707 
1708     case RXO_rotl:
1709       mb = GS ();
1710       uma = GD ();
1711       if (mb)
1712 	{
1713 	  uma = (uma << mb) | (uma >> (32-mb));
1714 	  mb = uma & 1;
1715 	}
1716       set_szc (uma, 4, mb);
1717       PD (uma);
1718       E1;
1719       break;
1720 
1721     case RXO_rotr:
1722       mb = GS ();
1723       uma = GD ();
1724       if (mb)
1725 	{
1726 	  uma = (uma >> mb) | (uma << (32-mb));
1727 	  mb = uma & 0x80000000;
1728 	}
1729       set_szc (uma, 4, mb);
1730       PD (uma);
1731       E1;
1732       break;
1733 
1734     case RXO_round:
1735       ma = GS ();
1736       FPCLEAR ();
1737       mb = rxfp_ftoi (ma, regs.r_fpsw);
1738       FPCHECK ();
1739       PD (mb);
1740       tprintf("(int) %g = %d\n", int2float(ma), mb);
1741       set_sz (mb, 4);
1742       E (2);
1743       break;
1744 
1745     case RXO_rts:
1746       {
1747 #ifdef CYCLE_ACCURATE
1748 	int cyc = 5;
1749 #endif
1750 	regs.r_pc = poppc ();
1751 #ifdef CYCLE_ACCURATE
1752 	/* Note: specs say 5, chip says 3.  */
1753 	if (regs.fast_return && regs.link_register == regs.r_pc)
1754 	  {
1755 #ifdef CYCLE_STATS
1756 	    fast_returns ++;
1757 #endif
1758 	    tprintf("fast return bonus\n");
1759 	    cyc -= 2;
1760 	  }
1761 	cycles (cyc);
1762 	regs.fast_return = 0;
1763 	branch_alignment_penalty = 1;
1764 #endif
1765       }
1766       break;
1767 
1768     case RXO_rtsd:
1769       if (opcode->op[2].type == RX_Operand_Register)
1770 	{
1771 	  int i;
1772 	  /* RTSD cannot pop R0 (sp).  */
1773 	  put_reg (0, get_reg (0) + GS() - (opcode->op[0].reg-opcode->op[2].reg+1)*4);
1774 	  if (opcode->op[2].reg == 0)
1775 	    EXCEPTION (EX_UNDEFINED);
1776 #ifdef CYCLE_ACCURATE
1777 	  tx = opcode->op[0].reg - opcode->op[2].reg + 1;
1778 #endif
1779 	  for (i = opcode->op[2].reg; i <= opcode->op[0].reg; i ++)
1780 	    {
1781 	      RLD (i);
1782 	      put_reg (i, pop ());
1783 	    }
1784 	}
1785       else
1786 	{
1787 #ifdef CYCLE_ACCURATE
1788 	  tx = 0;
1789 #endif
1790 	  put_reg (0, get_reg (0) + GS());
1791 	}
1792       put_reg (pc, poppc());
1793 #ifdef CYCLE_ACCURATE
1794       if (regs.fast_return && regs.link_register == regs.r_pc)
1795 	{
1796 	  tprintf("fast return bonus\n");
1797 #ifdef CYCLE_STATS
1798 	  fast_returns ++;
1799 #endif
1800 	  cycles (tx < 3 ? 3 : tx + 1);
1801 	}
1802       else
1803 	{
1804 	  cycles (tx < 5 ? 5 : tx + 1);
1805 	}
1806       regs.fast_return = 0;
1807       branch_alignment_penalty = 1;
1808 #endif
1809       break;
1810 
1811     case RXO_sat:
1812       if (FLAG_O && FLAG_S)
1813 	PD (0x7fffffffUL);
1814       else if (FLAG_O && ! FLAG_S)
1815 	PD (0x80000000UL);
1816       E1;
1817       break;
1818 
1819     case RXO_satr:
1820       if (FLAG_O && ! FLAG_S)
1821 	{
1822 	  put_reg (6, 0x0);
1823 	  put_reg (5, 0x7fffffff);
1824 	  put_reg (4, 0xffffffff);
1825 	}
1826       else if (FLAG_O && FLAG_S)
1827 	{
1828 	  put_reg (6, 0xffffffff);
1829 	  put_reg (5, 0x80000000);
1830 	  put_reg (4, 0x0);
1831 	}
1832       E1;
1833       break;
1834 
1835     case RXO_sbb:
1836       MATH_OP (-, ! carry);
1837       break;
1838 
1839     case RXO_sccnd:
1840       if (GS())
1841 	PD (1);
1842       else
1843 	PD (0);
1844       E1;
1845       break;
1846 
1847     case RXO_scmpu:
1848 #ifdef CYCLE_ACCURATE
1849       tx = regs.r[3];
1850 #endif
1851       while (regs.r[3] != 0)
1852 	{
1853 	  uma = mem_get_qi (regs.r[1] ++);
1854 	  umb = mem_get_qi (regs.r[2] ++);
1855 	  regs.r[3] --;
1856 	  if (uma != umb || uma == 0)
1857 	    break;
1858 	}
1859       if (uma == umb)
1860 	set_zc (1, 1);
1861       else
1862 	set_zc (0, ((int)uma - (int)umb) >= 0);
1863       cycles (2 + 4 * (tx / 4) + 4 * (tx % 4));
1864       break;
1865 
1866     case RXO_setpsw:
1867       v = 1 << opcode->op[0].reg;
1868       if (FLAG_PM
1869 	  && (v == FLAGBIT_I
1870 	      || v == FLAGBIT_U))
1871 	break;
1872       regs.r_psw |= v;
1873       cycles (1);
1874       break;
1875 
1876     case RXO_smovb:
1877       RL (3);
1878 #ifdef CYCLE_ACCURATE
1879       tx = regs.r[3];
1880 #endif
1881       while (regs.r[3])
1882 	{
1883 	  uma = mem_get_qi (regs.r[2] --);
1884 	  mem_put_qi (regs.r[1]--, uma);
1885 	  regs.r[3] --;
1886 	}
1887 #ifdef CYCLE_ACCURATE
1888       if (tx > 3)
1889 	cycles (6 + 3 * (tx / 4) + 3 * (tx % 4));
1890       else
1891 	cycles (2 + 3 * (tx % 4));
1892 #endif
1893       break;
1894 
1895     case RXO_smovf:
1896       RL (3);
1897 #ifdef CYCLE_ACCURATE
1898       tx = regs.r[3];
1899 #endif
1900       while (regs.r[3])
1901 	{
1902 	  uma = mem_get_qi (regs.r[2] ++);
1903 	  mem_put_qi (regs.r[1]++, uma);
1904 	  regs.r[3] --;
1905 	}
1906       cycles (2 + 3 * (int)(tx / 4) + 3 * (tx % 4));
1907       break;
1908 
1909     case RXO_smovu:
1910 #ifdef CYCLE_ACCURATE
1911       tx = regs.r[3];
1912 #endif
1913       while (regs.r[3] != 0)
1914 	{
1915 	  uma = mem_get_qi (regs.r[2] ++);
1916 	  mem_put_qi (regs.r[1]++, uma);
1917 	  regs.r[3] --;
1918 	  if (uma == 0)
1919 	    break;
1920 	}
1921       cycles (2 + 3 * (int)(tx / 4) + 3 * (tx % 4));
1922       break;
1923 
1924     case RXO_shar: /* d = ma >> mb */
1925       SHIFT_OP (sll, int, mb, >>=, 1);
1926       E (1);
1927       break;
1928 
1929     case RXO_shll: /* d = ma << mb */
1930       SHIFT_OP (ll, int, mb, <<=, 0x80000000UL);
1931       E (1);
1932       break;
1933 
1934     case RXO_shlr: /* d = ma >> mb */
1935       SHIFT_OP (ll, unsigned int, mb, >>=, 1);
1936       E (1);
1937       break;
1938 
1939     case RXO_sstr:
1940       RL (3);
1941 #ifdef CYCLE_ACCURATE
1942       tx = regs.r[3];
1943 #endif
1944       switch (opcode->size)
1945 	{
1946 	case RX_Long:
1947 	  while (regs.r[3] != 0)
1948 	    {
1949 	      mem_put_si (regs.r[1], regs.r[2]);
1950 	      regs.r[1] += 4;
1951 	      regs.r[3] --;
1952 	    }
1953 	  cycles (2 + tx);
1954 	  break;
1955 	case RX_Word:
1956 	  while (regs.r[3] != 0)
1957 	    {
1958 	      mem_put_hi (regs.r[1], regs.r[2]);
1959 	      regs.r[1] += 2;
1960 	      regs.r[3] --;
1961 	    }
1962 	  cycles (2 + (int)(tx / 2) + tx % 2);
1963 	  break;
1964 	case RX_Byte:
1965 	  while (regs.r[3] != 0)
1966 	    {
1967 	      mem_put_qi (regs.r[1], regs.r[2]);
1968 	      regs.r[1] ++;
1969 	      regs.r[3] --;
1970 	    }
1971 	  cycles (2 + (int)(tx / 4) + tx % 4);
1972 	  break;
1973 	default:
1974 	  abort ();
1975 	}
1976       break;
1977 
1978     case RXO_stcc:
1979       if (GS2())
1980 	PD (GS ());
1981       E1;
1982       break;
1983 
1984     case RXO_stop:
1985       PRIVILEDGED ();
1986       regs.r_psw |= FLAGBIT_I;
1987       DO_RETURN (RX_MAKE_STOPPED(0));
1988 
1989     case RXO_sub:
1990       MATH_OP (-, 0);
1991       break;
1992 
1993     case RXO_suntil:
1994       RL(3);
1995 #ifdef CYCLE_ACCURATE
1996       tx = 0;
1997 #endif
1998       if (regs.r[3] == 0)
1999 	{
2000 	  cycles (3);
2001 	  break;
2002 	}
2003       switch (opcode->size)
2004 	{
2005 	case RX_Long:
2006 	  uma = get_reg (2);
2007 	  while (regs.r[3] != 0)
2008 	    {
2009 	      regs.r[3] --;
2010 	      umb = mem_get_si (get_reg (1));
2011 	      regs.r[1] += 4;
2012 #ifdef CYCLE_ACCURATE
2013 	      tx ++;
2014 #endif
2015 	      if (umb == uma)
2016 		break;
2017 	    }
2018 #ifdef CYCLE_ACCURATE
2019 	  cycles (3 + 3 * tx);
2020 #endif
2021 	  break;
2022 	case RX_Word:
2023 	  uma = get_reg (2) & 0xffff;
2024 	  while (regs.r[3] != 0)
2025 	    {
2026 	      regs.r[3] --;
2027 	      umb = mem_get_hi (get_reg (1));
2028 	      regs.r[1] += 2;
2029 #ifdef CYCLE_ACCURATE
2030 	      tx ++;
2031 #endif
2032 	      if (umb == uma)
2033 		break;
2034 	    }
2035 #ifdef CYCLE_ACCURATE
2036 	  cycles (3 + 3 * (tx / 2) + 3 * (tx % 2));
2037 #endif
2038 	  break;
2039 	case RX_Byte:
2040 	  uma = get_reg (2) & 0xff;
2041 	  while (regs.r[3] != 0)
2042 	    {
2043 	      regs.r[3] --;
2044 	      umb = mem_get_qi (regs.r[1]);
2045 	      regs.r[1] += 1;
2046 #ifdef CYCLE_ACCURATE
2047 	      tx ++;
2048 #endif
2049 	      if (umb == uma)
2050 		break;
2051 	    }
2052 #ifdef CYCLE_ACCURATE
2053 	  cycles (3 + 3 * (tx / 4) + 3 * (tx % 4));
2054 #endif
2055 	  break;
2056 	default:
2057 	  abort();
2058 	}
2059       if (uma == umb)
2060 	set_zc (1, 1);
2061       else
2062 	set_zc (0, ((int)uma - (int)umb) >= 0);
2063       break;
2064 
2065     case RXO_swhile:
2066       RL(3);
2067 #ifdef CYCLE_ACCURATE
2068       tx = 0;
2069 #endif
2070       if (regs.r[3] == 0)
2071 	break;
2072       switch (opcode->size)
2073 	{
2074 	case RX_Long:
2075 	  uma = get_reg (2);
2076 	  while (regs.r[3] != 0)
2077 	    {
2078 	      regs.r[3] --;
2079 	      umb = mem_get_si (get_reg (1));
2080 	      regs.r[1] += 4;
2081 #ifdef CYCLE_ACCURATE
2082 	      tx ++;
2083 #endif
2084 	      if (umb != uma)
2085 		break;
2086 	    }
2087 #ifdef CYCLE_ACCURATE
2088 	  cycles (3 + 3 * tx);
2089 #endif
2090 	  break;
2091 	case RX_Word:
2092 	  uma = get_reg (2) & 0xffff;
2093 	  while (regs.r[3] != 0)
2094 	    {
2095 	      regs.r[3] --;
2096 	      umb = mem_get_hi (get_reg (1));
2097 	      regs.r[1] += 2;
2098 #ifdef CYCLE_ACCURATE
2099 	      tx ++;
2100 #endif
2101 	      if (umb != uma)
2102 		break;
2103 	    }
2104 #ifdef CYCLE_ACCURATE
2105 	  cycles (3 + 3 * (tx / 2) + 3 * (tx % 2));
2106 #endif
2107 	  break;
2108 	case RX_Byte:
2109 	  uma = get_reg (2) & 0xff;
2110 	  while (regs.r[3] != 0)
2111 	    {
2112 	      regs.r[3] --;
2113 	      umb = mem_get_qi (regs.r[1]);
2114 	      regs.r[1] += 1;
2115 #ifdef CYCLE_ACCURATE
2116 	      tx ++;
2117 #endif
2118 	      if (umb != uma)
2119 		break;
2120 	    }
2121 #ifdef CYCLE_ACCURATE
2122 	  cycles (3 + 3 * (tx / 4) + 3 * (tx % 4));
2123 #endif
2124 	  break;
2125 	default:
2126 	  abort();
2127 	}
2128       if (uma == umb)
2129 	set_zc (1, 1);
2130       else
2131 	set_zc (0, ((int)uma - (int)umb) >= 0);
2132       break;
2133 
2134     case RXO_wait:
2135       PRIVILEDGED ();
2136       regs.r_psw |= FLAGBIT_I;
2137       DO_RETURN (RX_MAKE_STOPPED(0));
2138 
2139     case RXO_xchg:
2140 #ifdef CYCLE_ACCURATE
2141       regs.m2m = 0;
2142 #endif
2143       v = GS (); /* This is the memory operand, if any.  */
2144       PS (GD ()); /* and this may change the address register.  */
2145       PD (v);
2146       E2;
2147 #ifdef CYCLE_ACCURATE
2148       /* all M cycles happen during xchg's cycles.  */
2149       memory_dest = 0;
2150       memory_source = 0;
2151 #endif
2152       break;
2153 
2154     case RXO_xor:
2155       LOGIC_OP (^);
2156       break;
2157 
2158     default:
2159       EXCEPTION (EX_UNDEFINED);
2160     }
2161 
2162 #ifdef CYCLE_ACCURATE
2163   regs.m2m = 0;
2164   if (memory_source)
2165     regs.m2m |= M2M_SRC;
2166   if (memory_dest)
2167     regs.m2m |= M2M_DST;
2168 
2169   regs.rt = new_rt;
2170   new_rt = -1;
2171 #endif
2172 
2173 #ifdef CYCLE_STATS
2174   if (prev_cycle_count == regs.cycle_count)
2175     {
2176       printf("Cycle count not updated! id %s\n", id_names[opcode->id]);
2177       abort ();
2178     }
2179 #endif
2180 
2181 #ifdef CYCLE_STATS
2182   if (running_benchmark)
2183     {
2184       int omap = op_lookup (opcode->op[0].type, opcode->op[1].type, opcode->op[2].type);
2185 
2186 
2187       cycles_per_id[opcode->id][omap] += regs.cycle_count - prev_cycle_count;
2188       times_per_id[opcode->id][omap] ++;
2189 
2190       times_per_pair[prev_opcode_id][po0][opcode->id][omap] ++;
2191 
2192       prev_opcode_id = opcode->id;
2193       po0 = omap;
2194     }
2195 #endif
2196 
2197   return RX_MAKE_STEPPED ();
2198 }
2199 
2200 #ifdef CYCLE_STATS
2201 void
2202 reset_pipeline_stats (void)
2203 {
2204   memset (cycles_per_id, 0, sizeof(cycles_per_id));
2205   memset (times_per_id, 0, sizeof(times_per_id));
2206   memory_stalls = 0;
2207   register_stalls = 0;
2208   branch_stalls = 0;
2209   branch_alignment_stalls = 0;
2210   fast_returns = 0;
2211   memset (times_per_pair, 0, sizeof(times_per_pair));
2212   running_benchmark = 1;
2213 
2214   benchmark_start_cycle = regs.cycle_count;
2215 }
2216 
2217 void
2218 halt_pipeline_stats (void)
2219 {
2220   running_benchmark = 0;
2221   benchmark_end_cycle = regs.cycle_count;
2222 }
2223 #endif
2224 
2225 void
2226 pipeline_stats (void)
2227 {
2228 #ifdef CYCLE_STATS
2229   int i, o1;
2230   int p, p1;
2231 #endif
2232 
2233 #ifdef CYCLE_ACCURATE
2234   if (verbose == 1)
2235     {
2236       printf ("cycles: %llu\n", regs.cycle_count);
2237       return;
2238     }
2239 
2240   printf ("cycles: %13s\n", comma (regs.cycle_count));
2241 #endif
2242 
2243 #ifdef CYCLE_STATS
2244   if (benchmark_start_cycle)
2245     printf ("bmark:  %13s\n", comma (benchmark_end_cycle - benchmark_start_cycle));
2246 
2247   printf("\n");
2248   for (i = 0; i < N_RXO; i++)
2249     for (o1 = 0; o1 < N_MAP; o1 ++)
2250       if (times_per_id[i][o1])
2251 	printf("%13s %13s %7.2f  %s %s\n",
2252 	       comma (cycles_per_id[i][o1]),
2253 	       comma (times_per_id[i][o1]),
2254 	       (double)cycles_per_id[i][o1] / times_per_id[i][o1],
2255 	       op_cache_string(o1),
2256 	       id_names[i]+4);
2257 
2258   printf("\n");
2259   for (p = 0; p < N_RXO; p ++)
2260     for (p1 = 0; p1 < N_MAP; p1 ++)
2261       for (i = 0; i < N_RXO; i ++)
2262 	for (o1 = 0; o1 < N_MAP; o1 ++)
2263 	  if (times_per_pair[p][p1][i][o1])
2264 	    {
2265 	      printf("%13s   %s %-9s  ->  %s %s\n",
2266 		     comma (times_per_pair[p][p1][i][o1]),
2267 		     op_cache_string(p1),
2268 		     id_names[p]+4,
2269 		     op_cache_string(o1),
2270 		     id_names[i]+4);
2271 	    }
2272 
2273   printf("\n");
2274   printf("%13s memory stalls\n", comma (memory_stalls));
2275   printf("%13s register stalls\n", comma (register_stalls));
2276   printf("%13s branches taken (non-return)\n", comma (branch_stalls));
2277   printf("%13s branch alignment stalls\n", comma (branch_alignment_stalls));
2278   printf("%13s fast returns\n", comma (fast_returns));
2279 #endif
2280 }
2281